577 files changed, 17911 insertions, 7544 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5313d11..7e4da71 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -156,6 +156,44 @@ else()
   option(LLVM_ENABLE_ASSERTIONS "Enable assertions" ON)
 endif()
 
+option(LLVM_USE_INTEL_JITEVENTS
+  "Use Intel JIT API to inform Intel(R) VTune(TM) Amplifier XE 2011 about JIT code"
+  OFF)
+
+if( LLVM_USE_INTEL_JITEVENTS )
+  # Verify we are on a supported platform
+  if( CMAKE_SYSTEM_NAME MATCHES "Windows" OR CMAKE_SYSTEM_NAME MATCHES "Linux" )
+    # Directory where Intel Parallel Amplifier XE 2011 is installed.
+    if ( WIN32 )
+      set(LLVM_INTEL_JITEVENTS_DIR $ENV{VTUNE_AMPLIFIER_XE_2011_DIR})
+    else ( WIN32 )
+      set(LLVM_INTEL_JITEVENTS_DIR "/opt/intel/vtune_amplifier_xe_2011")
+    endif ( WIN32 )
+
+    # Set include and library search paths for Intel JIT Events API
+    set(LLVM_INTEL_JITEVENTS_INCDIR "${LLVM_INTEL_JITEVENTS_DIR}/include")
+
+    if ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
+      set(LLVM_INTEL_JITEVENTS_LIBDIR "${LLVM_INTEL_JITEVENTS_DIR}/lib64")
+    else ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
+      set(LLVM_INTEL_JITEVENTS_LIBDIR "${LLVM_INTEL_JITEVENTS_DIR}/lib32")
+    endif ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
+  else()
+    message(FATAL_ERROR
+      "Intel JIT API support is available on Linux and Windows only.")
+  endif()
+endif( LLVM_USE_INTEL_JITEVENTS )
+
+option(LLVM_USE_OPROFILE
+  "Use opagent JIT interface to inform OProfile about JIT code" OFF)
+
+# If enabled, ierify we are on a platform that supports oprofile.
+if( LLVM_USE_OPROFILE )
+  if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
+    message(FATAL_ERROR "OProfile support is available on Linux only.") 
+  endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
+endif( LLVM_USE_OPROFILE )
+
 # Define an option controlling whether we should build for 32-bit on 64-bit
 # platforms, where supported.
 if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
diff --git a/CREDITS.TXT b/CREDITS.TXT
index 0f7a92d..f468cd5 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -320,11 +320,19 @@ W: http://vladimir_prus.blogspot.com
 E: ghost@cs.msu.su
 D: Made inst_iterator behave like a proper iterator, LowerConstantExprs pass
 
+N: Xerxes Ranby
+E: xerxes@zafena.se
+D: Cmake dependency chain and various bug fixes
+
 N: Chad Rosier
 E: mcrosier@apple.com
 D: ARM fast-isel improvements
 D: Performance monitoring
 
+N: Nadav Rotem
+E: nadav.rotem@intel.com
+D: Vector code generation improvements.
+
 N: Roman Samoilov
 E: roman@codedgers.com
 D: MSIL backend
@@ -375,14 +383,6 @@ E: lauro.venancio@indt.org.br
 D: ARM backend improvements
 D: Thread Local Storage implementation
 
-N: Xerxes Ranby
-E: xerxes@zafena.se
-D: Cmake dependency chain and various bug fixes
-
-N: Nadav Rotem
-E: nadav.rotem@intel.com
-D: Vector code generation improvements.
-
 N: Bill Wendling
 E: wendling@apple.com
 D: Exception handling
diff --git a/Makefile.config.in b/Makefile.config.in
index 7b9af41..33fbb2a 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -175,7 +175,6 @@ SED        := @SED@
 TAR        := @TAR@
 
 # Paths to miscellaneous programs we hope are present but might not be
-PERL       := @PERL@
 BZIP2      := @BZIP2@
 CAT        := @CAT@
 DOT        := @DOT@
@@ -194,7 +193,6 @@ RUNTEST    := @RUNTEST@
 TCLSH      := @TCLSH@
 ZIP        := @ZIP@
 
-HAVE_PERL    := @HAVE_PERL@
 HAVE_PTHREAD := @HAVE_PTHREAD@
 
 LIBS       := @LIBS@
@@ -345,3 +343,11 @@ LLVM_HAS_POLLY = @LLVM_HAS_POLLY@
 # Flags supported by the linker.
 # bfd ld / gold --version-script=file
 HAVE_LINK_VERSION_SCRIPT = @HAVE_LINK_VERSION_SCRIPT@
+
+# Flags to control building support for Intel JIT Events API
+USE_INTEL_JITEVENTS := @USE_INTEL_JITEVENTS@
+INTEL_JITEVENTS_INCDIR := @INTEL_JITEVENTS_INCDIR@
+INTEL_JITEVENTS_LIBDIR := @INTEL_JITEVENTS_LIBDIR@
+
+# Flags to control building support for OProfile JIT API
+USE_OPROFILE := @USE_OPROFILE@
diff --git a/Makefile.rules b/Makefile.rules
index a56f26e..76cec18 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -1070,7 +1070,7 @@ ifeq ($(HAVE_LINK_VERSION_SCRIPT),1)
 NativeExportsFile := $(ObjDir)/$(notdir $(EXPORTED_SYMBOL_FILE)).map
 $(NativeExportsFile): $(EXPORTED_SYMBOL_FILE) $(ObjDir)/.dir
 	$(Verb) echo "{" > $@
-	$(Verb) grep -q "\<" $< && echo "  global:" >> $@ || :
+	$(Verb) grep -q '[[:alnum:]_]' $< && echo "  global:" >> $@ || :
 	$(Verb) sed -e 's/$$/;/' -e 's/^/    /' < $< >> $@
 ifneq ($(HOST_OS),OpenBSD)
 	$(Verb) echo "  local: *;" >> $@
@@ -1492,12 +1492,23 @@ else
 $(ToolBuildPath): $(ToolDir)/.dir
 endif
 
+ifdef CODESIGN_TOOLS
 $(ToolBuildPath): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths)
 	$(Echo) Linking $(BuildMode) executable $(TOOLNAME) $(StripWarnMsg)
 	$(Verb) $(Link) -o $@ $(TOOLLINKOPTS) $(ObjectsO) $(ProjLibsOptions) \
 	$(LLVMLibsOptions) $(ExtraLibs) $(TOOLLINKOPTSB) $(LIBS)
 	$(Echo) ======= Finished Linking $(BuildMode) Executable $(TOOLNAME) \
           $(StripWarnMsg)
+	$(Echo) ======= Code-Signing $(BuildMode) Executable $(TOOLNAME)
+	$(Verb) codesign -s - $@
+else
+$(ToolBuildPath): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths)
+	$(Echo) Linking $(BuildMode) executable $(TOOLNAME) $(StripWarnMsg)
+	$(Verb) $(Link) -o $@ $(TOOLLINKOPTS) $(ObjectsO) $(ProjLibsOptions) \
+	$(LLVMLibsOptions) $(ExtraLibs) $(TOOLLINKOPTSB) $(LIBS)
+	$(Echo) ======= Finished Linking $(BuildMode) Executable $(TOOLNAME) \
+          $(StripWarnMsg)
+endif
 
 ifneq ($(strip $(ToolAliasBuildPath)),)
 $(ToolAliasBuildPath): $(ToolBuildPath)
diff --git a/README.txt b/README.txt
index 6bed7da..34a568f 100644
--- a/README.txt
+++ b/README.txt
@@ -15,3 +15,4 @@ assistance with LLVM.
 If you're writing a package for LLVM, see docs/Packaging.html for our
 suggestions.
 
+
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index e18725d..c002719 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -1005,16 +1005,6 @@ if test "$XDOT_PY" != "echo xdot.py" ; then
    [Define to path to xdot.py program if found or 'echo xdot.py' otherwise])
 fi
 
-dnl Look for a sufficiently recent version of Perl.
-LLVM_PROG_PERL([5.006])
-AC_SUBST(PERL)
-if test x"$PERL" = xnone; then
-   AC_SUBST(HAVE_PERL,0)
-   AC_MSG_ERROR([perl is required but was not found, please install it])
-else
-   AC_SUBST(HAVE_PERL,1)
-fi
-
 dnl Find the install program
 AC_PROG_INSTALL
 dnl Prepend src dir to install path dir if it's a relative path
@@ -1193,29 +1183,83 @@ AC_ARG_WITH(oprofile,
         *) llvm_cv_oppath="${withval}/lib/oprofile"
            CPPFLAGS="-I${withval}/include";;
       esac
-      if test -n "$llvm_cv_oppath" ; then
-        LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}"
-        dnl Work around http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=537744:
-        dnl libbfd is not included properly in libopagent in some Debian
-        dnl versions.  If libbfd isn't found at all, we assume opagent works
-        dnl anyway.
-        AC_SEARCH_LIBS(bfd_init, bfd, [], [])
-        AC_SEARCH_LIBS(op_open_agent, opagent, [], [
-          echo "Error! You need to have libopagent around."
-          exit -1
-        ])
-        AC_CHECK_HEADER([opagent.h], [], [
-          echo "Error! You need to have opagent.h around."
-          exit -1
-          ])
-      fi
+      case $llvm_cv_os_type in
+        Linux)
+          if test -n "$llvm_cv_oppath" ; then
+            LIBS="$LIBS -lopagent -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}"
+            dnl Work around http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=537744:
+            dnl libbfd is not included properly in libopagent in some Debian
+            dnl versions.  If libbfd isn't found at all, we assume opagent works
+            dnl anyway.
+            AC_SEARCH_LIBS(bfd_init, bfd, [], [])
+            AC_SEARCH_LIBS(op_open_agent, opagent, [], [
+              echo "Error! You need to have libopagent around."
+              exit -1
+            ])
+            AC_CHECK_HEADER([opagent.h], [], [
+              echo "Error! You need to have opagent.h around."
+              exit -1
+              ])
+          fi ;;
+        *)
+          AC_MSG_ERROR([OProfile support is available on Linux only.]) ;;
+      esac 
     ],
     [
       AC_SUBST(USE_OPROFILE, [0])
     ])
-AC_DEFINE_UNQUOTED([USE_OPROFILE],$USE_OPROFILE,
+AC_DEFINE_UNQUOTED([LLVM_USE_OPROFILE],$USE_OPROFILE,
                    [Define if we have the oprofile JIT-support library])
 
+dnl Enable support for Intel JIT Events API.
+AC_ARG_WITH(intel-jitevents,
+  AS_HELP_STRING([--with-intel-jitevents=<vtune-amplifier-dir>],
+    [Specify location of run-time support library for Intel JIT API (default=/opt/intel/vtune_amplifier_xe_2011)]),
+    [
+      case $llvm_cv_os_type in
+        Linux|Win32|Cygwin|MingW) ;;
+        *)
+          AC_MSG_ERROR([
+            Intel JIT API support is available on Linux and Windows only."]) ;;
+      esac
+
+      AC_SUBST(USE_INTEL_JITEVENTS, [1])
+      case "$llvm_cv_target_arch" in
+        x86)    llvm_intel_jitevents_archdir="lib32";;
+        x86_64) llvm_intel_jitevents_archdir="lib64";;
+        *)      echo "Target architecture $llvm_cv_target_arch does not support Intel JIT Events API"
+                exit -1;;
+      esac
+      INTEL_JITEVENTS_INCDIR="/opt/intel/vtune_amplifier_xe_2011/include"
+      INTEL_JITEVENTS_LIBDIR="/opt/intel/vtune_amplifier_xe_2011/$llvm_intel_jitevents_archdir"
+      case "$withval" in
+        /* | [[A-Za-z]]:[[\\/]]*) INTEL_JITEVENTS_INCDIR=$withval/include
+                                  INTEL_JITEVENTS_LIBDIR=$withval/$llvm_intel_jitevents_archdir ;;
+        *) ;;
+      esac
+
+      AC_SUBST(INTEL_JITEVENTS_INCDIR)
+      AC_SUBST(INTEL_JITEVENTS_LIBDIR)
+
+      LIBS="$LIBS -L${INTEL_JITEVENTS_LIBDIR}"
+      CPPFLAGS="$CPPFLAGS -I$INTEL_JITEVENTS_INCDIR"
+
+      AC_SEARCH_LIBS(iJIT_IsProfilingActive, jitprofiling, [], [
+        echo "Error! Cannot find libjitprofiling.a. Please check path specified in flag --with-intel-jitevents"
+        exit -1
+      ])
+      AC_CHECK_HEADER([jitprofiling.h], [], [
+        echo "Error! Cannot find jitprofiling.h. Please check path specified in flag --with-intel-jitevents"
+        exit -1
+      ])
+
+    ],
+    [
+      AC_SUBST(USE_INTEL_JITEVENTS, [0])
+    ])
+AC_DEFINE_UNQUOTED([LLVM_USE_INTEL_JITEVENTS],$USE_INTEL_JITEVENTS,
+                   [Define if we have the Intel JIT API runtime support library])
+
 dnl===-----------------------------------------------------------------------===
 dnl===
 dnl=== SECTION 6: Check for header files
diff --git a/autoconf/m4/path_perl.m4 b/autoconf/m4/path_perl.m4
deleted file mode 100644
index 406656c..0000000
--- a/autoconf/m4/path_perl.m4
+++ /dev/null
@@ -1,16 +0,0 @@
-dnl Check for a reasonable version of Perl.
-dnl   $1 - Minimum Perl version.  Typically 5.006.
-dnl 
-AC_DEFUN([LLVM_PROG_PERL], [
-AC_PATH_PROG(PERL, [perl], [none])
-if test "$PERL" != "none"; then
-  AC_MSG_CHECKING(for Perl $1 or newer)
-  if $PERL -e 'use $1;' 2>&1 > /dev/null; then
-    AC_MSG_RESULT(yes)
-  else
-    PERL=none
-    AC_MSG_RESULT(not found)
-  fi
-fi
-])
-
diff --git a/bindings/python/README.txt b/bindings/python/README.txt
new file mode 100644
index 0000000..96e3343
--- /dev/null
+++ b/bindings/python/README.txt
@@ -0,0 +1,67 @@
+This directory contains Python bindings for LLVM's C library.
+
+The bindings are currently a work in progress and are far from complete.
+Use at your own risk.
+
+Developer Info
+==============
+
+The single Python package is "llvm." Modules inside this package roughly
+follow the names of the modules/headers defined by LLVM's C API.
+
+Testing
+-------
+
+All test code is location in llvm/tests. Tests are written as classes
+which inherit from llvm.tests.base.TestBase, which is a convenience base
+class that provides common functionality.
+
+Tests can be executed by installing nose:
+
+    pip install nosetests
+
+Then by running nosetests:
+
+    nosetests
+
+To see more output:
+
+    nosetests -v
+
+To step into the Python debugger while running a test, add the following
+to your test at the point you wish to enter the debugger:
+
+    import pdb; pdb.set_trace()
+
+Then run nosetests:
+
+    nosetests -s -v
+
+You should strive for high code coverage. To see current coverage:
+
+    pip install coverage
+    nosetests --with-coverage --cover-html
+
+Then open cover/index.html in your browser of choice to see the code coverage.
+
+Style Convention
+----------------
+
+All code should pass PyFlakes. First, install PyFlakes:
+
+    pip install pyflakes
+
+Then at any time run it to see a report:
+
+    pyflakes .
+
+Eventually we'll provide a Pylint config file. In the meantime, install
+Pylint:
+
+    pip install pylint
+
+And run:
+
+    pylint llvm
+
+And try to keep the number of violations to a minimum.
diff --git a/bindings/python/llvm/__init__.py b/bindings/python/llvm/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/bindings/python/llvm/__init__.py
diff --git a/bindings/python/llvm/common.py b/bindings/python/llvm/common.py
new file mode 100644
index 0000000..0c5fcd0
--- /dev/null
+++ b/bindings/python/llvm/common.py
@@ -0,0 +1,106 @@
+#===- common.py - Python LLVM Bindings -----------------------*- python -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+from ctypes import POINTER
+from ctypes import c_void_p
+from ctypes import cdll
+
+import ctypes.util
+
+__all__ = [
+    'c_object_p',
+    'find_library',
+    'get_library',
+]
+
+c_object_p = POINTER(c_void_p)
+
+class LLVMObject(object):
+    """Base class for objects that are backed by an LLVM data structure.
+
+    This class should never be instantiated outside of this package.
+    """
+    def __init__(self, ptr, ownable=True, disposer=None):
+        assert isinstance(ptr, c_object_p)
+
+        self._ptr = self._as_parameter_ = ptr
+
+        self._self_owned = True
+        self._ownable = ownable
+        self._disposer = disposer
+
+        self._owned_objects = []
+
+    def take_ownership(self, obj):
+        """Take ownership of another object.
+
+        When you take ownership of another object, you are responsible for
+        destroying that object. In addition, a reference to that object is
+        placed inside this object so the Python garbage collector will not
+        collect the object while it is still alive in libLLVM.
+
+        This method should likely only be called from within modules inside
+        this package.
+        """
+        assert isinstance(obj, LLVMObject)
+
+        self._owned_objects.append(obj)
+        obj._self_owned = False
+
+    def from_param(self):
+        """ctypes function that converts this object to a function parameter."""
+        return self._as_parameter_
+
+    def __del__(self):
+        if not hasattr(self, '_self_owned') or not hasattr(self, '_disposer'):
+            return
+
+        if self._self_owned and self._disposer:
+            self._disposer(self)
+
+class CachedProperty(object):
+    """Decorator that caches the result of a property lookup.
+
+    This is a useful replacement for @property. It is recommended to use this
+    decorator on properties that invoke C API calls for which the result of the
+    call will be idempotent.
+    """
+    def __init__(self, wrapped):
+        self.wrapped = wrapped
+        try:
+            self.__doc__ = wrapped.__doc__
+        except: # pragma: no cover
+            pass
+
+    def __get__(self, instance, instance_type=None):
+        if instance is None:
+            return self
+
+        value = self.wrapped(instance)
+        setattr(instance, self.wrapped.__name__, value)
+
+        return value
+
+def find_library():
+    # FIXME should probably have build system define absolute path of shared
+    # library at install time.
+    for lib in ['LLVM-3.1svn', 'libLLVM-3.1svn', 'LLVM', 'libLLVM']:
+        result = ctypes.util.find_library(lib)
+        if result:
+            return result
+
+    return None
+
+def get_library():
+    """Obtain a reference to the llvm library."""
+    lib = find_library()
+    if not lib:
+        raise Exception('LLVM shared library not found!')
+
+    return cdll.LoadLibrary(lib)
diff --git a/bindings/python/llvm/core.py b/bindings/python/llvm/core.py
new file mode 100644
index 0000000..6756637
--- /dev/null
+++ b/bindings/python/llvm/core.py
@@ -0,0 +1,98 @@
+#===- core.py - Python LLVM Bindings -------------------------*- python -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+from .common import LLVMObject
+from .common import c_object_p
+from .common import get_library
+
+from . import enumerations
+
+from ctypes import POINTER
+from ctypes import byref
+from ctypes import c_char_p
+
+__all__ = [
+    "lib",
+    "MemoryBuffer",
+]
+
+lib = get_library()
+
+class OpCode(object):
+    """Represents an individual OpCode enumeration."""
+
+    _value_map = {}
+
+    def __init__(self, name, value):
+        self.name = name
+        self.value = value
+
+    def __repr__(self):
+        return 'OpCode.%s' % self.name
+
+    @staticmethod
+    def from_value(value):
+        """Obtain an OpCode instance from a numeric value."""
+        result = OpCode._value_map.get(value, None)
+
+        if result is None:
+            raise ValueError('Unknown OpCode: %d' % value)
+
+        return result
+
+    @staticmethod
+    def register(name, value):
+        """Registers a new OpCode enumeration.
+
+        This is called by this module for each enumeration defined in
+        enumerations. You should not need to call this outside this module.
+        """
+        if value in OpCode._value_map:
+            raise ValueError('OpCode value already registered: %d' % value)
+
+        opcode = OpCode(name, value)
+        OpCode._value_map[value] = opcode
+        setattr(OpCode, name, opcode)
+
+class MemoryBuffer(LLVMObject):
+    """Represents an opaque memory buffer."""
+
+    def __init__(self, filename=None):
+        """Create a new memory buffer.
+
+        Currently, we support creating from the contents of a file at the
+        specified filename.
+        """
+        if filename is None:
+            raise Exception("filename argument must be defined")
+
+        memory = c_object_p()
+        out = c_char_p(None)
+
+        result = lib.LLVMCreateMemoryBufferWithContentsOfFile(filename,
+                byref(memory), byref(out))
+
+        if result:
+            raise Exception("Could not create memory buffer: %s" % out.value)
+
+        LLVMObject.__init__(self, memory, disposer=lib.LLVMDisposeMemoryBuffer)
+
+def register_library(library):
+    library.LLVMCreateMemoryBufferWithContentsOfFile.argtypes = [c_char_p,
+            POINTER(c_object_p), POINTER(c_char_p)]
+    library.LLVMCreateMemoryBufferWithContentsOfFile.restype = bool
+
+    library.LLVMDisposeMemoryBuffer.argtypes = [MemoryBuffer]
+
+def register_enumerations():
+    for name, value in enumerations.OpCodes:
+        OpCode.register(name, value)
+
+register_library(lib)
+register_enumerations()
diff --git a/bindings/python/llvm/disassembler.py b/bindings/python/llvm/disassembler.py
new file mode 100644
index 0000000..5030b98
--- /dev/null
+++ b/bindings/python/llvm/disassembler.py
@@ -0,0 +1,134 @@
+#===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+from ctypes import CFUNCTYPE
+from ctypes import POINTER
+from ctypes import addressof
+from ctypes import byref
+from ctypes import c_byte
+from ctypes import c_char_p
+from ctypes import c_int
+from ctypes import c_size_t
+from ctypes import c_ubyte
+from ctypes import c_uint64
+from ctypes import c_void_p
+from ctypes import cast
+
+from .common import LLVMObject
+from .common import c_object_p
+from .common import get_library
+
+__all__ = [
+    'Disassembler',
+]
+
+lib = get_library()
+callbacks = {}
+
+class Disassembler(LLVMObject):
+    """Represents a disassembler instance.
+
+    Disassembler instances are tied to specific "triple," which must be defined
+    at creation time.
+
+    Disassembler instances can disassemble instructions from multiple sources.
+    """
+    def __init__(self, triple):
+        """Create a new disassembler instance.
+
+        The triple argument is the triple to create the disassembler for. This
+        is something like 'i386-apple-darwin9'.
+        """
+        ptr = lib.LLVMCreateDisasm(c_char_p(triple), c_void_p(None), c_int(0),
+                callbacks['op_info'](0), callbacks['symbol_lookup'](0))
+        if not ptr.contents:
+            raise Exception('Could not obtain disassembler for triple: %s' %
+                            triple)
+
+        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisasmDispose)
+
+    def get_instruction(self, source, pc=0):
+        """Obtain the next instruction from an input source.
+
+        The input source should be a str or bytearray or something that
+        represents a sequence of bytes.
+
+        This function will start reading bytes from the beginning of the
+        source.
+
+        The pc argument specifies the address that the first byte is at.
+
+        This returns a 2-tuple of:
+
+          long number of bytes read. 0 if no instruction was read.
+          str representation of instruction. This will be the assembly that
+            represents the instruction.
+        """
+        buf = cast(c_char_p(source), POINTER(c_ubyte))
+        out_str = cast((c_byte * 255)(), c_char_p)
+
+        result = lib.LLVMDisasmInstruction(self, buf, c_uint64(len(source)),
+                                           c_uint64(pc), out_str, 255)
+
+        return (result, out_str.value)
+
+    def get_instructions(self, source, pc=0):
+        """Obtain multiple instructions from an input source.
+
+        This is like get_instruction() except it is a generator for all
+        instructions within the source. It starts at the beginning of the
+        source and reads instructions until no more can be read.
+
+        This generator returns 3-tuple of:
+
+          long address of instruction.
+          long size of instruction, in bytes.
+          str representation of instruction.
+        """
+        source_bytes = c_char_p(source)
+        out_str = cast((c_byte * 255)(), c_char_p)
+
+        # This could probably be written cleaner. But, it does work.
+        buf = cast(source_bytes, POINTER(c_ubyte * len(source))).contents
+        offset = 0
+        address = pc
+        end_address = pc + len(source)
+        while address < end_address:
+            b = cast(addressof(buf) + offset, POINTER(c_ubyte))
+            result = lib.LLVMDisasmInstruction(self, b,
+                    c_uint64(len(source) - offset), c_uint64(address),
+                    out_str, 255)
+
+            if result == 0:
+                break
+
+            yield (address, result, out_str.value)
+
+            address += result
+            offset += result
+
+
+def register_library(library):
+    library.LLVMCreateDisasm.argtypes = [c_char_p, c_void_p, c_int,
+        callbacks['op_info'], callbacks['symbol_lookup']]
+    library.LLVMCreateDisasm.restype = c_object_p
+
+    library.LLVMDisasmDispose.argtypes = [Disassembler]
+
+    library.LLVMDisasmInstruction.argtypes = [Disassembler, POINTER(c_ubyte),
+            c_uint64, c_uint64, c_char_p, c_size_t]
+    library.LLVMDisasmInstruction.restype = c_size_t
+
+callbacks['op_info'] = CFUNCTYPE(c_int, c_void_p, c_uint64, c_uint64, c_uint64,
+                                 c_int, c_void_p)
+callbacks['symbol_lookup'] = CFUNCTYPE(c_char_p, c_void_p, c_uint64,
+                                       POINTER(c_uint64), c_uint64,
+                                       POINTER(c_char_p))
+
+register_library(lib)
diff --git a/bindings/python/llvm/enumerations.py b/bindings/python/llvm/enumerations.py
new file mode 100644
index 0000000..f49d2fa
--- /dev/null
+++ b/bindings/python/llvm/enumerations.py
@@ -0,0 +1,211 @@
+#===- enumerations.py - Python LLVM Enumerations -------------*- python -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+r"""
+LLVM Enumerations
+=================
+
+This file defines enumerations from LLVM.
+
+Each enumeration is exposed as a list of 2-tuples. These lists are consumed by
+dedicated types elsewhere in the package. The enumerations are centrally
+defined in this file so they are easier to locate and maintain.
+"""
+
+__all__ = [
+    'Attributes',
+    'OpCodes',
+    'TypeKinds',
+    'Linkages',
+    'Visibility',
+    'CallConv',
+    'IntPredicate',
+    'RealPredicate',
+    'LandingPadClauseTy',
+]
+
+Attributes = [
+    ('ZExt', 1 << 0),
+    ('MSExt', 1 << 1),
+    ('NoReturn', 1 << 2),
+    ('InReg', 1 << 3),
+    ('StructRet', 1 << 4),
+    ('NoUnwind', 1 << 5),
+    ('NoAlias', 1 << 6),
+    ('ByVal', 1 << 7),
+    ('Nest', 1 << 8),
+    ('ReadNone', 1 << 9),
+    ('ReadOnly', 1 << 10),
+    ('NoInline', 1 << 11),
+    ('AlwaysInline', 1 << 12),
+    ('OptimizeForSize', 1 << 13),
+    ('StackProtect', 1 << 14),
+    ('StackProtectReq', 1 << 15),
+    ('Alignment', 31 << 16),
+    ('NoCapture', 1 << 21),
+    ('NoRedZone', 1 << 22),
+    ('ImplicitFloat', 1 << 23),
+    ('Naked', 1 << 24),
+    ('InlineHint', 1 << 25),
+    ('StackAlignment', 7 << 26),
+    ('ReturnsTwice', 1 << 29),
+    ('UWTable', 1 << 30),
+    ('NonLazyBind', 1 << 31),
+]
+
+OpCodes = [
+    ('Ret', 1),
+    ('Br', 2),
+    ('Switch', 3),
+    ('IndirectBr', 4),
+    ('Invoke', 5),
+    ('Unreachable', 7),
+    ('Add', 8),
+    ('FAdd', 9),
+    ('Sub', 10),
+    ('FSub', 11),
+    ('Mul', 12),
+    ('FMul', 13),
+    ('UDiv', 14),
+    ('SDiv', 15),
+    ('FDiv', 16),
+    ('URem', 17),
+    ('SRem', 18),
+    ('FRem', 19),
+    ('Shl', 20),
+    ('LShr', 21),
+    ('AShr', 22),
+    ('And', 23),
+    ('Or', 24),
+    ('Xor', 25),
+    ('Alloca', 26),
+    ('Load', 27),
+    ('Store', 28),
+    ('GetElementPtr', 29),
+    ('Trunc', 30),
+    ('ZExt', 31),
+    ('SExt', 32),
+    ('FPToUI', 33),
+    ('FPToSI', 34),
+    ('UIToFP', 35),
+    ('SIToFP', 36),
+    ('FPTrunc', 37),
+    ('FPExt', 38),
+    ('PtrToInt', 39),
+    ('IntToPtr', 40),
+    ('BitCast', 41),
+    ('ICmp', 42),
+    ('FCmpl', 43),
+    ('PHI', 44),
+    ('Call', 45),
+    ('Select', 46),
+    ('UserOp1', 47),
+    ('UserOp2', 48),
+    ('AArg', 49),
+    ('ExtractElement', 50),
+    ('InsertElement', 51),
+    ('ShuffleVector', 52),
+    ('ExtractValue', 53),
+    ('InsertValue', 54),
+    ('Fence', 55),
+    ('AtomicCmpXchg', 56),
+    ('AtomicRMW', 57),
+    ('Resume', 58),
+    ('LandingPad', 59),
+]
+
+TypeKinds = [
+    ('Void', 0),
+    ('Half', 1),
+    ('Float', 2),
+    ('Double', 3),
+    ('X86_FP80', 4),
+    ('FP128', 5),
+    ('PPC_FP128', 6),
+    ('Label', 7),
+    ('Integer', 8),
+    ('Function', 9),
+    ('Struct', 10),
+    ('Array', 11),
+    ('Pointer', 12),
+    ('Vector', 13),
+    ('Metadata', 14),
+    ('X86_MMX', 15),
+]
+
+Linkages = [
+    ('External', 0),
+    ('AvailableExternally', 1),
+    ('LinkOnceAny', 2),
+    ('LinkOnceODR', 3),
+    ('WeakAny', 4),
+    ('WeakODR', 5),
+    ('Appending', 6),
+    ('Internal', 7),
+    ('Private', 8),
+    ('DLLImport', 9),
+    ('DLLExport', 10),
+    ('ExternalWeak', 11),
+    ('Ghost', 12),
+    ('Common', 13),
+    ('LinkerPrivate', 14),
+    ('LinkerPrivateWeak', 15),
+    ('LinkerPrivateWeakDefAuto', 16),
+]
+
+Visibility = [
+    ('Default', 0),
+    ('Hidden', 1),
+    ('Protected', 2),
+]
+
+CallConv = [
+    ('CCall', 0),
+    ('FastCall', 8),
+    ('ColdCall', 9),
+    ('X86StdcallCall', 64),
+    ('X86FastcallCall', 65),
+]
+
+IntPredicate = [
+    ('EQ', 32),
+    ('NE', 33),
+    ('UGT', 34),
+    ('UGE', 35),
+    ('ULT', 36),
+    ('ULE', 37),
+    ('SGT', 38),
+    ('SGE', 39),
+    ('SLT', 40),
+    ('SLE', 41),
+]
+
+RealPredicate = [
+    ('PredicateFalse', 0),
+    ('OEQ', 1),
+    ('OGT', 2),
+    ('OGE', 3),
+    ('OLT', 4),
+    ('OLE', 5),
+    ('ONE', 6),
+    ('ORD', 7),
+    ('UNO', 8),
+    ('UEQ', 9),
+    ('UGT', 10),
+    ('UGE', 11),
+    ('ULT', 12),
+    ('ULE', 13),
+    ('UNE', 14),
+    ('PredicateTrue', 15),
+]
+
+LandingPadClauseTy = [
+    ('Catch', 0),
+    ('Filter', 1),
+]
diff --git a/bindings/python/llvm/object.py b/bindings/python/llvm/object.py
new file mode 100644
index 0000000..473aa3a
--- /dev/null
+++ b/bindings/python/llvm/object.py
@@ -0,0 +1,523 @@
+#===- object.py - Python Object Bindings --------------------*- python -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+r"""
+Object File Interface
+=====================
+
+This module provides an interface for reading information from object files
+(e.g. binary executables and libraries).
+
+Using this module, you can obtain information about an object file's sections,
+symbols, and relocations. These are represented by the classes ObjectFile,
+Section, Symbol, and Relocation, respectively.
+
+Usage
+-----
+
+The only way to use this module is to start by creating an ObjectFile. You can
+create an ObjectFile by loading a file (specified by its path) or by creating a
+llvm.core.MemoryBuffer and loading that.
+
+Once you have an object file, you can inspect its sections and symbols directly
+by calling get_sections() and get_symbols() respectively. To inspect
+relocations, call get_relocations() on a Section instance.
+
+Iterator Interface
+------------------
+
+The LLVM bindings expose iteration over sections, symbols, and relocations in a
+way that only allows one instance to be operated on at a single time. This is
+slightly annoying from a Python perspective, as it isn't very Pythonic to have
+objects that "expire" but are still active from a dynamic language.
+
+To aid working around this limitation, each Section, Symbol, and Relocation
+instance caches its properties after first access. So, if the underlying
+iterator is advanced, the properties can still be obtained provided they have
+already been retrieved.
+
+In addition, we also provide a "cache" method on each class to cache all
+available data. You can call this on each obtained instance. Or, you can pass
+cache=True to the appropriate get_XXX() method to have this done for you.
+
+Here are some examples on how to perform iteration:
+
+    obj = ObjectFile(filename='/bin/ls')
+
+    # This is OK. Each Section is only accessed inside its own iteration slot.
+    section_names = []
+    for section in obj.get_sections():
+        section_names.append(section.name)
+
+    # This is NOT OK. You perform a lookup after the object has expired.
+    symbols = list(obj.get_symbols())
+    for symbol in symbols:
+        print symbol.name # This raises because the object has expired.
+
+    # In this example, we mix a working and failing scenario.
+    symbols = []
+    for symbol in obj.get_symbols():
+        symbols.append(symbol)
+        print symbol.name
+
+    for symbol in symbols:
+        print symbol.name # OK
+        print symbol.address # NOT OK. We didn't look up this property before.
+
+    # Cache everything up front.
+    symbols = list(obj.get_symbols(cache=True))
+    for symbol in symbols:
+        print symbol.name # OK
+
+"""
+
+from ctypes import c_char_p
+from ctypes import c_uint64
+
+from .common import CachedProperty
+from .common import LLVMObject
+from .common import c_object_p
+from .common import get_library
+from .core import MemoryBuffer
+
+__all__ = [
+    "lib",
+    "ObjectFile",
+    "Relocation",
+    "Section",
+    "Symbol",
+]
+
+class ObjectFile(LLVMObject):
+    """Represents an object/binary file."""
+
+    def __init__(self, filename=None, contents=None):
+        """Construct an instance from a filename or binary data.
+
+        filename must be a path to a file that can be opened with open().
+        contents can be either a native Python buffer type (like str) or a
+        llvm.core.MemoryBuffer instance.
+        """
+        if contents:
+            assert isinstance(contents, MemoryBuffer)
+
+        if filename is not None:
+            contents = MemoryBuffer(filename=filename)
+
+        if contents is None:
+            raise Exception('No input found.')
+
+        ptr = lib.LLVMCreateObjectFile(contents)
+        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisposeObjectFile)
+        self.take_ownership(contents)
+
+    def get_sections(self, cache=False):
+        """Obtain the sections in this object file.
+
+        This is a generator for llvm.object.Section instances.
+
+        Sections are exposed as limited-use objects. See the module's
+        documentation on iterators for more.
+        """
+        sections = lib.LLVMGetSections(self)
+        last = None
+        while True:
+            if lib.LLVMIsSectionIteratorAtEnd(self, sections):
+                break
+
+            last = Section(sections)
+            if cache:
+                last.cache()
+
+            yield last
+
+            lib.LLVMMoveToNextSection(sections)
+            last.expire()
+
+        if last is not None:
+            last.expire()
+
+        lib.LLVMDisposeSectionIterator(sections)
+
+    def get_symbols(self, cache=False):
+        """Obtain the symbols in this object file.
+
+        This is a generator for llvm.object.Symbol instances.
+
+        Each Symbol instance is a limited-use object. See this module's
+        documentation on iterators for more.
+        """
+        symbols = lib.LLVMGetSymbols(self)
+        last = None
+        while True:
+            if lib.LLVMIsSymbolIteratorAtEnd(self, symbols):
+                break
+
+            last = Symbol(symbols, self)
+            if cache:
+                last.cache()
+
+            yield last
+
+            lib.LLVMMoveToNextSymbol(symbols)
+            last.expire()
+
+        if last is not None:
+            last.expire()
+
+        lib.LLVMDisposeSymbolIterator(symbols)
+
+class Section(LLVMObject):
+    """Represents a section in an object file."""
+
+    def __init__(self, ptr):
+        """Construct a new section instance.
+
+        Section instances can currently only be created from an ObjectFile
+        instance. Therefore, this constructor should not be used outside of
+        this module.
+        """
+        LLVMObject.__init__(self, ptr)
+
+        self.expired = False
+
+    @CachedProperty
+    def name(self):
+        """Obtain the string name of the section.
+
+        This is typically something like '.dynsym' or '.rodata'.
+        """
+        if self.expired:
+            raise Exception('Section instance has expired.')
+
+        return lib.LLVMGetSectionName(self)
+
+    @CachedProperty
+    def size(self):
+        """The size of the section, in long bytes."""
+        if self.expired:
+            raise Exception('Section instance has expired.')
+
+        return lib.LLVMGetSectionSize(self)
+
+    @CachedProperty
+    def contents(self):
+        if self.expired:
+            raise Exception('Section instance has expired.')
+
+        return lib.LLVMGetSectionContents(self)
+
+    @CachedProperty
+    def address(self):
+        """The address of this section, in long bytes."""
+        if self.expired:
+            raise Exception('Section instance has expired.')
+
+        return lib.LLVMGetSectionAddress(self)
+
+    def has_symbol(self, symbol):
+        """Returns whether a Symbol instance is present in this Section."""
+        if self.expired:
+            raise Exception('Section instance has expired.')
+
+        assert isinstance(symbol, Symbol)
+        return lib.LLVMGetSectionContainsSymbol(self, symbol)
+
+    def get_relocations(self, cache=False):
+        """Obtain the relocations in this Section.
+
+        This is a generator for llvm.object.Relocation instances.
+
+        Each instance is a limited used object. See this module's documentation
+        on iterators for more.
+        """
+        if self.expired:
+            raise Exception('Section instance has expired.')
+
+        relocations = lib.LLVMGetRelocations(self)
+        last = None
+        while True:
+            if lib.LLVMIsRelocationIteratorAtEnd(self, relocations):
+                break
+
+            last = Relocation(relocations)
+            if cache:
+                last.cache()
+
+            yield last
+
+            lib.LLVMMoveToNextRelocation(relocations)
+            last.expire()
+
+        if last is not None:
+            last.expire()
+
+        lib.LLVMDisposeRelocationIterator(relocations)
+
+    def cache(self):
+        """Cache properties of this Section.
+
+        This can be called as a workaround to the single active Section
+        limitation. When called, the properties of the Section are fetched so
+        they are still available after the Section has been marked inactive.
+        """
+        getattr(self, 'name')
+        getattr(self, 'size')
+        getattr(self, 'contents')
+        getattr(self, 'address')
+
+    def expire(self):
+        """Expire the section.
+
+        This is called internally by the section iterator.
+        """
+        self.expired = True
+
+class Symbol(LLVMObject):
+    """Represents a symbol in an object file."""
+    def __init__(self, ptr, object_file):
+        assert isinstance(ptr, c_object_p)
+        assert isinstance(object_file, ObjectFile)
+
+        LLVMObject.__init__(self, ptr)
+
+        self.expired = False
+        self._object_file = object_file
+
+    @CachedProperty
+    def name(self):
+        """The str name of the symbol.
+
+        This is often a function or variable name. Keep in mind that name
+        mangling could be in effect.
+        """
+        if self.expired:
+            raise Exception('Symbol instance has expired.')
+
+        return lib.LLVMGetSymbolName(self)
+
+    @CachedProperty
+    def address(self):
+        """The address of this symbol, in long bytes."""
+        if self.expired:
+            raise Exception('Symbol instance has expired.')
+
+        return lib.LLVMGetSymbolAddress(self)
+
+    @CachedProperty
+    def file_offset(self):
+        """The offset of this symbol in the file, in long bytes."""
+        if self.expired:
+            raise Exception('Symbol instance has expired.')
+
+        return lib.LLVMGetSymbolFileOffset(self)
+
+    @CachedProperty
+    def size(self):
+        """The size of the symbol, in long bytes."""
+        if self.expired:
+            raise Exception('Symbol instance has expired.')
+
+        return lib.LLVMGetSymbolSize(self)
+
+    @CachedProperty
+    def section(self):
+        """The Section to which this Symbol belongs.
+
+        The returned Section instance does not expire, unlike Sections that are
+        commonly obtained through iteration.
+
+        Because this obtains a new section iterator each time it is accessed,
+        calling this on a number of Symbol instances could be expensive.
+        """
+        sections = lib.LLVMGetSections(self._object_file)
+        lib.LLVMMoveToContainingSection(sections, self)
+
+        return Section(sections)
+
+    def cache(self):
+        """Cache all cacheable properties."""
+        getattr(self, 'name')
+        getattr(self, 'address')
+        getattr(self, 'file_offset')
+        getattr(self, 'size')
+
+    def expire(self):
+        """Mark the object as expired to prevent future API accesses.
+
+        This is called internally by this module and it is unlikely that
+        external callers have a legitimate reason for using it.
+        """
+        self.expired = True
+
+class Relocation(LLVMObject):
+    """Represents a relocation definition."""
+    def __init__(self, ptr):
+        """Create a new relocation instance.
+
+        Relocations are created from objects derived from Section instances.
+        Therefore, this constructor should not be called outside of this
+        module. See Section.get_relocations() for the proper method to obtain
+        a Relocation instance.
+        """
+        assert isinstance(ptr, c_object_p)
+
+        LLVMObject.__init__(self, ptr)
+
+        self.expired = False
+
+    @CachedProperty
+    def address(self):
+        """The address of this relocation, in long bytes."""
+        if self.expired:
+            raise Exception('Relocation instance has expired.')
+
+        return lib.LLVMGetRelocationAddress(self)
+
+    @CachedProperty
+    def offset(self):
+        """The offset of this relocation, in long bytes."""
+        if self.expired:
+            raise Exception('Relocation instance has expired.')
+
+        return lib.LLVMGetRelocationOffset(self)
+
+    @CachedProperty
+    def symbol(self):
+        """The Symbol corresponding to this Relocation."""
+        if self.expired:
+            raise Exception('Relocation instance has expired.')
+
+        ptr = lib.LLVMGetRelocationSymbol(self)
+        return Symbol(ptr)
+
+    @CachedProperty
+    def type_number(self):
+        """The relocation type, as a long."""
+        if self.expired:
+            raise Exception('Relocation instance has expired.')
+
+        return lib.LLVMGetRelocationType(self)
+
+    @CachedProperty
+    def type_name(self):
+        """The relocation type's name, as a str."""
+        if self.expired:
+            raise Exception('Relocation instance has expired.')
+
+        return lib.LLVMGetRelocationTypeName(self)
+
+    @CachedProperty
+    def value_string(self):
+        if self.expired:
+            raise Exception('Relocation instance has expired.')
+
+        return lib.LLVMGetRelocationValueString(self)
+
+    def expire(self):
+        """Expire this instance, making future API accesses fail."""
+        self.expired = True
+
+    def cache(self):
+        """Cache all cacheable properties on this instance."""
+        getattr(self, 'address')
+        getattr(self, 'offset')
+        getattr(self, 'symbol')
+        getattr(self, 'type')
+        getattr(self, 'type_name')
+        getattr(self, 'value_string')
+
+def register_library(library):
+    """Register function prototypes with LLVM library instance."""
+
+    # Object.h functions
+    library.LLVMCreateObjectFile.argtypes = [MemoryBuffer]
+    library.LLVMCreateObjectFile.restype = c_object_p
+
+    library.LLVMDisposeObjectFile.argtypes = [ObjectFile]
+
+    library.LLVMGetSections.argtypes = [ObjectFile]
+    library.LLVMGetSections.restype = c_object_p
+
+    library.LLVMDisposeSectionIterator.argtypes = [c_object_p]
+
+    library.LLVMIsSectionIteratorAtEnd.argtypes = [ObjectFile, c_object_p]
+    library.LLVMIsSectionIteratorAtEnd.restype = bool
+
+    library.LLVMMoveToNextSection.argtypes = [c_object_p]
+
+    library.LLVMMoveToContainingSection.argtypes = [c_object_p, c_object_p]
+
+    library.LLVMGetSymbols.argtypes = [ObjectFile]
+    library.LLVMGetSymbols.restype = c_object_p
+
+    library.LLVMDisposeSymbolIterator.argtypes = [c_object_p]
+
+    library.LLVMIsSymbolIteratorAtEnd.argtypes = [ObjectFile, c_object_p]
+    library.LLVMIsSymbolIteratorAtEnd.restype = bool
+
+    library.LLVMMoveToNextSymbol.argtypes = [c_object_p]
+
+    library.LLVMGetSectionName.argtypes = [c_object_p]
+    library.LLVMGetSectionName.restype = c_char_p
+
+    library.LLVMGetSectionSize.argtypes = [c_object_p]
+    library.LLVMGetSectionSize.restype = c_uint64
+
+    library.LLVMGetSectionContents.argtypes = [c_object_p]
+    library.LLVMGetSectionContents.restype = c_char_p
+
+    library.LLVMGetSectionAddress.argtypes = [c_object_p]
+    library.LLVMGetSectionAddress.restype = c_uint64
+
+    library.LLVMGetSectionContainsSymbol.argtypes = [c_object_p, c_object_p]
+    library.LLVMGetSectionContainsSymbol.restype = bool
+
+    library.LLVMGetRelocations.argtypes = [c_object_p]
+    library.LLVMGetRelocations.restype = c_object_p
+
+    library.LLVMDisposeRelocationIterator.argtypes = [c_object_p]
+
+    library.LLVMIsRelocationIteratorAtEnd.argtypes = [c_object_p, c_object_p]
+    library.LLVMIsRelocationIteratorAtEnd.restype = bool
+
+    library.LLVMMoveToNextRelocation.argtypes = [c_object_p]
+
+    library.LLVMGetSymbolName.argtypes = [Symbol]
+    library.LLVMGetSymbolName.restype = c_char_p
+
+    library.LLVMGetSymbolAddress.argtypes = [Symbol]
+    library.LLVMGetSymbolAddress.restype = c_uint64
+
+    library.LLVMGetSymbolFileOffset.argtypes = [Symbol]
+    library.LLVMGetSymbolFileOffset.restype = c_uint64
+
+    library.LLVMGetSymbolSize.argtypes = [Symbol]
+    library.LLVMGetSymbolSize.restype = c_uint64
+
+    library.LLVMGetRelocationAddress.argtypes = [c_object_p]
+    library.LLVMGetRelocationAddress.restype = c_uint64
+
+    library.LLVMGetRelocationOffset.argtypes = [c_object_p]
+    library.LLVMGetRelocationOffset.restype = c_uint64
+
+    library.LLVMGetRelocationSymbol.argtypes = [c_object_p]
+    library.LLVMGetRelocationSymbol.restype = c_object_p
+
+    library.LLVMGetRelocationType.argtypes = [c_object_p]
+    library.LLVMGetRelocationType.restype = c_uint64
+
+    library.LLVMGetRelocationTypeName.argtypes = [c_object_p]
+    library.LLVMGetRelocationTypeName.restype = c_char_p
+
+    library.LLVMGetRelocationValueString.argtypes = [c_object_p]
+    library.LLVMGetRelocationValueString.restype = c_char_p
+
+lib = get_library()
+register_library(lib)
diff --git a/bindings/python/llvm/tests/__init__.py b/bindings/python/llvm/tests/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/bindings/python/llvm/tests/__init__.py
diff --git a/bindings/python/llvm/tests/base.py b/bindings/python/llvm/tests/base.py
new file mode 100644
index 0000000..ff9eb2f
--- /dev/null
+++ b/bindings/python/llvm/tests/base.py
@@ -0,0 +1,32 @@
+import os.path
+import unittest
+
+POSSIBLE_TEST_BINARIES = [
+    'libreadline.so.5',
+    'libreadline.so.6',
+]
+
+POSSIBLE_TEST_BINARY_PATHS = [
+    '/usr/lib/debug',
+    '/lib',
+    '/usr/lib',
+    '/usr/local/lib',
+    '/lib/i386-linux-gnu',
+]
+
+class TestBase(unittest.TestCase):
+    def get_test_binary(self):
+        """Helper to obtain a test binary for object file testing.
+
+        FIXME Support additional, highly-likely targets or create one
+        ourselves.
+        """
+        for d in POSSIBLE_TEST_BINARY_PATHS:
+            for lib in POSSIBLE_TEST_BINARIES:
+                path = os.path.join(d, lib)
+
+                if os.path.exists(path):
+                    return path
+
+        raise Exception('No suitable test binaries available!')
+    get_test_binary.__test__ = False
diff --git a/bindings/python/llvm/tests/test_core.py b/bindings/python/llvm/tests/test_core.py
new file mode 100644
index 0000000..8123e15
--- /dev/null
+++ b/bindings/python/llvm/tests/test_core.py
@@ -0,0 +1,18 @@
+from .base import TestBase
+from ..core import OpCode
+from ..core import MemoryBuffer
+
+class TestCore(TestBase):
+    def test_opcode(self):
+        self.assertTrue(hasattr(OpCode, 'Ret'))
+        self.assertTrue(isinstance(OpCode.Ret, OpCode))
+        self.assertEqual(OpCode.Ret.value, 1)
+
+        op = OpCode.from_value(1)
+        self.assertTrue(isinstance(op, OpCode))
+        self.assertEqual(op, OpCode.Ret)
+
+    def test_memory_buffer_create_from_file(self):
+        source = self.get_test_binary()
+
+        MemoryBuffer(filename=source)
diff --git a/bindings/python/llvm/tests/test_disassembler.py b/bindings/python/llvm/tests/test_disassembler.py
new file mode 100644
index 0000000..545e866
--- /dev/null
+++ b/bindings/python/llvm/tests/test_disassembler.py
@@ -0,0 +1,28 @@
+from .base import TestBase
+
+from ..disassembler import Disassembler
+
+class TestDisassembler(TestBase):
+    def test_instantiate(self):
+         Disassembler('i686-apple-darwin9')
+
+    def test_basic(self):
+        sequence = '\x67\xe3\x81' # jcxz -127
+        triple = 'i686-apple-darwin9'
+
+        disassembler = Disassembler(triple)
+
+        count, s = disassembler.get_instruction(sequence)
+        self.assertEqual(count, 3)
+        self.assertEqual(s, '\tjcxz\t-127')
+
+    def test_get_instructions(self):
+        sequence = '\x67\xe3\x81\x01\xc7' # jcxz -127; addl %eax, %edi
+
+        disassembler = Disassembler('i686-apple-darwin9')
+
+        instructions = list(disassembler.get_instructions(sequence))
+        self.assertEqual(len(instructions), 2)
+
+        self.assertEqual(instructions[0], (0, 3, '\tjcxz\t-127'))
+        self.assertEqual(instructions[1], (3, 2, '\taddl\t%eax, %edi'))
diff --git a/bindings/python/llvm/tests/test_object.py b/bindings/python/llvm/tests/test_object.py
new file mode 100644
index 0000000..7ff981b
--- /dev/null
+++ b/bindings/python/llvm/tests/test_object.py
@@ -0,0 +1,67 @@
+from .base import TestBase
+from ..object import ObjectFile
+from ..object import Relocation
+from ..object import Section
+from ..object import Symbol
+
+class TestObjectFile(TestBase):
+    def get_object_file(self):
+        source = self.get_test_binary()
+        return ObjectFile(filename=source)
+
+    def test_create_from_file(self):
+        self.get_object_file()
+
+    def test_get_sections(self):
+        o = self.get_object_file()
+
+        count = 0
+        for section in o.get_sections():
+            count += 1
+            assert isinstance(section, Section)
+            assert isinstance(section.name, str)
+            assert isinstance(section.size, long)
+            assert isinstance(section.contents, str)
+            assert isinstance(section.address, long)
+
+        self.assertGreater(count, 0)
+
+        for section in o.get_sections():
+            section.cache()
+
+    def test_get_symbols(self):
+        o = self.get_object_file()
+
+        count = 0
+        for symbol in o.get_symbols():
+            count += 1
+            assert isinstance(symbol, Symbol)
+            assert isinstance(symbol.name, str)
+            assert isinstance(symbol.address, long)
+            assert isinstance(symbol.size, long)
+            assert isinstance(symbol.file_offset, long)
+
+        self.assertGreater(count, 0)
+
+        for symbol in o.get_symbols():
+            symbol.cache()
+
+    def test_symbol_section_accessor(self):
+        o = self.get_object_file()
+
+        for symbol in o.get_symbols():
+            section = symbol.section
+            assert isinstance(section, Section)
+
+            break
+
+    def test_get_relocations(self):
+        o = self.get_object_file()
+        for section in o.get_sections():
+            for relocation in section.get_relocations():
+                assert isinstance(relocation, Relocation)
+                assert isinstance(relocation.address, long)
+                assert isinstance(relocation.offset, long)
+                assert isinstance(relocation.type_number, long)
+                assert isinstance(relocation.type_name, str)
+                assert isinstance(relocation.value_string, str)
diff --git a/configure b/configure
index a6ed7a1..b7e42a7 100755
--- a/configure
+++ b/configure
@@ -736,8 +736,6 @@ CIRCO
 GV
 DOTTY
 XDOT_PY
-PERL
-HAVE_PERL
 INSTALL_PROGRAM
 INSTALL_SCRIPT
 INSTALL_DATA
@@ -768,6 +766,9 @@ NO_MISSING_FIELD_INITIALIZERS
 COVERED_SWITCH_DEFAULT
 USE_UDIS86
 USE_OPROFILE
+USE_INTEL_JITEVENTS
+INTEL_JITEVENTS_INCDIR
+INTEL_JITEVENTS_LIBDIR
 HAVE_PTHREAD
 HUGE_VAL_SANITY
 MMAP_FILE
@@ -1453,6 +1454,10 @@ Optional Packages:
   --with-udis86=<path>    Use udis86 external x86 disassembler library
   --with-oprofile=<prefix>
                           Tell OProfile >= 0.9.4 how to symbolize JIT output
+  --with-intel-jitevents=<vtune-amplifier-dir>
+                          Specify location of run-time support library for
+                          Intel JIT API
+                          (default=/opt/intel/vtune_amplifier_xe_2011)
 
 Some influential environment variables:
   CC          C compiler command
@@ -6905,73 +6910,6 @@ _ACEOF
 
 fi
 
-
-# Extract the first word of "perl", so it can be a program name with args.
-set dummy perl; ac_word=$2
-{ echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
-if test "${ac_cv_path_PERL+set}" = set; then
-  echo $ECHO_N "(cached) $ECHO_C" >&6
-else
-  case $PERL in
-  [\\/]* | ?:[\\/]*)
-  ac_cv_path_PERL="$PERL" # Let the user override the test with a path.
-  ;;
-  *)
-  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  test -z "$as_dir" && as_dir=.
-  for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
-    ac_cv_path_PERL="$as_dir/$ac_word$ac_exec_ext"
-    echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
-    break 2
-  fi
-done
-done
-IFS=$as_save_IFS
-
-  test -z "$ac_cv_path_PERL" && ac_cv_path_PERL="none"
-  ;;
-esac
-fi
-PERL=$ac_cv_path_PERL
-if test -n "$PERL"; then
-  { echo "$as_me:$LINENO: result: $PERL" >&5
-echo "${ECHO_T}$PERL" >&6; }
-else
-  { echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6; }
-fi
-
-
-if test "$PERL" != "none"; then
-  { echo "$as_me:$LINENO: checking for Perl 5.006 or newer" >&5
-echo $ECHO_N "checking for Perl 5.006 or newer... $ECHO_C" >&6; }
-  if $PERL -e 'use 5.006;' 2>&1 > /dev/null; then
-    { echo "$as_me:$LINENO: result: yes" >&5
-echo "${ECHO_T}yes" >&6; }
-  else
-    PERL=none
-    { echo "$as_me:$LINENO: result: not found" >&5
-echo "${ECHO_T}not found" >&6; }
-  fi
-fi
-
-
-if test x"$PERL" = xnone; then
-   HAVE_PERL=0
-
-   { { echo "$as_me:$LINENO: error: perl is required but was not found, please install it" >&5
-echo "$as_me: error: perl is required but was not found, please install it" >&2;}
-   { (exit 1); exit 1; }; }
-else
-   HAVE_PERL=1
-
-fi
-
 # Find a good install program.  We prefer a C program (faster),
 # so one script is as good as another.  But avoid the broken or
 # incompatible versions:
@@ -10464,7 +10402,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 10467 "configure"
+#line 10405 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -13329,9 +13267,11 @@ if test "${with_oprofile+set}" = set; then
         *) llvm_cv_oppath="${withval}/lib/oprofile"
            CPPFLAGS="-I${withval}/include";;
       esac
-      if test -n "$llvm_cv_oppath" ; then
-        LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}"
-                                        { echo "$as_me:$LINENO: checking for library containing bfd_init" >&5
+      case $llvm_cv_os_type in
+        Linux)
+          if test -n "$llvm_cv_oppath" ; then
+            LIBS="$LIBS -lopagent -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}"
+                                                            { echo "$as_me:$LINENO: checking for library containing bfd_init" >&5
 echo $ECHO_N "checking for library containing bfd_init... $ECHO_C" >&6; }
 if test "${ac_cv_search_bfd_init+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
@@ -13430,7 +13370,7 @@ if test "$ac_res" != no; then
 
 fi
 
-        { echo "$as_me:$LINENO: checking for library containing op_open_agent" >&5
+            { echo "$as_me:$LINENO: checking for library containing op_open_agent" >&5
 echo $ECHO_N "checking for library containing op_open_agent... $ECHO_C" >&6; }
 if test "${ac_cv_search_op_open_agent+set}" = set; then
   echo $ECHO_N "(cached) $ECHO_C" >&6
@@ -13529,12 +13469,12 @@ if test "$ac_res" != no; then
 
 else
 
-          echo "Error! You need to have libopagent around."
-          exit -1
+              echo "Error! You need to have libopagent around."
+              exit -1
 
 fi
 
-        if test "${ac_cv_header_opagent_h+set}" = set; then
+            if test "${ac_cv_header_opagent_h+set}" = set; then
   { echo "$as_me:$LINENO: checking for opagent.h" >&5
 echo $ECHO_N "checking for opagent.h... $ECHO_C" >&6; }
 if test "${ac_cv_header_opagent_h+set}" = set; then
@@ -13692,13 +13632,18 @@ if test $ac_cv_header_opagent_h = yes; then
   :
 else
 
-          echo "Error! You need to have opagent.h around."
-          exit -1
+              echo "Error! You need to have opagent.h around."
+              exit -1
 
 fi
 
 
-      fi
+          fi ;;
+        *)
+          { { echo "$as_me:$LINENO: error: OProfile support is available on Linux only." >&5
+echo "$as_me: error: OProfile support is available on Linux only." >&2;}
+   { (exit 1); exit 1; }; } ;;
+      esac
 
 else
 
@@ -13709,7 +13654,327 @@ fi
 
 
 cat >>confdefs.h <<_ACEOF
-#define USE_OPROFILE $USE_OPROFILE
+#define LLVM_USE_OPROFILE $USE_OPROFILE
+_ACEOF
+
+
+
+# Check whether --with-intel-jitevents was given.
+if test "${with_intel_jitevents+set}" = set; then
+  withval=$with_intel_jitevents;
+      case $llvm_cv_os_type in
+        Linux|Win32|Cygwin|MingW) ;;
+        *)
+          { { echo "$as_me:$LINENO: error:
+            Intel JIT API support is available on Linux and Windows only.\"" >&5
+echo "$as_me: error:
+            Intel JIT API support is available on Linux and Windows only.\"" >&2;}
+   { (exit 1); exit 1; }; } ;;
+      esac
+
+      USE_INTEL_JITEVENTS=1
+
+      case "$llvm_cv_target_arch" in
+        x86)    llvm_intel_jitevents_archdir="lib32";;
+        x86_64) llvm_intel_jitevents_archdir="lib64";;
+        *)      echo "Target architecture $llvm_cv_target_arch does not support Intel JIT Events API"
+                exit -1;;
+      esac
+      INTEL_JITEVENTS_INCDIR="/opt/intel/vtune_amplifier_xe_2011/include"
+      INTEL_JITEVENTS_LIBDIR="/opt/intel/vtune_amplifier_xe_2011/$llvm_intel_jitevents_archdir"
+      case "$withval" in
+        /* | [A-Za-z]:[\\/]*) INTEL_JITEVENTS_INCDIR=$withval/include
+                                  INTEL_JITEVENTS_LIBDIR=$withval/$llvm_intel_jitevents_archdir ;;
+        *) ;;
+
+      esac
+
+
+
+
+      LIBS="$LIBS -L${INTEL_JITEVENTS_LIBDIR}"
+      CPPFLAGS="$CPPFLAGS -I$INTEL_JITEVENTS_INCDIR"
+
+      { echo "$as_me:$LINENO: checking for library containing iJIT_IsProfilingActive" >&5
+echo $ECHO_N "checking for library containing iJIT_IsProfilingActive... $ECHO_C" >&6; }
+if test "${ac_cv_search_iJIT_IsProfilingActive+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char iJIT_IsProfilingActive ();
+int
+main ()
+{
+return iJIT_IsProfilingActive ();
+  ;
+  return 0;
+}
+_ACEOF
+for ac_lib in '' jitprofiling; do
+  if test -z "$ac_lib"; then
+    ac_res="none required"
+  else
+    ac_res=-l$ac_lib
+    LIBS="-l$ac_lib  $ac_func_search_save_LIBS"
+  fi
+  rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_search_iJIT_IsProfilingActive=$ac_res
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext
+  if test "${ac_cv_search_iJIT_IsProfilingActive+set}" = set; then
+  break
+fi
+done
+if test "${ac_cv_search_iJIT_IsProfilingActive+set}" = set; then
+  :
+else
+  ac_cv_search_iJIT_IsProfilingActive=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_iJIT_IsProfilingActive" >&5
+echo "${ECHO_T}$ac_cv_search_iJIT_IsProfilingActive" >&6; }
+ac_res=$ac_cv_search_iJIT_IsProfilingActive
+if test "$ac_res" != no; then
+  test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+
+        echo "Error! Cannot find libjitprofiling.a. Please check path specified in flag --with-intel-jitevents"
+        exit -1
+
+fi
+
+      if test "${ac_cv_header_jitprofiling_h+set}" = set; then
+  { echo "$as_me:$LINENO: checking for jitprofiling.h" >&5
+echo $ECHO_N "checking for jitprofiling.h... $ECHO_C" >&6; }
+if test "${ac_cv_header_jitprofiling_h+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_jitprofiling_h" >&5
+echo "${ECHO_T}$ac_cv_header_jitprofiling_h" >&6; }
+else
+  # Is the header compilable?
+{ echo "$as_me:$LINENO: checking jitprofiling.h usability" >&5
+echo $ECHO_N "checking jitprofiling.h usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+#include <jitprofiling.h>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_header_compiler=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking jitprofiling.h presence" >&5
+echo $ECHO_N "checking jitprofiling.h presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <jitprofiling.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } >/dev/null; then
+  if test -s conftest.err; then
+    ac_cpp_err=$ac_c_preproc_warn_flag
+    ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+  else
+    ac_cpp_err=
+  fi
+else
+  ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+  ac_header_preproc=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So?  What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+  yes:no: )
+    { echo "$as_me:$LINENO: WARNING: jitprofiling.h: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: jitprofiling.h: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { echo "$as_me:$LINENO: WARNING: jitprofiling.h: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: jitprofiling.h: proceeding with the compiler's result" >&2;}
+    ac_header_preproc=yes
+    ;;
+  no:yes:* )
+    { echo "$as_me:$LINENO: WARNING: jitprofiling.h: present but cannot be compiled" >&5
+echo "$as_me: WARNING: jitprofiling.h: present but cannot be compiled" >&2;}
+    { echo "$as_me:$LINENO: WARNING: jitprofiling.h:     check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: jitprofiling.h:     check for missing prerequisite headers?" >&2;}
+    { echo "$as_me:$LINENO: WARNING: jitprofiling.h: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: jitprofiling.h: see the Autoconf documentation" >&2;}
+    { echo "$as_me:$LINENO: WARNING: jitprofiling.h:     section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: jitprofiling.h:     section \"Present But Cannot Be Compiled\"" >&2;}
+    { echo "$as_me:$LINENO: WARNING: jitprofiling.h: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: jitprofiling.h: proceeding with the preprocessor's result" >&2;}
+    { echo "$as_me:$LINENO: WARNING: jitprofiling.h: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: jitprofiling.h: in the future, the compiler will take precedence" >&2;}
+    ( cat <<\_ASBOX
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
+_ASBOX
+     ) | sed "s/^/$as_me: WARNING:     /" >&2
+    ;;
+esac
+{ echo "$as_me:$LINENO: checking for jitprofiling.h" >&5
+echo $ECHO_N "checking for jitprofiling.h... $ECHO_C" >&6; }
+if test "${ac_cv_header_jitprofiling_h+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  ac_cv_header_jitprofiling_h=$ac_header_preproc
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_jitprofiling_h" >&5
+echo "${ECHO_T}$ac_cv_header_jitprofiling_h" >&6; }
+
+fi
+if test $ac_cv_header_jitprofiling_h = yes; then
+  :
+else
+
+        echo "Error! Cannot find jitprofiling.h. Please check path specified in flag --with-intel-jitevents"
+        exit -1
+
+fi
+
+
+
+
+else
+
+      USE_INTEL_JITEVENTS=0
+
+
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_USE_INTEL_JITEVENTS $USE_INTEL_JITEVENTS
 _ACEOF
 
 
@@ -21952,8 +22217,6 @@ CIRCO!$CIRCO$ac_delim
 GV!$GV$ac_delim
 DOTTY!$DOTTY$ac_delim
 XDOT_PY!$XDOT_PY$ac_delim
-PERL!$PERL$ac_delim
-HAVE_PERL!$HAVE_PERL$ac_delim
 INSTALL_PROGRAM!$INSTALL_PROGRAM$ac_delim
 INSTALL_SCRIPT!$INSTALL_SCRIPT$ac_delim
 INSTALL_DATA!$INSTALL_DATA$ac_delim
@@ -21984,6 +22247,9 @@ NO_MISSING_FIELD_INITIALIZERS!$NO_MISSING_FIELD_INITIALIZERS$ac_delim
 COVERED_SWITCH_DEFAULT!$COVERED_SWITCH_DEFAULT$ac_delim
 USE_UDIS86!$USE_UDIS86$ac_delim
 USE_OPROFILE!$USE_OPROFILE$ac_delim
+USE_INTEL_JITEVENTS!$USE_INTEL_JITEVENTS$ac_delim
+INTEL_JITEVENTS_INCDIR!$INTEL_JITEVENTS_INCDIR$ac_delim
+INTEL_JITEVENTS_LIBDIR!$INTEL_JITEVENTS_LIBDIR$ac_delim
 HAVE_PTHREAD!$HAVE_PTHREAD$ac_delim
 HUGE_VAL_SANITY!$HUGE_VAL_SANITY$ac_delim
 MMAP_FILE!$MMAP_FILE$ac_delim
@@ -22009,7 +22275,7 @@ LIBOBJS!$LIBOBJS$ac_delim
 LTLIBOBJS!$LTLIBOBJS$ac_delim
 _ACEOF
 
-  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 92; then
+  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 93; then
     break
   elif $ac_last_try; then
     { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/docs/CMake.html b/docs/CMake.html
index c822ed7..acc7fe9 100644
--- a/docs/CMake.html
+++ b/docs/CMake.html
@@ -357,6 +357,19 @@
   <dd>Path to Clang's source directory. Defaults to tools/clang.
     Clang will not be built when it is empty or it does not point valid
     path.</dd>
+
+  <dt><b>LLVM_USE_OPROFILE</b>:BOOL</dt>
+  <dd> Enable building OProfile JIT support. Defaults to OFF</dd>
+
+  <dt><b>LLVM_USE_INTEL_JITEVENTS</b>:BOOL</dt>
+  <dd> Enable building support for Intel JIT Events API. Defaults to OFF</dd>
+
+  <dt><b>LLVM_INTEL_JITEVENTS_DIR</b>:PATH</dt>
+  <dd> Path to installation of Intel(R) VTune(TM) Amplifier XE 2011,
+    used to locate the <tt>jitprofiling</tt> library. Default =
+    <tt>%VTUNE_AMPLIFIER_XE_2011_DIR%</tt> (Windows)
+    | <tt>/opt/intel/vtune_amplifier_xe_2011</tt> (Linux) </dd>
+
 </dl>
 
 </div>
diff --git a/docs/CodingStandards.html b/docs/CodingStandards.html
index a5f6f35..f00caa3 100644
--- a/docs/CodingStandards.html
+++ b/docs/CodingStandards.html
@@ -85,17 +85,16 @@
 
 
 <!-- *********************************************************************** -->
-<h2>
-  <a name="introduction">Introduction</a>
-</h2>
+<h2><a name="introduction">Introduction</a></h2>
 <!-- *********************************************************************** -->
 
 <div>
 
 <p>This document attempts to describe a few coding standards that are being used
 in the LLVM source tree.  Although no coding standards should be regarded as
-absolute requirements to be followed in all instances, coding standards can be
-useful.</p>
+absolute requirements to be followed in all instances, coding standards are
+particularly important for large-scale code bases that follow a library-based
+design (like LLVM).</p>
 
 <p>This document intentionally does not prescribe fixed standards for religious
 issues such as brace placement and space usage.  For issues like this, follow
@@ -103,14 +102,27 @@ the golden rule:</p>
 
 <blockquote>
 
-<p><b><a name="goldenrule">If you are adding a significant body of source to a
-project, feel free to use whatever style you are most comfortable with.  If you
-are extending, enhancing, or bug fixing already implemented code, use the style
-that is already being used so that the source is uniform and easy to
-follow.</a></b></p>
+<p><b><a name="goldenrule">If you are extending, enhancing, or bug fixing
+already implemented code, use the style that is already being used so that the
+source is uniform and easy to follow.</a></b></p>
 
 </blockquote>
-
+  
+<p>Note that some code bases (e.g. libc++) have really good reasons to deviate
+from the coding standards.  In the case of libc++, this is because the naming
+and other conventions are dictated by the C++ standard.  If you think there is
+a specific good reason to deviate from the standards here, please bring it up
+on the LLVMdev mailing list.</p>
+
+<p>There are some conventions that are not uniformly followed in the code base
+(e.g. the naming convention).  This is because they are relatively new, and a
+lot of code was written before they were put in place.  Our long term goal is
+for the entire codebase to follow the convention, but we explicitly <em>do 
+not</em> want patches that do large-scale reformating of existing code.  OTOH,
+it is reasonable to rename the methods of a class if you're about to change it
+in some other way.  Just do the reformating as a separate commit from the
+functionality change. </p>
+  
 <p>The ultimate goal of these guidelines is the increase readability and
 maintainability of our common source base. If you have suggestions for topics to
 be included, please mail them to <a
@@ -141,11 +153,11 @@ href="mailto:sabre@nondot.org">Chris</a>.</p>
 <div>
 
 <p>Comments are one critical part of readability and maintainability.  Everyone
-knows they should comment, so should you.  When writing comments, write them as
-English prose, which means they should use proper capitalization, punctuation,
-etc.  Although we all should probably
-comment our code more than we do, there are a few very critical places that
-documentation is very useful:</p>
+knows they should comment their code, and so should you.  When writing comments,
+write them as English prose, which means they should use proper capitalization,
+punctuation, etc.  Aim to describe what a code is trying to do and why, not
+"how" it does it at a micro level. Here are a few critical things to
+document:</p>
 
 <h5>File Headers</h5>
 
@@ -153,9 +165,7 @@ documentation is very useful:</p>
 
 <p>Every source file should have a header on it that describes the basic 
 purpose of the file.  If a file does not have a header, it should not be 
-checked into Subversion.  Most source trees will probably have a standard
-file header format.  The standard format for the LLVM source tree looks like
-this:</p>
+checked into the tree.  The standard header looks like this:</p>
 
 <div class="doc_code">
 <pre>
@@ -198,9 +208,8 @@ included, as well as any notes or "gotchas" in the code to watch out for.</p>
 
 <p>Classes are one fundamental part of a good object oriented design.  As such,
 a class definition should have a comment block that explains what the class is
-used for... if it's not obvious.  If it's so completely obvious your grandma
-could figure it out, it's probably safe to leave it out.  Naming classes
-something sane goes a long ways towards avoiding writing documentation.</p>
+used for and how it works.  Every non-trivial class is expected to have a
+doxygen comment block.</p>
 
 
 <h5>Method information</h5>
@@ -211,8 +220,7 @@ something sane goes a long ways towards avoiding writing documentation.</p>
 documented properly.  A quick note about what it does and a description of the
 borderline behaviour is all that is necessary here (unless something
 particularly tricky or insidious is going on).  The hope is that people can
-figure out how to use your interfaces without reading the code itself... that is
-the goal metric.</p>
+figure out how to use your interfaces without reading the code itself.</p>
 
 <p>Good things to talk about here are what happens when something unexpected
 happens: does the method return null?  Abort?  Format your hard disk?</p>
@@ -398,14 +406,6 @@ if ((V = getValue())) {
 <p>which shuts <tt>gcc</tt> up.  Any <tt>gcc</tt> warning that annoys you can
 be fixed by massaging the code appropriately.</p>
 
-<p>These are the <tt>gcc</tt> warnings that I prefer to enable:</p>
-
-<div class="doc_code">
-<pre>
--Wall -Winline -W -Wwrite-strings -Wno-unused
-</pre>
-</div>
-
 </div>
 
 <!-- _______________________________________________________________________ -->
diff --git a/docs/CommandGuide/lit.pod b/docs/CommandGuide/lit.pod
index f4289a8..e0b1a8c 100644
--- a/docs/CommandGuide/lit.pod
+++ b/docs/CommandGuide/lit.pod
@@ -159,8 +159,8 @@ other results are not collated in any reasonable fashion.
 =head1 EXIT STATUS
 
 B<lit> will exit with an exit code of 1 if there are any FAIL or XPASS
-results. Otherwise, it will exit with the status 0. Other exit codes used for
-non-test related failures (for example a user error or an internal program
+results. Otherwise, it will exit with the status 0. Other exit codes are used
+for non-test related failures (for example a user error or an internal program
 error).
 
 =head1 TEST DISCOVERY
@@ -283,13 +283,13 @@ builds this is the directory that will be scanned for tests.
 
 B<test_exec_root> For out-of-dir builds, the path to the test suite root inside
 the object directory. This is where tests will be run and temporary output files
-places.
+placed.
 
 B<environment> A dictionary representing the environment to use when executing
 tests in the suite.
 
 B<suffixes> For B<lit> test formats which scan directories for tests, this
-variable as a list of suffixes to identify test files. Used by: I<ShTest>,
+variable is a list of suffixes to identify test files. Used by: I<ShTest>,
 I<TclTest>.
 
 B<substitutions> For B<lit> test formats which substitute variables into a test
@@ -315,7 +315,7 @@ directory being scanned.
 
 Once test suites are located, B<lit> recursively traverses the source directory
 (following I<test_src_root>) looking for tests. When B<lit> enters a
-sub-directory, it first checks to see if a nest test suite is defined in that
+sub-directory, it first checks to see if a nested test suite is defined in that
 directory. If so, it loads that test suite recursively, otherwise it
 instantiates a local test config for the directory (see L<"LOCAL CONFIGURATION
 FILES">).
diff --git a/docs/LangRef.html b/docs/LangRef.html
index 29ac260..81dabc9 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -490,43 +490,43 @@
 
 <div>
 
-<p>LLVM programs are composed of "Module"s, each of which is a translation unit
-   of the input programs.  Each module consists of functions, global variables,
-   and symbol table entries.  Modules may be combined together with the LLVM
-   linker, which merges function (and global variable) definitions, resolves
-   forward declarations, and merges symbol table entries. Here is an example of
-   the "hello world" module:</p>
+<p>LLVM programs are composed of <tt>Module</tt>s, each of which is a
+   translation unit of the input programs.  Each module consists of functions,
+   global variables, and symbol table entries.  Modules may be combined together
+   with the LLVM linker, which merges function (and global variable)
+   definitions, resolves forward declarations, and merges symbol table
+   entries. Here is an example of the "hello world" module:</p>
 
 <pre class="doc_code">
 <i>; Declare the string constant as a global constant.</i>&nbsp;
-<a href="#identifiers">@.LC0</a> = <a href="#linkage_internal">internal</a>&nbsp;<a href="#globalvars">constant</a>&nbsp;<a href="#t_array">[13 x i8]</a> c"hello world\0A\00"      <i>; [13 x i8]*</i>&nbsp;
+<a href="#identifiers">@.str</a> = <a href="#linkage_private">private</a>&nbsp;<a href="#globalvars">unnamed_addr</a>&nbsp;<a href="#globalvars">constant</a>&nbsp;<a href="#t_array">[13 x i8]</a> c"hello world\0A\00"&nbsp;
 
 <i>; External declaration of the puts function</i>&nbsp;
-<a href="#functionstructure">declare</a> i32 @puts(i8*)                                      <i>; i32 (i8*)* </i>&nbsp;
+<a href="#functionstructure">declare</a> i32 @puts(i8* <a href="#nocapture">nocapture</a>) <a href="#fnattrs">nounwind</a>&nbsp;
 
 <i>; Definition of main function</i>
 define i32 @main() {   <i>; i32()* </i>&nbsp;
   <i>; Convert [13 x i8]* to i8  *...</i>&nbsp;
-  %cast210 = <a href="#i_getelementptr">getelementptr</a> [13 x i8]* @.LC0, i64 0, i64 0   <i>; i8*</i>&nbsp;
+  %cast210 = <a href="#i_getelementptr">getelementptr</a> [13 x i8]* @.str, i64 0, i64 0
 
   <i>; Call puts function to write out the string to stdout.</i>&nbsp;
-  <a href="#i_call">call</a> i32 @puts(i8* %cast210)           <i>; i32</i>&nbsp;
+  <a href="#i_call">call</a> i32 @puts(i8* %cast210)
   <a href="#i_ret">ret</a> i32 0&nbsp;
 }
 
 <i>; Named metadata</i>
-!1 = metadata !{i32 41}
+!1 = metadata !{i32 42}
 !foo = !{!1, null}
 </pre>
 
 <p>This example is made up of a <a href="#globalvars">global variable</a> named
-   "<tt>.LC0</tt>", an external declaration of the "<tt>puts</tt>" function,
+   "<tt>.str</tt>", an external declaration of the "<tt>puts</tt>" function,
    a <a href="#functionstructure">function definition</a> for
    "<tt>main</tt>" and <a href="#namedmetadatastructure">named metadata</a> 
-   "<tt>foo"</tt>.</p>
+   "<tt>foo</tt>".</p>
 
-<p>In general, a module is made up of a list of global values, where both
-   functions and global variables are global values.  Global values are
+<p>In general, a module is made up of a list of global values (where both
+   functions and global variables are global values). Global values are
    represented by a pointer to a memory location (in this case, a pointer to an
    array of char, and a pointer to a function), and have one of the
    following <a href="#linkage">linkage types</a>.</p>
@@ -3081,36 +3081,44 @@ call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
     <tr>
       <td>1</td>
       <td align="left">
-        <dt><b>Error</b></dt>
-        <dd>Emits an error if two values disagree. It is an error to have an ID
-            with both an Error and a Warning behavior.</dd>
+        <dl>
+          <dt><b>Error</b></dt>
+          <dd>Emits an error if two values disagree. It is an error to have an ID
+              with both an Error and a Warning behavior.</dd>
+        </dl>
       </td>
     </tr>
     <tr>
       <td>2</td>
       <td align="left">
-        <dt><b>Warning</b></dt>
-        <dd>Emits a warning if two values disagree.</dd>
+        <dl>
+          <dt><b>Warning</b></dt>
+          <dd>Emits a warning if two values disagree.</dd>
+        </dl>
       </td>
     </tr>
     <tr>
       <td>3</td>
       <td align="left">
-        <dt><b>Require</b></dt>
-        <dd>Emits an error when the specified value is not present or doesn't
-            have the specified value. It is an error for two (or more)
-            <tt>llvm.module.flags</tt> with the same ID to have the Require
-            behavior but different values. There may be multiple Require flags
-            per ID.</dd>
+        <dl>
+          <dt><b>Require</b></dt>
+          <dd>Emits an error when the specified value is not present or doesn't
+              have the specified value. It is an error for two (or more)
+              <tt>llvm.module.flags</tt> with the same ID to have the Require
+              behavior but different values. There may be multiple Require flags
+              per ID.</dd>
+        </dl>
       </td>
     </tr>
     <tr>
       <td>4</td>
       <td align="left">
-        <dt><b>Override</b></dt>
-        <dd>Uses the specified value if the two values disagree. It is an error
-            for two (or more) <tt>llvm.module.flags</tt> with the same ID to
-            have the Override behavior but different values.</dd>
+        <dl>
+          <dt><b>Override</b></dt>
+          <dd>Uses the specified value if the two values disagree. It is an
+              error for two (or more) <tt>llvm.module.flags</tt> with the same
+              ID to have the Override behavior but different values.</dd>
+        </dl>
       </td>
     </tr>
   </tbody>
@@ -3174,9 +3182,10 @@ metadata !{ metadata !"foo", i32 1 }
    following key-value pairs:</p>
 
 <table border="1" cellspacing="0" cellpadding="4">
+  <col width="30%">
   <tbody>
     <tr>
-      <th width="30%">Key</th>
+      <th>Key</th>
       <th>Value</th>
     </tr>
     <tr>
@@ -4861,8 +4870,7 @@ IfUnequal:
    or <tt><a href="#i_resume">resume</a></tt> instructions), the memory is
    reclaimed.  Allocating zero bytes is legal, but the result is undefined.
    The order in which memory is allocated (ie., which way the stack grows) is
-   not specified, and relational comparisons involving '<tt>alloca</tt>'s are
-   undefined.</p>
+   not specified.</p>
 
 <p>
 
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index fd95a8a..c1b1f41 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -483,6 +483,37 @@ syntax, there are still significant gaps in that support.</p>
    LLVM API changes are:</p>
 
 <ul>
+  <li>Target specific options have been moved from global variables to members
+      on the new <code>TargetOptions</code> class, which is local to each
+      <code>TargetMachine</code>. As a consequence, the associated flags will
+      no longer be accepted by <tt>clang -mllvm</tt>. This includes:
+<ul>
+<li><code>llvm::PrintMachineCode</code>
+<li><code>llvm::NoFramePointerElim</code>
+<li><code>llvm::NoFramePointerElimNonLeaf</code>
+<li><code>llvm::DisableFramePointerElim(const MachineFunction &)</code>
+<li><code>llvm::LessPreciseFPMADOption</code>
+<li><code>llvm::LessPrecideFPMAD()</code>
+<li><code>llvm::NoExcessFPPrecision</code>
+<li><code>llvm::UnsafeFPMath</code>
+<li><code>llvm::NoInfsFPMath</code>
+<li><code>llvm::NoNaNsFPMath</code>
+<li><code>llvm::HonorSignDependentRoundingFPMathOption</code>
+<li><code>llvm::HonorSignDependentRoundingFPMath()</code>
+<li><code>llvm::UseSoftFloat</code>
+<li><code>llvm::FloatABIType</code>
+<li><code>llvm::NoZerosInBSS</code>
+<li><code>llvm::JITExceptionHandling</code>
+<li><code>llvm::JITEmitDebugInfo</code>
+<li><code>llvm::JITEmitDebugInfoToDisk</code>
+<li><code>llvm::GuaranteedTailCallOpt</code>
+<li><code>llvm::StackAlignmentOverride</code>
+<li><code>llvm::RealignStack</code>
+<li><code>llvm::DisableJumpTables</code>
+<li><code>llvm::EnableFastISel</code>
+<li><code>llvm::getTrapFunctionName()</code>
+<li><code>llvm::EnableSegmentedStacks</code>
+</ul></li>
   <li>....</li>
 </ul>
 
diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html
index e7eac08..8072749 100644
--- a/docs/SourceLevelDebugging.html
+++ b/docs/SourceLevelDebugging.html
@@ -55,15 +55,24 @@
   </ol></li>
   <li><a href="#llvmdwarfextension">LLVM Dwarf Extensions</a>
     <ol>
-      <li><a href="#objcproperty">Debugging Information Extension 
-	  for Objective C Properties</a></li>
-      <ul>
-	<li><a href="#objcpropertyintroduction">Introduction</a></li>
-	<li><a href="#objcpropertyproposal">Proposal</a></li>
-	<li><a href="#objcpropertynewattributes">New DWARF Attributes</a></li>
-	<li><a href="#objcpropertynewconstants">New DWARF Constants</a></li>
-      </ul>
-
+      <li><a href="#objcproperty">Debugging Information Extension
+	  for Objective C Properties</a>
+        <ul>
+	  <li><a href="#objcpropertyintroduction">Introduction</a></li>
+	  <li><a href="#objcpropertyproposal">Proposal</a></li>
+	  <li><a href="#objcpropertynewattributes">New DWARF Attributes</a></li>
+	  <li><a href="#objcpropertynewconstants">New DWARF Constants</a></li>
+        </ul>
+      </li>
+      <li><a href="#acceltable">Name Accelerator Tables</a>
+        <ul>
+          <li><a href="#acceltableintroduction">Introduction</a></li>
+          <li><a href="#acceltablehashes">Hash Tables</a></li>
+          <li><a href="#acceltabledetails">Details</a></li>
+          <li><a href="#acceltablecontents">Contents</a></li>
+          <li><a href="#acceltableextensions">Language Extensions and File Format Changes</a></li>
+        </ul>
+      </li>
     </ol>
   </li>
 </ul>
@@ -244,8 +253,8 @@ height="369">
    for the optimizer to optimize the program and debugging information without
    necessarily having to know anything about debugging information.  In
    particular, the use of metadata avoids duplicated debugging information from
-   the beginning, and the global dead code elimination pass automatically 
-   deletes debugging information for a function if it decides to delete the 
+   the beginning, and the global dead code elimination pass automatically
+   deletes debugging information for a function if it decides to delete the
    function. </p>
 
 <p>To do this, most of the debugging information (descriptors for types,
@@ -254,9 +263,9 @@ height="369">
 
 <p>Debug information is designed to be agnostic about the target debugger and
    debugging information representation (e.g. DWARF/Stabs/etc).  It uses a
-   generic pass to decode the information that represents variables, types, 
-   functions, namespaces, etc: this allows for arbitrary source-language 
-   semantics and type-systems to be used, as long as there is a module 
+   generic pass to decode the information that represents variables, types,
+   functions, namespaces, etc: this allows for arbitrary source-language
+   semantics and type-systems to be used, as long as there is a module
    written for the target debugger to interpret the information. </p>
 
 <p>To provide basic functionality, the LLVM debugger does have to make some
@@ -292,7 +301,7 @@ height="369">
    the range 0x1000 through 0x2000 (there is a defined enum DW_TAG_user_base =
    0x1000.)</p>
 
-<p>The fields of debug descriptors used internally by LLVM 
+<p>The fields of debug descriptors used internally by LLVM
    are restricted to only the simple data types <tt>i32</tt>, <tt>i1</tt>,
    <tt>float</tt>, <tt>double</tt>, <tt>mdstring</tt> and <tt>mdnode</tt>. </p>
 
@@ -314,7 +323,7 @@ height="369">
    with the current debug version (LLVMDebugVersion = 8 &lt;&lt; 16 or
    0x80000 or 524288.)</a></p>
 
-<p>The details of the various descriptors follow.</p>  
+<p>The details of the various descriptors follow.</p>
 
 <!-- ======================================================================= -->
 <h4>
@@ -326,14 +335,14 @@ height="369">
 <div class="doc_code">
 <pre>
 !0 = metadata !{
-  i32,       ;; Tag = 17 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> 
+  i32,       ;; Tag = 17 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
              ;; (DW_TAG_compile_unit)
-  i32,       ;; Unused field. 
-  i32,       ;; DWARF language identifier (ex. DW_LANG_C89) 
+  i32,       ;; Unused field.
+  i32,       ;; DWARF language identifier (ex. DW_LANG_C89)
   metadata,  ;; Source file name
   metadata,  ;; Source file directory (includes trailing slash)
   metadata   ;; Producer (ex. "4.0.1 LLVM (LLVM research group)")
-  i1,        ;; True if this is a main compile unit. 
+  i1,        ;; True if this is a main compile unit.
   i1,        ;; True if this is optimized.
   metadata,  ;; Flags
   i32        ;; Runtime version
@@ -353,7 +362,7 @@ height="369">
 
 <p>Compile unit descriptors provide the root context for objects declared in a
    specific compilation unit. File descriptors are defined using this context.
-   These descriptors are collected by a named metadata 
+   These descriptors are collected by a named metadata
    <tt>!llvm.dbg.cu</tt>. Compile unit descriptor keeps track of subprograms,
    global variables and type information.
 
@@ -369,7 +378,7 @@ height="369">
 <div class="doc_code">
 <pre>
 !0 = metadata !{
-  i32,       ;; Tag = 41 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> 
+  i32,       ;; Tag = 41 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
              ;; (DW_TAG_file_type)
   metadata,  ;; Source file name
   metadata,  ;; Source file directory (includes trailing slash)
@@ -397,7 +406,7 @@ height="369">
 <div class="doc_code">
 <pre>
 !1 = metadata !{
-  i32,      ;; Tag = 52 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> 
+  i32,      ;; Tag = 52 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
             ;; (DW_TAG_variable)
   i32,      ;; Unused field.
   metadata, ;; Reference to context descriptor
@@ -444,7 +453,7 @@ global variables are collected by named metadata <tt>!llvm.dbg.gv</tt>.</p>
   i1,       ;; True if the global is defined in the compile unit (not extern)
   i32,      ;; Virtuality, e.g. dwarf::DW_VIRTUALITY__virtual
   i32,      ;; Index into a virtual function
-  metadata, ;; indicates which base type contains the vtable pointer for the 
+  metadata, ;; indicates which base type contains the vtable pointer for the
             ;; derived class
   i32,      ;; Flags - Artifical, Private, Protected, Explicit, Prototyped.
   i1,       ;; isOptimized
@@ -512,9 +521,9 @@ global variables are collected by named metadata <tt>!llvm.dbg.gv</tt>.</p>
 <div class="doc_code">
 <pre>
 !4 = metadata !{
-  i32,      ;; Tag = 36 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> 
+  i32,      ;; Tag = 36 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
             ;; (DW_TAG_base_type)
-  metadata, ;; Reference to context 
+  metadata, ;; Reference to context
   metadata, ;; Name (may be "" for anonymous types)
   metadata, ;; Reference to file where defined (may be NULL)
   i32,      ;; Line number where defined (may be 0)
@@ -574,8 +583,8 @@ DW_ATE_unsigned_char = 8
   i64,      ;; Offset in bits
   i32,      ;; Flags to encode attributes, e.g. private
   metadata, ;; Reference to type derived from
-  metadata, ;; (optional) Name of the Objective C property associated with 
-            ;; Objective-C an ivar 
+  metadata, ;; (optional) Name of the Objective C property associated with
+            ;; Objective-C an ivar
   metadata, ;; (optional) Name of the Objective C property getter selector.
   metadata, ;; (optional) Name of the Objective C property setter selector.
   i32       ;; (optional) Objective C property attributes.
@@ -609,8 +618,8 @@ DW_TAG_restrict_type    = 55
 
 <p><tt>DW_TAG_typedef</tt> is used to provide a name for the derived type.</p>
 
-<p><tt>DW_TAG_pointer_type</tt>, <tt>DW_TAG_reference_type</tt>, 
-   <tt>DW_TAG_const_type</tt>, <tt>DW_TAG_volatile_type</tt> and 
+<p><tt>DW_TAG_pointer_type</tt>, <tt>DW_TAG_reference_type</tt>,
+   <tt>DW_TAG_const_type</tt>, <tt>DW_TAG_volatile_type</tt> and
    <tt>DW_TAG_restrict_type</tt> are used to qualify
    the <a href="#format_derived_type">derived type</a>. </p>
 
@@ -750,7 +759,7 @@ DW_TAG_inheritance      = 28
 <div class="doc_code">
 <pre>
 !6 = metadata !{
-  i32,      ;; Tag = 40 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a> 
+  i32,      ;; Tag = 40 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
             ;; (DW_TAG_enumerator)
   metadata, ;; Name
   i64       ;; Value
@@ -918,27 +927,27 @@ entry:
 
 declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
-!0 = metadata !{i32 459008, metadata !1, metadata !"X", 
+!0 = metadata !{i32 459008, metadata !1, metadata !"X",
                 metadata !3, i32 2, metadata !6}; [ DW_TAG_auto_variable ]
 !1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo", 
-               metadata !"foo", metadata !3, i32 1, metadata !4, 
+!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo",
+               metadata !"foo", metadata !3, i32 1, metadata !4,
                i1 false, i1 true}; [DW_TAG_subprogram ]
-!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"foo.c", 
-                metadata !"/private/tmp", metadata !"clang 1.1", i1 true, 
+!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"foo.c",
+                metadata !"/private/tmp", metadata !"clang 1.1", i1 true,
                 i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
-!4 = metadata !{i32 458773, metadata !3, metadata !"", null, i32 0, i64 0, i64 0, 
+!4 = metadata !{i32 458773, metadata !3, metadata !"", null, i32 0, i64 0, i64 0,
                 i64 0, i32 0, null, metadata !5, i32 0}; [DW_TAG_subroutine_type ]
 !5 = metadata !{null}
-!6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0, 
+!6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0,
                 i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
 !7 = metadata !{i32 2, i32 7, metadata !1, null}
 !8 = metadata !{i32 2, i32 3, metadata !1, null}
-!9 = metadata !{i32 459008, metadata !1, metadata !"Y", metadata !3, i32 3, 
+!9 = metadata !{i32 459008, metadata !1, metadata !"Y", metadata !3, i32 3,
                 metadata !6}; [ DW_TAG_auto_variable ]
 !10 = metadata !{i32 3, i32 7, metadata !1, null}
 !11 = metadata !{i32 3, i32 3, metadata !1, null}
-!12 = metadata !{i32 459008, metadata !13, metadata !"Z", metadata !3, i32 5, 
+!12 = metadata !{i32 459008, metadata !13, metadata !"Z", metadata !3, i32 5,
                  metadata !6}; [ DW_TAG_auto_variable ]
 !13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
 !14 = metadata !{i32 5, i32 9, metadata !13, null}
@@ -958,7 +967,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 
 <div class="doc_code">
 <pre>
-call void @llvm.dbg.declare(metadata, metadata !0), !dbg !7   
+call void @llvm.dbg.declare(metadata, metadata !0), !dbg !7
 </pre>
 </div>
 
@@ -972,9 +981,9 @@ call void @llvm.dbg.declare(metadata, metadata !0), !dbg !7
 <pre>
 !7 = metadata !{i32 2, i32 7, metadata !1, null}
 !1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", 
-                metadata !"foo", metadata !"foo", metadata !3, i32 1, 
-                metadata !4, i1 false, i1 true}; [DW_TAG_subprogram ]   
+!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo",
+                metadata !"foo", metadata !"foo", metadata !3, i32 1,
+                metadata !4, i1 false, i1 true}; [DW_TAG_subprogram ]
 </pre>
 </div>
 
@@ -999,7 +1008,7 @@ call void @llvm.dbg.declare(metadata, metadata !12), !dbg !14
 
 <p>The second intrinsic
    <tt>%<a href="#format_common_declare">llvm.dbg.declare</a></tt>
-   encodes debugging information for variable <tt>Z</tt>. The metadata 
+   encodes debugging information for variable <tt>Z</tt>. The metadata
    <tt>!dbg !14</tt> attached to the intrinsic provides scope information for
    the variable <tt>Z</tt>.</p>
 
@@ -1080,9 +1089,9 @@ int main(int argc, char *argv[]) {
   i32 524305,    ;; Tag
   i32 0,         ;; Unused
   i32 4,         ;; Language Id
-  metadata !"MySource.cpp", 
-  metadata !"/Users/mine/sources", 
-  metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build 00)", 
+  metadata !"MySource.cpp",
+  metadata !"/Users/mine/sources",
+  metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build 00)",
   i1 true,       ;; Main Compile Unit
   i1 false,      ;; Optimized compile unit
   metadata !"",  ;; Compiler flags
@@ -1093,8 +1102,8 @@ int main(int argc, char *argv[]) {
 ;;
 !1 = metadata !{
   i32 524329,    ;; Tag
-  metadata !"MySource.cpp", 
-  metadata !"/Users/mine/sources", 
+  metadata !"MySource.cpp",
+  metadata !"/Users/mine/sources",
   metadata !2    ;; Compile unit
 }
 
@@ -1104,7 +1113,7 @@ int main(int argc, char *argv[]) {
 !3 = metadata !{
   i32 524329,    ;; Tag
   metadata !"Myheader.h"
-  metadata !"/Users/mine/sources", 
+  metadata !"/Users/mine/sources",
   metadata !2    ;; Compile unit
 }
 
@@ -1112,9 +1121,9 @@ int main(int argc, char *argv[]) {
 </pre>
 </div>
 
-<p>llvm::Instruction provides easy access to metadata attached with an 
+<p>llvm::Instruction provides easy access to metadata attached with an
 instruction. One can extract line number information encoded in LLVM IR
-using <tt>Instruction::getMetadata()</tt> and 
+using <tt>Instruction::getMetadata()</tt> and
 <tt>DILocation::getLineNumber()</tt>.
 <pre>
  if (MDNode *N = I->getMetadata("dbg")) {  // Here I is an LLVM instruction
@@ -1177,7 +1186,7 @@ int MyGlobal = 100;
 ;;
 ;; Define the basic type of 32 bit signed integer.  Note that since int is an
 ;; intrinsic type the source file is NULL and line 0.
-;;    
+;;
 !4 = metadata !{
   i32 524324,              ;; Tag
   metadata !1,             ;; Context
@@ -1232,7 +1241,7 @@ int main(int argc, char *argv[]) {
   metadata !1,       ;; File
   i32 1,             ;; Line number
   metadata !4,       ;; Type
-  i1 false,          ;; Is local 
+  i1 false,          ;; Is local
   i1 true,           ;; Is definition
   i32 0,             ;; Virtuality attribute, e.g. pure virtual function
   i32 0,             ;; Index into virtual table for C++ methods
@@ -1326,7 +1335,7 @@ define i32 @main(i32 %argc, i8** %argv) {
 !2 = metadata !{
   i32 524324,        ;; Tag
   metadata !1,       ;; Context
-  metadata !"unsigned char", 
+  metadata !"unsigned char",
   metadata !1,       ;; File
   i32 0,             ;; Line number
   i64 8,             ;; Size in Bits
@@ -1824,8 +1833,7 @@ enum Trees {
 <div>
 <!-- ======================================================================= -->
 <h3>
-  <a name="objcproperty">Debugging Information Extension for Objective C
-Properties</a></li>
+  <a name="objcproperty">Debugging Information Extension for Objective C Properties</a>
 </h3>
 <div>
 <!-- *********************************************************************** -->
@@ -1835,15 +1843,15 @@ Properties</a></li>
 <!-- *********************************************************************** -->
 
 <div>
-<p>Objective C provides a simpler way to declare and define accessor methods 
-using declared properties. The language provides features to declare a 
+<p>Objective C provides a simpler way to declare and define accessor methods
+using declared properties. The language provides features to declare a
 property and to let compiler synthesize accessor methods.
 </p>
 
-<p>The debugger lets developer inspect Objective C interfaces and their 
-instance variables and class variables. However, the debugger does not know 
+<p>The debugger lets developer inspect Objective C interfaces and their
+instance variables and class variables. However, the debugger does not know
 anything about the properties defined in Objective C interfaces. The debugger
-consumes information generated by compiler in DWARF format. The format does 
+consumes information generated by compiler in DWARF format. The format does
 not support encoding of Objective C properties. This proposal describes DWARF
 extensions to encode Objective C properties, which the debugger can use to let
 developers inspect Objective C properties.
@@ -1860,27 +1868,27 @@ developers inspect Objective C properties.
 
 <div>
 <p>Objective C properties exist separately from class members. A property
-can be defined only by &quot;setter&quot; and &quot;getter&quot; selectors, and 
-be calculated anew on each access.  Or a property can just be a direct access 
-to some declared ivar.  Finally it can have an ivar &quot;automatically 
-synthesized&quot; for it by the compiler, in which case the property can be 
-referred to in user code directly using the standard C dereference syntax as 
-well as through the property &quot;dot&quot; syntax, but there is no entry in 
+can be defined only by &quot;setter&quot; and &quot;getter&quot; selectors, and
+be calculated anew on each access.  Or a property can just be a direct access
+to some declared ivar.  Finally it can have an ivar &quot;automatically
+synthesized&quot; for it by the compiler, in which case the property can be
+referred to in user code directly using the standard C dereference syntax as
+well as through the property &quot;dot&quot; syntax, but there is no entry in
 the @interface declaration corresponding to this ivar.
 </p>
 <p>
-To facilitate debugging, these properties we will add a new DWARF TAG into the 
-DW_TAG_structure_type definition for the class to hold the description of a 
+To facilitate debugging, these properties we will add a new DWARF TAG into the
+DW_TAG_structure_type definition for the class to hold the description of a
 given property, and a set of DWARF attributes that provide said description.
-The property tag will also contain the name and declared type of the property.  
+The property tag will also contain the name and declared type of the property.
 </p>
 <p>
-If there is a related ivar, there will also be a DWARF property attribute placed 
-in the DW_TAG_member DIE for that ivar referring back to the property TAG for 
-that property. And in the case where the compiler synthesizes the ivar directly, 
-the compiler is expected to generate a DW_TAG_member for that ivar (with the 
-DW_AT_artificial set to 1), whose name will be the name used to access this 
-ivar directly in code, and with the property attribute pointing back to the 
+If there is a related ivar, there will also be a DWARF property attribute placed
+in the DW_TAG_member DIE for that ivar referring back to the property TAG for
+that property. And in the case where the compiler synthesizes the ivar directly,
+the compiler is expected to generate a DW_TAG_member for that ivar (with the
+DW_AT_artificial set to 1), whose name will be the name used to access this
+ivar directly in code, and with the property attribute pointing back to the
 property it is backing.
 </p>
 <p>
@@ -1889,17 +1897,17 @@ The following examples will serve as illustration for our discussion:
 
 <div class="doc_code">
 <pre>
-@interface I1 { 
+@interface I1 {
   int n2;
-} 
+}
 
-@property int p1; 
-@property int p2; 
+@property int p1;
+@property int p2;
 @end
 
-@implementation I1 
-@synthesize p1; 
-@synthesize p2 = n2; 
+@implementation I1
+@synthesize p1;
+@synthesize p2 = n2;
 @end
 </pre>
 </div>
@@ -1909,10 +1917,10 @@ This produces the following DWARF (this is a &quot;pseudo dwarfdump&quot; output
 </p>
 <div class="doc_code">
 <pre>
-0x00000100:  TAG_structure_type [7] * 
+0x00000100:  TAG_structure_type [7] *
                AT_APPLE_runtime_class( 0x10 )
                AT_name( "I1" )
-               AT_decl_file( "Objc_Property.m" ) 
+               AT_decl_file( "Objc_Property.m" )
                AT_decl_line( 3 )
 
 0x00000110    TAG_APPLE_property
@@ -1923,13 +1931,13 @@ This produces the following DWARF (this is a &quot;pseudo dwarfdump&quot; output
                 AT_name ( "p2" )
                 AT_type ( {0x00000150} ( int ) )
 
-0x00000130:   TAG_member [8] 
+0x00000130:   TAG_member [8]
                 AT_name( "_p1" )
                 AT_APPLE_property ( {0x00000110} "p1" )
                 AT_type( {0x00000150} ( int ) )
                 AT_artificial ( 0x1 )
 
-0x00000140:    TAG_member [8] 
+0x00000140:    TAG_member [8]
                  AT_name( "n2" )
                  AT_APPLE_property ( {0x00000120} "p2" )
                  AT_type( {0x00000150} ( int ) )
@@ -1938,7 +1946,7 @@ This produces the following DWARF (this is a &quot;pseudo dwarfdump&quot; output
 </pre>
 </div>
 
-<p> Note, the current convention is that the name of the ivar for an 
+<p> Note, the current convention is that the name of the ivar for an
 auto-synthesized property is the name of the property from which it derives with
 an underscore prepended, as is shown in the example.
 But we actually don't need to know this convention, since we are given the name
@@ -1946,14 +1954,14 @@ of the ivar directly.
 </p>
 
 <p>
-Also, it is common practice in ObjC to have different property declarations in 
-the @interface and @implementation - e.g. to provide a read-only property in 
-the interface,and a read-write interface in the implementation.  In that case, 
-the compiler should emit whichever property declaration will be in force in the 
+Also, it is common practice in ObjC to have different property declarations in
+the @interface and @implementation - e.g. to provide a read-only property in
+the interface,and a read-write interface in the implementation.  In that case,
+the compiler should emit whichever property declaration will be in force in the
 current translation unit.
 </p>
 
-<p> Developers can decorate a property with attributes which are encoded using 
+<p> Developers can decorate a property with attributes which are encoded using
 DW_AT_APPLE_property_attribute.
 </p>
 
@@ -1967,26 +1975,26 @@ Which produces a property tag:
 <p>
 <div class="doc_code">
 <pre>
-TAG_APPLE_property [8] 
-  AT_name( "pr" ) 
-  AT_type ( {0x00000147} (int) ) 
+TAG_APPLE_property [8]
+  AT_name( "pr" )
+  AT_type ( {0x00000147} (int) )
   AT_APPLE_property_attribute (DW_APPLE_PROPERTY_readonly, DW_APPLE_PROPERTY_nonatomic)
 </pre>
 </div>
 
-<p> The setter and getter method names are attached to the property using 
+<p> The setter and getter method names are attached to the property using
 DW_AT_APPLE_property_setter and DW_AT_APPLE_property_getter attributes.
 </p>
 <div class="doc_code">
 <pre>
-@interface I1 
-@property (setter=myOwnP3Setter:) int p3; 
--(void)myOwnP3Setter:(int)a; 
+@interface I1
+@property (setter=myOwnP3Setter:) int p3;
+-(void)myOwnP3Setter:(int)a;
 @end
 
-@implementation I1 
+@implementation I1
 @synthesize p3;
--(void)myOwnP3Setter:(int)a{ } 
+-(void)myOwnP3Setter:(int)a{ }
 @end
 </pre>
 </div>
@@ -1996,19 +2004,19 @@ The DWARF for this would be:
 </p>
 <div class="doc_code">
 <pre>
-0x000003bd: TAG_structure_type [7] * 
+0x000003bd: TAG_structure_type [7] *
               AT_APPLE_runtime_class( 0x10 )
               AT_name( "I1" )
-              AT_decl_file( "Objc_Property.m" ) 
+              AT_decl_file( "Objc_Property.m" )
               AT_decl_line( 3 )
 
 0x000003cd      TAG_APPLE_property
                   AT_name ( "p3" )
                   AT_APPLE_property_setter ( "myOwnP3Setter:" )
                   AT_type( {0x00000147} ( int ) )
-              
-0x000003f3:     TAG_member [8] 
-                  AT_name( "_p3" ) 
+
+0x000003f3:     TAG_member [8]
+                  AT_name( "_p3" )
                   AT_type ( {0x00000147} ( int ) )
                   AT_APPLE_property ( {0x000003cd} )
                   AT_artificial ( 0x1 )
@@ -2025,13 +2033,15 @@ The DWARF for this would be:
 
 <div>
 <table border="1" cellspacing="0">
+  <col width="200">
+  <col width="200">
   <tr>
-    <th width=200 >TAG</th>
-    <th width=200 >Value</th>
+    <th>TAG</th>
+    <th>Value</th>
   </tr>
   <tr>
-    <td width=200 >DW_TAG_APPLE_property</td>
-    <td width=200 >0x4200</td>
+    <td>DW_TAG_APPLE_property</td>
+    <td>0x4200</td>
   </tr>
 </table>
 
@@ -2045,30 +2055,33 @@ The DWARF for this would be:
 
 <div>
 <table border="1" cellspacing="0">
+  <col width="200">
+  <col width="200">
+  <col width="200">
   <tr>
-    <th width=200 >Attribute</th>
-    <th width=200 >Value</th>
-    <th width=200 >Classes</th>
+    <th>Attribute</th>
+    <th>Value</th>
+    <th>Classes</th>
   </tr>
   <tr>
-    <td width=200 >DW_AT_APPLE_property</td>
-    <td width=200 >0x3fed</td>
-    <td width=200 >Reference</td>
+    <td>DW_AT_APPLE_property</td>
+    <td>0x3fed</td>
+    <td>Reference</td>
   </tr>
   <tr>
-    <td width=200 >DW_AT_APPLE_property_getter</td>
-    <td width=200 >0x3fe9</td>
-    <td width=200 >String</td>
+    <td>DW_AT_APPLE_property_getter</td>
+    <td>0x3fe9</td>
+    <td>String</td>
   </tr>
   <tr>
-    <td width=200 >DW_AT_APPLE_property_setter</td>
-    <td width=200 >0x3fea</td>
-    <td width=200 >String</td>
+    <td>DW_AT_APPLE_property_setter</td>
+    <td>0x3fea</td>
+    <td>String</td>
   </tr>
   <tr>
-    <td width=200 >DW_AT_APPLE_property_attribute</td>
-    <td width=200 >0x3feb</td>
-    <td width=200 >Constant</td>
+    <td>DW_AT_APPLE_property_attribute</td>
+    <td>0x3feb</td>
+    <td>Constant</td>
   </tr>
 </table>
 
@@ -2082,33 +2095,35 @@ The DWARF for this would be:
 
 <div>
 <table border="1" cellspacing="0">
+  <col width="200">
+  <col width="200">
   <tr>
-    <th width=200 >Name</th>
-    <th width=200 >Value</th>
+    <th>Name</th>
+    <th>Value</th>
   </tr>
   <tr>
-    <td width=200 >DW_AT_APPLE_PROPERTY_readonly</td>
-    <td width=200 >0x1</td>
+    <td>DW_AT_APPLE_PROPERTY_readonly</td>
+    <td>0x1</td>
   </tr>
   <tr>
-    <td width=200 >DW_AT_APPLE_PROPERTY_readwrite</td>
-    <td width=200 >0x2</td>
+    <td>DW_AT_APPLE_PROPERTY_readwrite</td>
+    <td>0x2</td>
   </tr>
   <tr>
-    <td width=200 >DW_AT_APPLE_PROPERTY_assign</td>
-    <td width=200 >0x4</td>
+    <td>DW_AT_APPLE_PROPERTY_assign</td>
+    <td>0x4</td>
   </tr>
   <tr>
-    <td width=200 >DW_AT_APPLE_PROPERTY_retain</td>
-    <td width=200 >0x8</td>
+    <td>DW_AT_APPLE_PROPERTY_retain</td>
+    <td>0x8</td>
   </tr>
   <tr>
-    <td width=200 >DW_AT_APPLE_PROPERTY_copy</td>
-    <td width=200 >0x10</td>
+    <td>DW_AT_APPLE_PROPERTY_copy</td>
+    <td>0x10</td>
   </tr>
   <tr>
-    <td width=200 >DW_AT_APPLE_PROPERTY_nonatomic</td>
-    <td width=200 >0x20</td>
+    <td>DW_AT_APPLE_PROPERTY_nonatomic</td>
+    <td>0x20</td>
   </tr>
 </table>
 
@@ -2116,6 +2131,681 @@ The DWARF for this would be:
 </div>
 </div>
 
+<div>
+<!-- ======================================================================= -->
+<h3>
+  <a name="acceltable">Name Accelerator Tables</a>
+</h3>
+<!-- ======================================================================= -->
+<!-- ======================================================================= -->
+<h4>
+  <a name="acceltableintroduction">Introduction</a>
+</h4>
+<!-- ======================================================================= -->
+<div>
+<p>The .debug_pubnames and .debug_pubtypes formats are not what a debugger
+  needs. The "pub" in the section name indicates that the entries in the
+  table are publicly visible names only. This means no static or hidden
+  functions show up in the .debug_pubnames. No static variables or private class
+  variables are in the .debug_pubtypes. Many compilers add different things to
+  these tables, so we can't rely upon the contents between gcc, icc, or clang.</p>
+
+<p>The typical query given by users tends not to match up with the contents of
+  these tables. For example, the DWARF spec states that "In the case of the
+  name of a function member or static data member of a C++ structure, class or
+  union, the name presented in the .debug_pubnames section is not the simple
+  name given by the DW_AT_name attribute of the referenced debugging information
+  entry, but rather the fully qualified name of the data or function member."
+  So the only names in these tables for complex C++ entries is a fully
+  qualified name.  Debugger users tend not to enter their search strings as
+  "a::b::c(int,const Foo&) const", but rather as "c", "b::c" , or "a::b::c".  So
+  the name entered in the name table must be demangled in order to chop it up
+  appropriately and additional names must be manually entered into the table
+  to make it effective as a name lookup table for debuggers to use.</p>
+
+<p>All debuggers currently ignore the .debug_pubnames table as a result of
+  its inconsistent and useless public-only name content making it a waste of
+  space in the object file. These tables, when they are written to disk, are
+  not sorted in any way, leaving every debugger to do its own parsing
+  and sorting. These tables also include an inlined copy of the string values
+  in the table itself making the tables much larger than they need to be on
+  disk, especially for large C++ programs.</p>
+
+<p>Can't we just fix the sections by adding all of the names we need to this
+  table? No, because that is not what the tables are defined to contain and we
+  won't know the difference between the old bad tables and the new good tables.
+  At best we could make our own renamed sections that contain all of the data
+  we need.</p>
+
+<p>These tables are also insufficient for what a debugger like LLDB needs.
+  LLDB uses clang for its expression parsing where LLDB acts as a PCH. LLDB is
+  then often asked to look for type "foo" or namespace "bar", or list items in
+  namespace "baz". Namespaces are not included in the pubnames or pubtypes
+  tables. Since clang asks a lot of questions when it is parsing an expression,
+  we need to be very fast when looking up names, as it happens a lot. Having new
+  accelerator tables that are optimized for very quick lookups will benefit
+  this type of debugging experience greatly.</p>
+
+<p>We would like to generate name lookup tables that can be mapped into
+  memory from disk, and used as is, with little or no up-front parsing. We would
+  also be able to control the exact content of these different tables so they
+  contain exactly what we need. The Name Accelerator Tables were designed
+  to fix these issues. In order to solve these issues we need to:</p>
+
+<ul>
+  <li>Have a format that can be mapped into memory from disk and used as is</li>
+  <li>Lookups should be very fast</li>
+  <li>Extensible table format so these tables can be made by many producers</li>
+  <li>Contain all of the names needed for typical lookups out of the box</li>
+  <li>Strict rules for the contents of tables</li>
+</ul>
+
+<p>Table size is important and the accelerator table format should allow the
+  reuse of strings from common string tables so the strings for the names are
+  not duplicated. We also want to make sure the table is ready to be used as-is
+  by simply mapping the table into memory with minimal header parsing.</p>
+
+<p>The name lookups need to be fast and optimized for the kinds of lookups
+  that debuggers tend to do. Optimally we would like to touch as few parts of
+  the mapped table as possible when doing a name lookup and be able to quickly
+  find the name entry we are looking for, or discover there are no matches. In
+  the case of debuggers we optimized for lookups that fail most of the time.</p>
+
+<p>Each table that is defined should have strict rules on exactly what is in
+  the accelerator tables and documented so clients can rely on the content.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<h4>
+  <a name="acceltablehashes">Hash Tables</a>
+</h4>
+<!-- ======================================================================= -->
+
+<div>
+<h5>Standard Hash Tables</h5>
+
+<p>Typical hash tables have a header, buckets, and each bucket points to the
+bucket contents:
+</p>
+
+<div class="doc_code">
+<pre>
+.------------.
+|  HEADER    |
+|------------|
+|  BUCKETS   |
+|------------|
+|  DATA      |
+`------------'
+</pre>
+</div>
+
+<p>The BUCKETS are an array of offsets to DATA for each hash:</p>
+
+<div class="doc_code">
+<pre>
+.------------.
+| 0x00001000 | BUCKETS[0]
+| 0x00002000 | BUCKETS[1]
+| 0x00002200 | BUCKETS[2]
+| 0x000034f0 | BUCKETS[3]
+|            | ...
+| 0xXXXXXXXX | BUCKETS[n_buckets]
+'------------'
+</pre>
+</div>
+
+<p>So for bucket[3] in the example above, we have an offset into the table
+  0x000034f0 which points to a chain of entries for the bucket. Each bucket
+  must contain a next pointer, full 32 bit hash value, the string itself,
+  and the data for the current string value.</p>
+
+<div class="doc_code">
+<pre>
+            .------------.
+0x000034f0: | 0x00003500 | next pointer
+            | 0x12345678 | 32 bit hash
+            | "erase"    | string value
+            | data[n]    | HashData for this bucket
+            |------------|
+0x00003500: | 0x00003550 | next pointer
+            | 0x29273623 | 32 bit hash
+            | "dump"     | string value
+            | data[n]    | HashData for this bucket
+            |------------|
+0x00003550: | 0x00000000 | next pointer
+            | 0x82638293 | 32 bit hash
+            | "main"     | string value
+            | data[n]    | HashData for this bucket
+            `------------'
+</pre>
+</div>
+
+<p>The problem with this layout for debuggers is that we need to optimize for
+  the negative lookup case where the symbol we're searching for is not present.
+  So if we were to lookup "printf" in the table above, we would make a 32 hash
+  for "printf", it might match bucket[3]. We would need to go to the offset
+  0x000034f0 and start looking to see if our 32 bit hash matches. To do so, we
+  need to read the next pointer, then read the hash, compare it, and skip to
+  the next bucket. Each time we are skipping many bytes in memory and touching
+  new cache pages just to do the compare on the full 32 bit hash. All of these
+  accesses then tell us that we didn't have a match.</p>
+
+<h5>Name Hash Tables</h5>
+
+<p>To solve the issues mentioned above we have structured the hash tables
+  a bit differently: a header, buckets, an array of all unique 32 bit hash
+  values, followed by an array of hash value data offsets, one for each hash
+  value, then the data for all hash values:</p>
+
+<div class="doc_code">
+<pre>
+.-------------.
+|  HEADER     |
+|-------------|
+|  BUCKETS    |
+|-------------|
+|  HASHES     |
+|-------------|
+|  OFFSETS    |
+|-------------|
+|  DATA       |
+`-------------'
+</pre>
+</div>
+
+<p>The BUCKETS in the name tables are an index into the HASHES array. By
+  making all of the full 32 bit hash values contiguous in memory, we allow
+  ourselves to efficiently check for a match while touching as little
+  memory as possible. Most often checking the 32 bit hash values is as far as
+  the lookup goes. If it does match, it usually is a match with no collisions.
+  So for a table with "n_buckets" buckets, and "n_hashes" unique 32 bit hash
+  values, we can clarify the contents of the BUCKETS, HASHES and OFFSETS as:</p>
+
+<div class="doc_code">
+<pre>
+.-------------------------.
+|  HEADER.magic           | uint32_t
+|  HEADER.version         | uint16_t
+|  HEADER.hash_function   | uint16_t
+|  HEADER.bucket_count    | uint32_t
+|  HEADER.hashes_count    | uint32_t
+|  HEADER.header_data_len | uint32_t
+|  HEADER_DATA            | HeaderData
+|-------------------------|
+|  BUCKETS                | uint32_t[n_buckets] // 32 bit hash indexes
+|-------------------------|
+|  HASHES                 | uint32_t[n_buckets] // 32 bit hash values
+|-------------------------|
+|  OFFSETS                | uint32_t[n_buckets] // 32 bit offsets to hash value data
+|-------------------------|
+|  ALL HASH DATA          |
+`-------------------------'
+</pre>
+</div>
+
+<p>So taking the exact same data from the standard hash example above we end up
+  with:</p>
+
+<div class="doc_code">
+<pre>
+            .------------.
+            | HEADER     |
+            |------------|
+            |          0 | BUCKETS[0]
+            |          2 | BUCKETS[1]
+            |          5 | BUCKETS[2]
+            |          6 | BUCKETS[3]
+            |            | ...
+            |        ... | BUCKETS[n_buckets]
+            |------------|
+            | 0x........ | HASHES[0]
+            | 0x........ | HASHES[1]
+            | 0x........ | HASHES[2]
+            | 0x........ | HASHES[3]
+            | 0x........ | HASHES[4]
+            | 0x........ | HASHES[5]
+            | 0x12345678 | HASHES[6]    hash for BUCKETS[3]
+            | 0x29273623 | HASHES[7]    hash for BUCKETS[3]
+            | 0x82638293 | HASHES[8]    hash for BUCKETS[3]
+            | 0x........ | HASHES[9]
+            | 0x........ | HASHES[10]
+            | 0x........ | HASHES[11]
+            | 0x........ | HASHES[12]
+            | 0x........ | HASHES[13]
+            | 0x........ | HASHES[n_hashes]
+            |------------|
+            | 0x........ | OFFSETS[0]
+            | 0x........ | OFFSETS[1]
+            | 0x........ | OFFSETS[2]
+            | 0x........ | OFFSETS[3]
+            | 0x........ | OFFSETS[4]
+            | 0x........ | OFFSETS[5]
+            | 0x000034f0 | OFFSETS[6]   offset for BUCKETS[3]
+            | 0x00003500 | OFFSETS[7]   offset for BUCKETS[3]
+            | 0x00003550 | OFFSETS[8]   offset for BUCKETS[3]
+            | 0x........ | OFFSETS[9]
+            | 0x........ | OFFSETS[10]
+            | 0x........ | OFFSETS[11]
+            | 0x........ | OFFSETS[12]
+            | 0x........ | OFFSETS[13]
+            | 0x........ | OFFSETS[n_hashes]
+            |------------|
+            |            |
+            |            |
+            |            |
+            |            |
+            |            |
+            |------------|
+0x000034f0: | 0x00001203 | .debug_str ("erase")
+            | 0x00000004 | A 32 bit array count - number of HashData with name "erase"
+            | 0x........ | HashData[0]
+            | 0x........ | HashData[1]
+            | 0x........ | HashData[2]
+            | 0x........ | HashData[3]
+            | 0x00000000 | String offset into .debug_str (terminate data for hash)
+            |------------|
+0x00003500: | 0x00001203 | String offset into .debug_str ("collision")
+            | 0x00000002 | A 32 bit array count - number of HashData with name "collision"
+            | 0x........ | HashData[0]
+            | 0x........ | HashData[1]
+            | 0x00001203 | String offset into .debug_str ("dump")
+            | 0x00000003 | A 32 bit array count - number of HashData with name "dump"
+            | 0x........ | HashData[0]
+            | 0x........ | HashData[1]
+            | 0x........ | HashData[2]
+            | 0x00000000 | String offset into .debug_str (terminate data for hash)
+            |------------|
+0x00003550: | 0x00001203 | String offset into .debug_str ("main")
+            | 0x00000009 | A 32 bit array count - number of HashData with name "main"
+            | 0x........ | HashData[0]
+            | 0x........ | HashData[1]
+            | 0x........ | HashData[2]
+            | 0x........ | HashData[3]
+            | 0x........ | HashData[4]
+            | 0x........ | HashData[5]
+            | 0x........ | HashData[6]
+            | 0x........ | HashData[7]
+            | 0x........ | HashData[8]
+            | 0x00000000 | String offset into .debug_str (terminate data for hash)
+            `------------'
+</pre>
+</div>
+
+<p>So we still have all of the same data, we just organize it more efficiently
+  for debugger lookup. If we repeat the same "printf" lookup from above, we
+  would hash "printf" and find it matches BUCKETS[3] by taking the 32 bit hash
+  value and modulo it by n_buckets. BUCKETS[3] contains "6" which is the index
+  into the HASHES table. We would then compare any consecutive 32 bit hashes
+  values in the HASHES array as long as the hashes would be in BUCKETS[3]. We
+  do this by verifying that each subsequent hash value modulo n_buckets is still
+  3. In the case of a failed lookup we would access the memory for BUCKETS[3], and
+  then compare a few consecutive 32 bit hashes before we know that we have no match.
+  We don't end up marching through multiple words of memory and we really keep the
+  number of processor data cache lines being accessed as small as possible.</p>
+
+<p>The string hash that is used for these lookup tables is the Daniel J.
+  Bernstein hash which is also used in the ELF GNU_HASH sections. It is a very
+  good hash for all kinds of names in programs with very few hash collisions.</p>
+
+<p>Empty buckets are designated by using an invalid hash index of UINT32_MAX.</p>
+</div>
+
+<!-- ======================================================================= -->
+<h4>
+  <a name="acceltabledetails">Details</a>
+</h4>
+<!-- ======================================================================= -->
+<div>
+<p>These name hash tables are designed to be generic where specializations of
+  the table get to define additional data that goes into the header
+  ("HeaderData"), how the string value is stored ("KeyType") and the content
+  of the data for each hash value.</p>
+
+<h5>Header Layout</h5>
+<p>The header has a fixed part, and the specialized part. The exact format of
+  the header is:</p>
+<div class="doc_code">
+<pre>
+struct Header
+{
+  uint32_t   magic;           // 'HASH' magic value to allow endian detection
+  uint16_t   version;         // Version number
+  uint16_t   hash_function;   // The hash function enumeration that was used
+  uint32_t   bucket_count;    // The number of buckets in this hash table
+  uint32_t   hashes_count;    // The total number of unique hash values and hash data offsets in this table
+  uint32_t   header_data_len; // The bytes to skip to get to the hash indexes (buckets) for correct alignment
+                              // Specifically the length of the following HeaderData field - this does not
+                              // include the size of the preceding fields
+  HeaderData header_data;     // Implementation specific header data
+};
+</pre>
+</div>
+<p>The header starts with a 32 bit "magic" value which must be 'HASH' encoded as
+  an ASCII integer. This allows the detection of the start of the hash table and
+  also allows the table's byte order to be determined so the table can be
+  correctly extracted. The "magic" value is followed by a 16 bit version number
+  which allows the table to be revised and modified in the future. The current
+  version number is 1. "hash_function" is a uint16_t enumeration that specifies
+  which hash function was used to produce this table. The current values for the
+  hash function enumerations include:</p>
+<div class="doc_code">
+<pre>
+enum HashFunctionType
+{
+  eHashFunctionDJB = 0u, // Daniel J Bernstein hash function
+};
+</pre>
+</div>
+<p>"bucket_count" is a 32 bit unsigned integer that represents how many buckets
+  are in the BUCKETS array. "hashes_count" is the number of unique 32 bit hash
+  values that are in the HASHES array, and is the same number of offsets are
+  contained in the OFFSETS array. "header_data_len" specifies the size in
+  bytes of the HeaderData that is filled in by specialized versions of this
+  table.</p>
+
+<h5>Fixed Lookup</h5>
+<p>The header is followed by the buckets, hashes, offsets, and hash value
+  data.
+<div class="doc_code">
+<pre>
+struct FixedTable
+{
+  uint32_t buckets[Header.bucket_count];  // An array of hash indexes into the "hashes[]" array below
+  uint32_t hashes [Header.hashes_count];  // Every unique 32 bit hash for the entire table is in this table
+  uint32_t offsets[Header.hashes_count];  // An offset that corresponds to each item in the "hashes[]" array above
+};
+</pre>
+</div>
+<p>"buckets" is an array of 32 bit indexes into the "hashes" array. The
+  "hashes" array contains all of the 32 bit hash values for all names in the
+  hash table. Each hash in the "hashes" table has an offset in the "offsets"
+  array that points to the data for the hash value.</p>
+
+<p>This table setup makes it very easy to repurpose these tables to contain
+  different data, while keeping the lookup mechanism the same for all tables.
+  This layout also makes it possible to save the table to disk and map it in
+  later and do very efficient name lookups with little or no parsing.</p>
+
+<p>DWARF lookup tables can be implemented in a variety of ways and can store
+  a lot of information for each name. We want to make the DWARF tables
+  extensible and able to store the data efficiently so we have used some of the
+  DWARF features that enable efficient data storage to define exactly what kind
+  of data we store for each name.</p>
+
+<p>The "HeaderData" contains a definition of the contents of each HashData
+  chunk. We might want to store an offset to all of the debug information
+  entries (DIEs) for each name. To keep things extensible, we create a list of
+  items, or Atoms, that are contained in the data for each name. First comes the
+  type of the data in each atom:</p>
+<div class="doc_code">
+<pre>
+enum AtomType
+{
+  eAtomTypeNULL       = 0u,
+  eAtomTypeDIEOffset  = 1u,   // DIE offset, check form for encoding
+  eAtomTypeCUOffset   = 2u,   // DIE offset of the compiler unit header that contains the item in question
+  eAtomTypeTag        = 3u,   // DW_TAG_xxx value, should be encoded as DW_FORM_data1 (if no tags exceed 255) or DW_FORM_data2
+  eAtomTypeNameFlags  = 4u,   // Flags from enum NameFlags
+  eAtomTypeTypeFlags  = 5u,   // Flags from enum TypeFlags
+};
+</pre>
+</div>
+<p>The enumeration values and their meanings are:</p>
+<div class="doc_code">
+<pre>
+  eAtomTypeNULL       - a termination atom that specifies the end of the atom list
+  eAtomTypeDIEOffset  - an offset into the .debug_info section for the DWARF DIE for this name
+  eAtomTypeCUOffset   - an offset into the .debug_info section for the CU that contains the DIE
+  eAtomTypeDIETag     - The DW_TAG_XXX enumeration value so you don't have to parse the DWARF to see what it is
+  eAtomTypeNameFlags  - Flags for functions and global variables (isFunction, isInlined, isExternal...)
+  eAtomTypeTypeFlags  - Flags for types (isCXXClass, isObjCClass, ...)
+</pre>
+</div>
+<p>Then we allow each atom type to define the atom type and how the data for
+  each atom type data is encoded:</p>
+<div class="doc_code">
+<pre>
+struct Atom
+{
+  uint16_t type;  // AtomType enum value
+  uint16_t form;  // DWARF DW_FORM_XXX defines
+};
+</pre>
+</div>
+<p>The "form" type above is from the DWARF specification and defines the
+  exact encoding of the data for the Atom type. See the DWARF specification for
+  the DW_FORM_ definitions.</p>
+<div class="doc_code">
+<pre>
+struct HeaderData
+{
+  uint32_t die_offset_base;
+  uint32_t atom_count;
+  Atoms    atoms[atom_count0];
+};
+</pre>
+</div>
+<p>"HeaderData" defines the base DIE offset that should be added to any atoms
+  that are encoded using the DW_FORM_ref1, DW_FORM_ref2, DW_FORM_ref4,
+  DW_FORM_ref8 or DW_FORM_ref_udata. It also defines what is contained in
+  each "HashData" object -- Atom.form tells us how large each field will be in
+  the HashData and the Atom.type tells us how this data should be interpreted.</p>
+
+<p>For the current implementations of the ".apple_names" (all functions + globals),
+  the ".apple_types" (names of all types that are defined), and the
+  ".apple_namespaces" (all namespaces), we currently set the Atom array to be:</p>
+<div class="doc_code">
+<pre>
+HeaderData.atom_count = 1;
+HeaderData.atoms[0].type = eAtomTypeDIEOffset;
+HeaderData.atoms[0].form = DW_FORM_data4;
+</pre>
+</div>
+<p>This defines the contents to be the DIE offset (eAtomTypeDIEOffset) that is
+  encoded as a 32 bit value (DW_FORM_data4). This allows a single name to have
+  multiple matching DIEs in a single file, which could come up with an inlined
+  function for instance. Future tables could include more information about the
+  DIE such as flags indicating if the DIE is a function, method, block,
+  or inlined.</p>
+
+<p>The KeyType for the DWARF table is a 32 bit string table offset into the
+  ".debug_str" table. The ".debug_str" is the string table for the DWARF which
+  may already contain copies of all of the strings. This helps make sure, with
+  help from the compiler, that we reuse the strings between all of the DWARF
+  sections and keeps the hash table size down. Another benefit to having the
+  compiler generate all strings as DW_FORM_strp in the debug info, is that
+  DWARF parsing can be made much faster.</p>
+
+<p>After a lookup is made, we get an offset into the hash data. The hash data
+  needs to be able to deal with 32 bit hash collisions, so the chunk of data
+  at the offset in the hash data consists of a triple:</p>
+<div class="doc_code">
+<pre>
+uint32_t str_offset
+uint32_t hash_data_count
+HashData[hash_data_count]
+</pre>
+</div>
+<p>If "str_offset" is zero, then the bucket contents are done. 99.9% of the
+  hash data chunks contain a single item (no 32 bit hash collision):</p>
+<div class="doc_code">
+<pre>
+.------------.
+| 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main")
+| 0x00000004 | uint32_t HashData count
+| 0x........ | uint32_t HashData[0] DIE offset
+| 0x........ | uint32_t HashData[1] DIE offset
+| 0x........ | uint32_t HashData[2] DIE offset
+| 0x........ | uint32_t HashData[3] DIE offset
+| 0x00000000 | uint32_t KeyType (end of hash chain)
+`------------'
+</pre>
+</div>
+<p>If there are collisions, you will have multiple valid string offsets:</p>
+<div class="doc_code">
+<pre>
+.------------.
+| 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main")
+| 0x00000004 | uint32_t HashData count
+| 0x........ | uint32_t HashData[0] DIE offset
+| 0x........ | uint32_t HashData[1] DIE offset
+| 0x........ | uint32_t HashData[2] DIE offset
+| 0x........ | uint32_t HashData[3] DIE offset
+| 0x00002023 | uint32_t KeyType (.debug_str[0x0002023] => "print")
+| 0x00000002 | uint32_t HashData count
+| 0x........ | uint32_t HashData[0] DIE offset
+| 0x........ | uint32_t HashData[1] DIE offset
+| 0x00000000 | uint32_t KeyType (end of hash chain)
+`------------'
+</pre>
+</div>
+<p>Current testing with real world C++ binaries has shown that there is around 1
+  32 bit hash collision per 100,000 name entries.</p>
+</div>
+<!-- ======================================================================= -->
+<h4>
+  <a name="acceltablecontents">Contents</a>
+</h4>
+<!-- ======================================================================= -->
+<div>
+<p>As we said, we want to strictly define exactly what is included in the
+  different tables. For DWARF, we have 3 tables: ".apple_names", ".apple_types",
+  and ".apple_namespaces".</p>
+
+<p>".apple_names" sections should contain an entry for each DWARF DIE whose
+  DW_TAG is a DW_TAG_label, DW_TAG_inlined_subroutine, or DW_TAG_subprogram that
+  has address attributes: DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges or
+  DW_AT_entry_pc. It also contains DW_TAG_variable DIEs that have a DW_OP_addr
+  in the location (global and static variables). All global and static variables
+  should be included, including those scoped withing functions and classes. For
+  example using the following code:</p>
+<div class="doc_code">
+<pre>
+static int var = 0;
+
+void f ()
+{
+  static int var = 0;
+}
+</pre>
+</div>
+<p>Both of the static "var" variables would be included in the table. All
+  functions should emit both their full names and their basenames. For C or C++,
+  the full name is the mangled name (if available) which is usually in the
+  DW_AT_MIPS_linkage_name attribute, and the DW_AT_name contains the function
+  basename. If global or static variables have a mangled name in a
+  DW_AT_MIPS_linkage_name attribute, this should be emitted along with the
+  simple name found in the DW_AT_name attribute.</p>
+
+<p>".apple_types" sections should contain an entry for each DWARF DIE whose
+  tag is one of:</p>
+<ul>
+  <li>DW_TAG_array_type</li>
+  <li>DW_TAG_class_type</li>
+  <li>DW_TAG_enumeration_type</li>
+  <li>DW_TAG_pointer_type</li>
+  <li>DW_TAG_reference_type</li>
+  <li>DW_TAG_string_type</li>
+  <li>DW_TAG_structure_type</li>
+  <li>DW_TAG_subroutine_type</li>
+  <li>DW_TAG_typedef</li>
+  <li>DW_TAG_union_type</li>
+  <li>DW_TAG_ptr_to_member_type</li>
+  <li>DW_TAG_set_type</li>
+  <li>DW_TAG_subrange_type</li>
+  <li>DW_TAG_base_type</li>
+  <li>DW_TAG_const_type</li>
+  <li>DW_TAG_constant</li>
+  <li>DW_TAG_file_type</li>
+  <li>DW_TAG_namelist</li>
+  <li>DW_TAG_packed_type</li>
+  <li>DW_TAG_volatile_type</li>
+  <li>DW_TAG_restrict_type</li>
+  <li>DW_TAG_interface_type</li>
+  <li>DW_TAG_unspecified_type</li>
+  <li>DW_TAG_shared_type</li>
+</ul>
+<p>Only entries with a DW_AT_name attribute are included, and the entry must
+  not be a forward declaration (DW_AT_declaration attribute with a non-zero value).
+  For example, using the following code:</p>
+<div class="doc_code">
+<pre>
+int main ()
+{
+  int *b = 0;
+  return *b;
+}
+</pre>
+</div>
+<p>We get a few type DIEs:</p>
+<div class="doc_code">
+<pre>
+0x00000067:     TAG_base_type [5]
+                AT_encoding( DW_ATE_signed )
+                AT_name( "int" )
+                AT_byte_size( 0x04 )
+
+0x0000006e:     TAG_pointer_type [6]
+                AT_type( {0x00000067} ( int ) )
+                AT_byte_size( 0x08 )
+</pre>
+</div>
+<p>The DW_TAG_pointer_type is not included because it does not have a DW_AT_name.</p>
+
+<p>".apple_namespaces" section should contain all DW_TAG_namespace DIEs. If
+  we run into a namespace that has no name this is an anonymous namespace,
+  and the name should be output as "(anonymous namespace)" (without the quotes).
+  Why? This matches the output of the abi::cxa_demangle() that is in the standard
+  C++ library that demangles mangled names.</p>
+</div>
+
+<!-- ======================================================================= -->
+<h4>
+  <a name="acceltableextensions">Language Extensions and File Format Changes</a>
+</h4>
+<!-- ======================================================================= -->
+<div>
+<h5>Objective-C Extensions</h5>
+<p>".apple_objc" section should contain all DW_TAG_subprogram DIEs for an
+  Objective-C class. The name used in the hash table is the name of the
+  Objective-C class itself. If the Objective-C class has a category, then an
+  entry is made for both the class name without the category, and for the class
+  name with the category. So if we have a DIE at offset 0x1234 with a name
+  of method "-[NSString(my_additions) stringWithSpecialString:]", we would add
+  an entry for "NSString" that points to DIE 0x1234, and an entry for
+  "NSString(my_additions)" that points to 0x1234. This allows us to quickly
+  track down all Objective-C methods for an Objective-C class when doing
+  expressions. It is needed because of the dynamic nature of Objective-C where
+  anyone can add methods to a class. The DWARF for Objective-C methods is also
+  emitted differently from C++ classes where the methods are not usually
+  contained in the class definition, they are scattered about across one or more
+  compile units. Categories can also be defined in different shared libraries.
+  So we need to be able to quickly find all of the methods and class functions
+  given the Objective-C class name, or quickly find all methods and class
+  functions for a class + category name. This table does not contain any selector
+  names, it just maps Objective-C class names (or class names + category) to all
+  of the methods and class functions. The selectors are added as function
+  basenames in the .debug_names section.</p>
+
+<p>In the ".apple_names" section for Objective-C functions, the full name is the
+  entire function name with the brackets ("-[NSString stringWithCString:]") and the
+  basename is the selector only ("stringWithCString:").</p>
+
+<h5>Mach-O Changes</h5>
+<p>The sections names for the apple hash tables are for non mach-o files. For
+  mach-o files, the sections should be contained in the "__DWARF" segment with
+  names as follows:</p>
+<ul>
+  <li>".apple_names" -> "__apple_names"</li>
+  <li>".apple_types" -> "__apple_types"</li>
+  <li>".apple_namespaces" -> "__apple_namespac" (16 character limit)</li>
+  <li> ".apple_objc" -> "__apple_objc"</li>
+</ul>
+</div>
+</div>
+
 <!-- *********************************************************************** -->
 
 <hr>
diff --git a/docs/TestSuiteMakefileGuide.html b/docs/TestSuiteMakefileGuide.html
new file mode 100644
index 0000000..876fe42
--- /dev/null
+++ b/docs/TestSuiteMakefileGuide.html
@@ -0,0 +1,351 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+                      "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+  <title>LLVM test-suite Makefile Guide</title>
+  <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+      
+<h1>
+  LLVM test-suite Makefile Guide
+</h1>
+
+<ol>
+  <li><a href="#overview">Overview</a></li>
+  <li><a href="#testsuitestructure">Test suite structure</a></li>
+  <li><a href="#testsuiterun">Running the test suite</a>
+    <ul>
+      <li><a href="#testsuiteexternal">Configuring External Tests</a></li>
+      <li><a href="#testsuitetests">Running different tests</a></li>
+      <li><a href="#testsuiteoutput">Generating test output</a></li>
+      <li><a href="#testsuitecustom">Writing custom tests for test-suite</a></li>
+   </ul>
+  </li>
+</ol>
+
+<div class="doc_author">
+  <p>Written by John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner</p>
+</div>
+
+<!--=========================================================================-->
+<h2><a name="overview">Overview</a></h2>
+<!--=========================================================================-->
+
+<div>
+
+<p>This document describes the features of the Makefile-based LLVM
+test-suite. This way of interacting with the test-suite is deprecated in favor
+of running the test-suite using LNT, but may continue to prove useful for some
+users. See the Testing
+Guide's <a href="TestingGuide.html#testsuitequickstart">test-suite
+Quickstart</a> section for more information.</p>
+
+</div>
+
+<!--=========================================================================-->
+<h2><a name="testsuitestructure">Test suite Structure</a></h2>
+<!--=========================================================================-->
+
+<div>
+
+<p>The <tt>test-suite</tt> module contains a number of programs that can be compiled 
+with LLVM and executed. These programs are compiled using the native compiler
+and various LLVM backends. The output from the program compiled with the 
+native compiler is assumed correct; the results from the other programs are
+compared to the native program output and pass if they match.</p>
+
+<p>When executing tests, it is usually a good idea to start out with a subset of
+the available tests or programs. This makes test run times smaller at first and
+later on this is useful to investigate individual test failures. To run some
+test only on a subset of programs, simply change directory to the programs you
+want tested and run <tt>gmake</tt> there. Alternatively, you can run a different
+test using the <tt>TEST</tt> variable to change what tests or run on the
+selected programs (see below for more info).</p>
+
+<p>In addition for testing correctness, the <tt>test-suite</tt> directory also
+performs timing tests of various LLVM optimizations.  It also records
+compilation times for the compilers and the JIT.  This information can be
+used to compare the effectiveness of LLVM's optimizations and code
+generation.</p>
+
+<p><tt>test-suite</tt> tests are divided into three types of tests: MultiSource,
+SingleSource, and External.</p> 
+
+<ul>
+<li><tt>test-suite/SingleSource</tt>
+<p>The SingleSource directory contains test programs that are only a single 
+source file in size.  These are usually small benchmark programs or small 
+programs that calculate a particular value.  Several such programs are grouped 
+together in each directory.</p></li>
+
+<li><tt>test-suite/MultiSource</tt>
+<p>The MultiSource directory contains subdirectories which contain entire 
+programs with multiple source files.  Large benchmarks and whole applications 
+go here.</p></li>
+
+<li><tt>test-suite/External</tt>
+<p>The External directory contains Makefiles for building code that is external
+to (i.e., not distributed with) LLVM.  The most prominent members of this
+directory are the SPEC 95 and SPEC 2000 benchmark suites. The <tt>External</tt>
+directory does not contain these actual tests, but only the Makefiles that know
+how to properly compile these programs from somewhere else. The presence and
+location of these external programs is configured by the test-suite
+<tt>configure</tt> script.</p></li>
+</ul>
+
+<p>Each tree is then subdivided into several categories, including applications,
+benchmarks, regression tests, code that is strange grammatically, etc.  These
+organizations should be relatively self explanatory.</p>
+
+<p>Some tests are known to fail.  Some are bugs that we have not fixed yet;
+others are features that we haven't added yet (or may never add).  In the
+regression tests, the result for such tests will be XFAIL (eXpected FAILure).
+In this way, you can tell the difference between an expected and unexpected
+failure.</p>
+
+<p>The tests in the test suite have no such feature at this time. If the
+test passes, only warnings and other miscellaneous output will be generated.  If
+a test fails, a large &lt;program&gt; FAILED message will be displayed.  This
+will help you separate benign warnings from actual test failures.</p>
+
+</div>
+
+<!--=========================================================================-->
+<h2><a name="testsuiterun">Running the test suite</a></h2>
+<!--=========================================================================-->
+
+<div>
+
+<p>First, all tests are executed within the LLVM object directory tree.  They
+<i>are not</i> executed inside of the LLVM source tree. This is because the
+test suite creates temporary files during execution.</p>
+
+<p>To run the test suite, you need to use the following steps:</p>
+
+<ol>
+  <li><tt>cd</tt> into the <tt>llvm/projects</tt> directory in your source tree.
+  </li>
+
+  <li><p>Check out the <tt>test-suite</tt> module with:</p>
+
+<div class="doc_code">
+<pre>
+% svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
+</pre>
+</div>
+    <p>This will get the test suite into <tt>llvm/projects/test-suite</tt>.</p>
+  </li>
+  <li><p>Configure and build <tt>llvm</tt>.</p></li>
+  <li><p>Configure and build <tt>llvm-gcc</tt>.</p></li>
+  <li><p>Install <tt>llvm-gcc</tt> somewhere.</p></li>
+  <li><p><em>Re-configure</em> <tt>llvm</tt> from the top level of
+      each build tree (LLVM object directory tree) in which you want
+      to run the test suite, just as you do before building LLVM.</p>
+    <p>During the <em>re-configuration</em>, you must either: (1)
+      have <tt>llvm-gcc</tt> you just built in your path, or (2)
+      specify the directory where your just-built <tt>llvm-gcc</tt> is
+      installed using <tt>--with-llvmgccdir=$LLVM_GCC_DIR</tt>.</p>
+    <p>You must also tell the configure machinery that the test suite
+      is available so it can be configured for your build tree:</p>
+<div class="doc_code">
+<pre>
+% cd $LLVM_OBJ_ROOT ; $LLVM_SRC_ROOT/configure [--with-llvmgccdir=$LLVM_GCC_DIR]
+</pre>
+</div>
+    <p>[Remember that <tt>$LLVM_GCC_DIR</tt> is the directory where you
+    <em>installed</em> llvm-gcc, not its src or obj directory.]</p>
+  </li>
+
+  <li><p>You can now run the test suite from your build tree as follows:</p>
+<div class="doc_code">
+<pre>
+% cd $LLVM_OBJ_ROOT/projects/test-suite
+% make
+</pre>
+</div>
+  </li>
+</ol>
+<p>Note that the second and third steps only need to be done once. After you
+have the suite checked out and configured, you don't need to do it again (unless
+the test code or configure script changes).</p>
+
+<!-- _______________________________________________________________________ -->
+<h3>
+  <a name="testsuiteexternal">Configuring External Tests</a>
+</h3>
+<!-- _______________________________________________________________________ -->
+
+<div>
+<p>In order to run the External tests in the <tt>test-suite</tt>
+  module, you must specify <i>--with-externals</i>.  This
+  must be done during the <em>re-configuration</em> step (see above),
+  and the <tt>llvm</tt> re-configuration must recognize the
+  previously-built <tt>llvm-gcc</tt>.  If any of these is missing or
+  neglected, the External tests won't work.</p>
+<dl>
+<dt><i>--with-externals</i></dt>
+<dt><i>--with-externals=&lt;<tt>directory</tt>&gt;</i></dt>
+</dl>
+  This tells LLVM where to find any external tests.  They are expected to be
+  in specifically named subdirectories of &lt;<tt>directory</tt>&gt;.
+  If <tt>directory</tt> is left unspecified,
+  <tt>configure</tt> uses the default value
+  <tt>/home/vadve/shared/benchmarks/speccpu2000/benchspec</tt>.
+  Subdirectory names known to LLVM include:
+  <dl>
+  <dt>spec95</dt>
+  <dt>speccpu2000</dt>
+  <dt>speccpu2006</dt>
+  <dt>povray31</dt>
+  </dl>
+  Others are added from time to time, and can be determined from 
+  <tt>configure</tt>.
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h3>
+  <a name="testsuitetests">Running different tests</a>
+</h3>
+<!-- _______________________________________________________________________ -->
+<div>
+<p>In addition to the regular "whole program" tests, the <tt>test-suite</tt>
+module also provides a mechanism for compiling the programs in different ways.
+If the variable TEST is defined on the <tt>gmake</tt> command line, the test system will
+include a Makefile named <tt>TEST.&lt;value of TEST variable&gt;.Makefile</tt>.
+This Makefile can modify build rules to yield different results.</p>
+
+<p>For example, the LLVM nightly tester uses <tt>TEST.nightly.Makefile</tt> to
+create the nightly test reports.  To run the nightly tests, run <tt>gmake
+TEST=nightly</tt>.</p>
+
+<p>There are several TEST Makefiles available in the tree.  Some of them are
+designed for internal LLVM research and will not work outside of the LLVM
+research group.  They may still be valuable, however, as a guide to writing your
+own TEST Makefile for any optimization or analysis passes that you develop with
+LLVM.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h3>
+  <a name="testsuiteoutput">Generating test output</a>
+</h3>
+<!-- _______________________________________________________________________ -->
+<div>
+  <p>There are a number of ways to run the tests and generate output. The most
+  simple one is simply running <tt>gmake</tt> with no arguments. This will
+  compile and run all programs in the tree using a number of different methods
+  and compare results. Any failures are reported in the output, but are likely
+  drowned in the other output. Passes are not reported explicitely.</p>
+
+  <p>Somewhat better is running <tt>gmake TEST=sometest test</tt>, which runs
+  the specified test and usually adds per-program summaries to the output
+  (depending on which sometest you use). For example, the <tt>nightly</tt> test
+  explicitely outputs TEST-PASS or TEST-FAIL for every test after each program.
+  Though these lines are still drowned in the output, it's easy to grep the
+  output logs in the Output directories.</p>
+
+  <p>Even better are the <tt>report</tt> and <tt>report.format</tt> targets
+  (where <tt>format</tt> is one of <tt>html</tt>, <tt>csv</tt>, <tt>text</tt> or
+  <tt>graphs</tt>). The exact contents of the report are dependent on which
+  <tt>TEST</tt> you are running, but the text results are always shown at the
+  end of the run and the results are always stored in the
+  <tt>report.&lt;type&gt;.format</tt> file (when running with
+  <tt>TEST=&lt;type&gt;</tt>).
+
+  The <tt>report</tt> also generate a file called
+  <tt>report.&lt;type&gt;.raw.out</tt> containing the output of the entire test
+  run.
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h3>
+  <a name="testsuitecustom">Writing custom tests for the test suite</a>
+</h3>
+<!-- _______________________________________________________________________ -->
+
+<div>
+
+<p>Assuming you can run the test suite, (e.g. "<tt>gmake TEST=nightly report</tt>"
+should work), it is really easy to run optimizations or code generator
+components against every program in the tree, collecting statistics or running
+custom checks for correctness.  At base, this is how the nightly tester works,
+it's just one example of a general framework.</p>
+
+<p>Lets say that you have an LLVM optimization pass, and you want to see how
+many times it triggers.  First thing you should do is add an LLVM
+<a href="ProgrammersManual.html#Statistic">statistic</a> to your pass, which
+will tally counts of things you care about.</p>
+
+<p>Following this, you can set up a test and a report that collects these and
+formats them for easy viewing.  This consists of two files, a
+"<tt>test-suite/TEST.XXX.Makefile</tt>" fragment (where XXX is the name of your
+test) and a "<tt>test-suite/TEST.XXX.report</tt>" file that indicates how to
+format the output into a table.  There are many example reports of various
+levels of sophistication included with the test suite, and the framework is very
+general.</p>
+
+<p>If you are interested in testing an optimization pass, check out the
+"libcalls" test as an example.  It can be run like this:<p>
+
+<div class="doc_code">
+<pre>
+% cd llvm/projects/test-suite/MultiSource/Benchmarks  # or some other level
+% make TEST=libcalls report
+</pre>
+</div>
+
+<p>This will do a bunch of stuff, then eventually print a table like this:</p>
+
+<div class="doc_code">
+<pre>
+Name                                  | total | #exit |
+...
+FreeBench/analyzer/analyzer           | 51    | 6     | 
+FreeBench/fourinarow/fourinarow       | 1     | 1     | 
+FreeBench/neural/neural               | 19    | 9     | 
+FreeBench/pifft/pifft                 | 5     | 3     | 
+MallocBench/cfrac/cfrac               | 1     | *     | 
+MallocBench/espresso/espresso         | 52    | 12    | 
+MallocBench/gs/gs                     | 4     | *     | 
+Prolangs-C/TimberWolfMC/timberwolfmc  | 302   | *     | 
+Prolangs-C/agrep/agrep                | 33    | 12    | 
+Prolangs-C/allroots/allroots          | *     | *     | 
+Prolangs-C/assembler/assembler        | 47    | *     | 
+Prolangs-C/bison/mybison              | 74    | *     | 
+...
+</pre>
+</div>
+
+<p>This basically is grepping the -stats output and displaying it in a table.
+You can also use the "TEST=libcalls report.html" target to get the table in HTML
+form, similarly for report.csv and report.tex.</p>
+
+<p>The source for this is in test-suite/TEST.libcalls.*.  The format is pretty
+simple: the Makefile indicates how to run the test (in this case, 
+"<tt>opt -simplify-libcalls -stats</tt>"), and the report contains one line for
+each column of the output.  The first value is the header for the column and the
+second is the regex to grep the output of the command for.  There are lots of
+example reports that can do fancy stuff.</p>
+
+</div>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+  src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
+  <a href="http://validator.w3.org/check/referer"><img
+  src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
+
+  John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner<br>
+  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date$
+</address>
+</body>
+</html>
diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html
index bb47cb1..fe5d836 100644
--- a/docs/TestingGuide.html
+++ b/docs/TestingGuide.html
@@ -18,14 +18,13 @@
   <li><a href="#org">LLVM testing infrastructure organization</a>
     <ul>
       <li><a href="#regressiontests">Regression tests</a></li>
-      <li><a href="#testsuite">Test suite</a></li>
+      <li><a href="#testsuite"><tt>test-suite</tt></a></li>
       <li><a href="#debuginfotests">Debugging Information tests</a></li>
     </ul>
   </li>
   <li><a href="#quick">Quick start</a>
     <ul>
       <li><a href="#quickregressiontests">Regression tests</a></li>
-      <li><a href="#quicktestsuite">Test suite</a></li>
       <li><a href="#quickdebuginfotests">Debugging Information tests</a></li>
    </ul>
   </li>
@@ -37,13 +36,10 @@
       <li><a href="#rtfeatures">Other features</a></li>
    </ul>
   </li>
-  <li><a href="#testsuitestructure">Test suite structure</a></li>
-  <li><a href="#testsuiterun">Running the test suite</a>
+  <li><a href="#testsuiteoverview"><tt>test-suite</tt> Overview</a>
     <ul>
-      <li><a href="#testsuiteexternal">Configuring External Tests</a></li>
-      <li><a href="#testsuitetests">Running different tests</a></li>
-      <li><a href="#testsuiteoutput">Generating test output</a></li>
-      <li><a href="#testsuitecustom">Writing custom tests for test-suite</a></li>
+      <li><a href="#testsuitequickstart"><tt>test-suite</tt> Quickstart</a></li>
+      <li><a href="#testsuitemakefiles"><tt>test-suite</tt> Makefiles</a></li>
    </ul>
   </li>
 </ol>
@@ -85,10 +81,13 @@ as <a href="http://python.org">Python</a> 2.4 or later.</p>
 <p>The LLVM testing infrastructure contains two major categories of tests:
 regression tests and whole programs. The regression tests are contained inside
 the LLVM repository itself under <tt>llvm/test</tt> and are expected to always
-pass -- they should be run before every commit. The whole programs tests are
-referred to as the "LLVM test suite" and are in the <tt>test-suite</tt> module
-in subversion.
-</p>
+pass -- they should be run before every commit.</p>
+
+<p>The whole programs tests are referred to as the "LLVM test suite" (or
+"test-suite") and are in the <tt>test-suite</tt> module in subversion. For
+historical reasons, these tests are also referred to as the "nightly tests" in
+places, which is less ambiguous than "test-suite" and remains in use although we
+run them much more often than nightly.</p>
 
 <!-- _______________________________________________________________________ -->
 <h3><a name="regressiontests">Regression tests</a></h3>
@@ -118,20 +117,19 @@ application or benchmark.</p>
 </div>
 
 <!-- _______________________________________________________________________ -->
-<h3><a name="testsuite">Test suite</a></h3>
+<h3><a name="testsuite"><tt>test-suite</tt></a></h3>
 <!-- _______________________________________________________________________ -->
 
 <div>
 
-<p>The test suite contains whole programs, which are pieces of
-code which can be compiled and linked into a stand-alone program that can be
-executed.  These programs are generally written in high level languages such as
-C or C++, but sometimes they are written straight in LLVM assembly.</p>
+<p>The test suite contains whole programs, which are pieces of code which can be
+compiled and linked into a stand-alone program that can be executed.  These
+programs are generally written in high level languages such as C or C++.</p>
 
-<p>These programs are compiled and then executed using several different
-methods (native compiler, LLVM C backend, LLVM JIT, LLVM native code generation,
-etc).  The output of these programs is compared to ensure that LLVM is compiling
-the program correctly.</p>
+<p>These programs are compiled using a user specified compiler and set of flags,
+and then executed to capture the program output and timing information.  The
+output of these programs is compared to a reference output to ensure that the
+program is being compiled correctly.</p>
 
 <p>In addition to compiling and executing programs, whole program tests serve as
 a way of benchmarking LLVM performance, both in terms of the efficiency of the
@@ -168,15 +166,14 @@ test suite for more information . This test suite is located in the
 
   <p>The tests are located in two separate Subversion modules. The regressions
   tests are in the main "llvm" module under the directory
-  <tt>llvm/test</tt> (so you get these tests for free with the main llvm tree).
-  The more comprehensive test suite that includes whole 
-programs in C and C++ is in the <tt>test-suite</tt> module. This module should
-be checked out to the <tt>llvm/projects</tt> directory (don't use another name
-than the default "test-suite", for then the test suite will be run every time
-you run <tt>make</tt> in the main <tt>llvm</tt> directory).
-When you <tt>configure</tt> the <tt>llvm</tt> module, 
-the <tt>test-suite</tt> directory will be automatically configured. 
-Alternatively, you can configure the <tt>test-suite</tt> module manually.</p>
+  <tt>llvm/test</tt> (so you get these tests for free with the main llvm
+  tree). Use "make check-all" to run the regression tests after building
+  LLVM.</p>
+
+  <p>The more comprehensive test suite that includes whole programs in C and C++
+  is in the <tt>test-suite</tt>
+  module. See <a href="#testsuitequickstart"><tt>test-suite</tt> Quickstart</a>
+  for more information on running these tests.</p>
 
 <!-- _______________________________________________________________________ -->
 <h3><a name="quickregressiontests">Regression tests</a></h3>
@@ -243,55 +240,6 @@ script which is built as part of LLVM. For example, to run the
 </div>
 
 <!-- _______________________________________________________________________ -->
-<h3><a name="quicktestsuite">Test suite</a></h3>
-<!-- _______________________________________________________________________ -->
-
-<div>
-
-<p>To run the comprehensive test suite (tests that compile and execute whole 
-programs), first checkout and setup the <tt>test-suite</tt> module:</p>
-
-<div class="doc_code">
-<pre>
-% cd ~/llvm/projects
-% svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
-% cd ..
-</pre>
-</div>
-
-<p>and then configure and build normally as you would from the
-<a href="http://llvm.org/docs/GettingStarted.html#quickstart">Getting Started
-Guide</a>. This will autodetect first the built clang if you are building
-clang, then <tt>clang</tt> in your path and finally look for <tt>llvm-gcc</tt>
-in your path.
-
-<p>Then, run the entire test suite by running make in the <tt>test-suite</tt>
-subdirectory of your build directory:</p>
-
-<div class="doc_code">
-<pre>
-% cd <i>where-you-built-llvm</i>/projects/test-suite
-% gmake
-</pre>
-</div>
-
-<p>Usually, running the "simple" set of tests is a good idea, and you can also
-let it generate a report by running:</p>
-
-<div class="doc_code">
-<pre>
-% cd <i>where-you-built-llvm</i>/projects/test-suite
-% gmake TEST=simple report report.html
-</pre>
-</div>
-
-<p>Any of the above commands can also be run in a subdirectory of
-<tt>projects/test-suite</tt> to run the specified test only on the programs in
-that subdirectory.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
 <h3><a name="quickdebuginfotests">Debugging Information tests</a></h3>
 <div>
 <!-- _______________________________________________________________________ -->
@@ -875,30 +823,15 @@ define two separate CHECK lines that match on the same line.
 </div>
 
 <!--=========================================================================-->
-<h2><a name="testsuitestructure">Test suite Structure</a></h2>
+<h2><a name="testsuiteoverview"><tt>test-suite</tt> Overview</a></h2>
 <!--=========================================================================-->
 
 <div>
 
-<p>The <tt>test-suite</tt> module contains a number of programs that can be compiled 
-with LLVM and executed. These programs are compiled using the native compiler
-and various LLVM backends. The output from the program compiled with the 
-native compiler is assumed correct; the results from the other programs are
-compared to the native program output and pass if they match.</p>
-
-<p>When executing tests, it is usually a good idea to start out with a subset of
-the available tests or programs. This makes test run times smaller at first and
-later on this is useful to investigate individual test failures. To run some
-test only on a subset of programs, simply change directory to the programs you
-want tested and run <tt>gmake</tt> there. Alternatively, you can run a different
-test using the <tt>TEST</tt> variable to change what tests or run on the
-selected programs (see below for more info).</p>
-
-<p>In addition for testing correctness, the <tt>test-suite</tt> directory also
-performs timing tests of various LLVM optimizations.  It also records
-compilation times for the compilers and the JIT.  This information can be
-used to compare the effectiveness of LLVM's optimizations and code
-generation.</p>
+<p>The <tt>test-suite</tt> module contains a number of programs that can be
+compiled and executed. The <tt>test-suite</tt> includes reference outputs for
+all of the programs, so that the output of the executed program can be checked
+for correctness.</p>
 
 <p><tt>test-suite</tt> tests are divided into three types of tests: MultiSource,
 SingleSource, and External.</p> 
@@ -920,246 +853,40 @@ go here.</p></li>
 to (i.e., not distributed with) LLVM.  The most prominent members of this
 directory are the SPEC 95 and SPEC 2000 benchmark suites. The <tt>External</tt>
 directory does not contain these actual tests, but only the Makefiles that know
-how to properly compile these programs from somewhere else. The presence and
-location of these external programs is configured by the test-suite
-<tt>configure</tt> script.</p></li>
+how to properly compile these programs from somewhere else. When
+using <tt>LNT</tt>, use the <tt>--test-externals</tt> option to include these
+tests in the results.</p></li>
 </ul>
-
-<p>Each tree is then subdivided into several categories, including applications,
-benchmarks, regression tests, code that is strange grammatically, etc.  These
-organizations should be relatively self explanatory.</p>
-
-<p>Some tests are known to fail.  Some are bugs that we have not fixed yet;
-others are features that we haven't added yet (or may never add).  In the
-regression tests, the result for such tests will be XFAIL (eXpected FAILure).
-In this way, you can tell the difference between an expected and unexpected
-failure.</p>
-
-<p>The tests in the test suite have no such feature at this time. If the
-test passes, only warnings and other miscellaneous output will be generated.  If
-a test fails, a large &lt;program&gt; FAILED message will be displayed.  This
-will help you separate benign warnings from actual test failures.</p>
-
 </div>
 
 <!--=========================================================================-->
-<h2><a name="testsuiterun">Running the test suite</a></h2>
+<h2><a name="testsuitequickstart"><tt>test-suite</tt> Quickstart</a></h2>
 <!--=========================================================================-->
 
 <div>
+<p>The modern way of running the <tt>test-suite</tt> is focused on testing and
+benchmarking complete compilers using
+the <a href="http://llvm.org/docs/lnt">LNT</a> testing infrastructure.</p>
 
-<p>First, all tests are executed within the LLVM object directory tree.  They
-<i>are not</i> executed inside of the LLVM source tree. This is because the
-test suite creates temporary files during execution.</p>
-
-<p>To run the test suite, you need to use the following steps:</p>
-
-<ol>
-  <li><tt>cd</tt> into the <tt>llvm/projects</tt> directory in your source tree.
-  </li>
-
-  <li><p>Check out the <tt>test-suite</tt> module with:</p>
-
-<div class="doc_code">
-<pre>
-% svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
-</pre>
+<p>For more information on using LNT to execute the <tt>test-suite</tt>, please
+see the <a href="http://llvm.org/docs/lnt/quickstart.html">LNT Quickstart</a>
+documentation.</p>
 </div>
-    <p>This will get the test suite into <tt>llvm/projects/test-suite</tt>.</p>
-  </li>
-  <li><p>Configure and build <tt>llvm</tt>.</p></li>
-  <li><p>Configure and build <tt>llvm-gcc</tt>.</p></li>
-  <li><p>Install <tt>llvm-gcc</tt> somewhere.</p></li>
-  <li><p><em>Re-configure</em> <tt>llvm</tt> from the top level of
-      each build tree (LLVM object directory tree) in which you want
-      to run the test suite, just as you do before building LLVM.</p>
-    <p>During the <em>re-configuration</em>, you must either: (1)
-      have <tt>llvm-gcc</tt> you just built in your path, or (2)
-      specify the directory where your just-built <tt>llvm-gcc</tt> is
-      installed using <tt>--with-llvmgccdir=$LLVM_GCC_DIR</tt>.</p>
-    <p>You must also tell the configure machinery that the test suite
-      is available so it can be configured for your build tree:</p>
-<div class="doc_code">
-<pre>
-% cd $LLVM_OBJ_ROOT ; $LLVM_SRC_ROOT/configure [--with-llvmgccdir=$LLVM_GCC_DIR]
-</pre>
-</div>
-    <p>[Remember that <tt>$LLVM_GCC_DIR</tt> is the directory where you
-    <em>installed</em> llvm-gcc, not its src or obj directory.]</p>
-  </li>
 
-  <li><p>You can now run the test suite from your build tree as follows:</p>
-<div class="doc_code">
-<pre>
-% cd $LLVM_OBJ_ROOT/projects/test-suite
-% make
-</pre>
-</div>
-  </li>
-</ol>
-<p>Note that the second and third steps only need to be done once. After you
-have the suite checked out and configured, you don't need to do it again (unless
-the test code or configure script changes).</p>
-
-<!-- _______________________________________________________________________ -->
-<h3>
-  <a name="testsuiteexternal">Configuring External Tests</a>
-</h3>
-<!-- _______________________________________________________________________ -->
-
-<div>
-<p>In order to run the External tests in the <tt>test-suite</tt>
-  module, you must specify <i>--with-externals</i>.  This
-  must be done during the <em>re-configuration</em> step (see above),
-  and the <tt>llvm</tt> re-configuration must recognize the
-  previously-built <tt>llvm-gcc</tt>.  If any of these is missing or
-  neglected, the External tests won't work.</p>
-<dl>
-<dt><i>--with-externals</i></dt>
-<dt><i>--with-externals=&lt;<tt>directory</tt>&gt;</i></dt>
-</dl>
-  This tells LLVM where to find any external tests.  They are expected to be
-  in specifically named subdirectories of &lt;<tt>directory</tt>&gt;.
-  If <tt>directory</tt> is left unspecified,
-  <tt>configure</tt> uses the default value
-  <tt>/home/vadve/shared/benchmarks/speccpu2000/benchspec</tt>.
-  Subdirectory names known to LLVM include:
-  <dl>
-  <dt>spec95</dt>
-  <dt>speccpu2000</dt>
-  <dt>speccpu2006</dt>
-  <dt>povray31</dt>
-  </dl>
-  Others are added from time to time, and can be determined from 
-  <tt>configure</tt>.
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3>
-  <a name="testsuitetests">Running different tests</a>
-</h3>
-<!-- _______________________________________________________________________ -->
-<div>
-<p>In addition to the regular "whole program" tests, the <tt>test-suite</tt>
-module also provides a mechanism for compiling the programs in different ways.
-If the variable TEST is defined on the <tt>gmake</tt> command line, the test system will
-include a Makefile named <tt>TEST.&lt;value of TEST variable&gt;.Makefile</tt>.
-This Makefile can modify build rules to yield different results.</p>
-
-<p>For example, the LLVM nightly tester uses <tt>TEST.nightly.Makefile</tt> to
-create the nightly test reports.  To run the nightly tests, run <tt>gmake
-TEST=nightly</tt>.</p>
-
-<p>There are several TEST Makefiles available in the tree.  Some of them are
-designed for internal LLVM research and will not work outside of the LLVM
-research group.  They may still be valuable, however, as a guide to writing your
-own TEST Makefile for any optimization or analysis passes that you develop with
-LLVM.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3>
-  <a name="testsuiteoutput">Generating test output</a>
-</h3>
-<!-- _______________________________________________________________________ -->
-<div>
-  <p>There are a number of ways to run the tests and generate output. The most
-  simple one is simply running <tt>gmake</tt> with no arguments. This will
-  compile and run all programs in the tree using a number of different methods
-  and compare results. Any failures are reported in the output, but are likely
-  drowned in the other output. Passes are not reported explicitely.</p>
-
-  <p>Somewhat better is running <tt>gmake TEST=sometest test</tt>, which runs
-  the specified test and usually adds per-program summaries to the output
-  (depending on which sometest you use). For example, the <tt>nightly</tt> test
-  explicitely outputs TEST-PASS or TEST-FAIL for every test after each program.
-  Though these lines are still drowned in the output, it's easy to grep the
-  output logs in the Output directories.</p>
-
-  <p>Even better are the <tt>report</tt> and <tt>report.format</tt> targets
-  (where <tt>format</tt> is one of <tt>html</tt>, <tt>csv</tt>, <tt>text</tt> or
-  <tt>graphs</tt>). The exact contents of the report are dependent on which
-  <tt>TEST</tt> you are running, but the text results are always shown at the
-  end of the run and the results are always stored in the
-  <tt>report.&lt;type&gt;.format</tt> file (when running with
-  <tt>TEST=&lt;type&gt;</tt>).
-
-  The <tt>report</tt> also generate a file called
-  <tt>report.&lt;type&gt;.raw.out</tt> containing the output of the entire test
-  run.
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3>
-  <a name="testsuitecustom">Writing custom tests for the test suite</a>
-</h3>
-<!-- _______________________________________________________________________ -->
+<!--=========================================================================-->
+<h2><a name="testsuitemakefiles"><tt>test-suite</tt> Makefiles</a></h2>
+<!--=========================================================================-->
 
 <div>
-
-<p>Assuming you can run the test suite, (e.g. "<tt>gmake TEST=nightly report</tt>"
-should work), it is really easy to run optimizations or code generator
-components against every program in the tree, collecting statistics or running
-custom checks for correctness.  At base, this is how the nightly tester works,
-it's just one example of a general framework.</p>
-
-<p>Lets say that you have an LLVM optimization pass, and you want to see how
-many times it triggers.  First thing you should do is add an LLVM
-<a href="ProgrammersManual.html#Statistic">statistic</a> to your pass, which
-will tally counts of things you care about.</p>
-
-<p>Following this, you can set up a test and a report that collects these and
-formats them for easy viewing.  This consists of two files, a
-"<tt>test-suite/TEST.XXX.Makefile</tt>" fragment (where XXX is the name of your
-test) and a "<tt>test-suite/TEST.XXX.report</tt>" file that indicates how to
-format the output into a table.  There are many example reports of various
-levels of sophistication included with the test suite, and the framework is very
-general.</p>
-
-<p>If you are interested in testing an optimization pass, check out the
-"libcalls" test as an example.  It can be run like this:<p>
-
-<div class="doc_code">
-<pre>
-% cd llvm/projects/test-suite/MultiSource/Benchmarks  # or some other level
-% make TEST=libcalls report
-</pre>
-</div>
-
-<p>This will do a bunch of stuff, then eventually print a table like this:</p>
-
-<div class="doc_code">
-<pre>
-Name                                  | total | #exit |
-...
-FreeBench/analyzer/analyzer           | 51    | 6     | 
-FreeBench/fourinarow/fourinarow       | 1     | 1     | 
-FreeBench/neural/neural               | 19    | 9     | 
-FreeBench/pifft/pifft                 | 5     | 3     | 
-MallocBench/cfrac/cfrac               | 1     | *     | 
-MallocBench/espresso/espresso         | 52    | 12    | 
-MallocBench/gs/gs                     | 4     | *     | 
-Prolangs-C/TimberWolfMC/timberwolfmc  | 302   | *     | 
-Prolangs-C/agrep/agrep                | 33    | 12    | 
-Prolangs-C/allroots/allroots          | *     | *     | 
-Prolangs-C/assembler/assembler        | 47    | *     | 
-Prolangs-C/bison/mybison              | 74    | *     | 
-...
-</pre>
-</div>
-
-<p>This basically is grepping the -stats output and displaying it in a table.
-You can also use the "TEST=libcalls report.html" target to get the table in HTML
-form, similarly for report.csv and report.tex.</p>
-
-<p>The source for this is in test-suite/TEST.libcalls.*.  The format is pretty
-simple: the Makefile indicates how to run the test (in this case, 
-"<tt>opt -simplify-libcalls -stats</tt>"), and the report contains one line for
-each column of the output.  The first value is the header for the column and the
-second is the regex to grep the output of the command for.  There are lots of
-example reports that can do fancy stuff.</p>
-
+<p>Historically, the <tt>test-suite</tt> was executed using a complicated setup
+of Makefiles. The LNT based approach above is recommended for most users, but
+there are some testing scenarios which are not supported by the LNT approach. In
+addition, LNT currently uses the Makefile setup under the covers and so
+developers who are interested in how LNT works under the hood may want to
+understand the Makefile based setup.</p>
+
+<p>For more information on the <tt>test-suite</tt> Makefile setup, please see
+the <a href="TestSuiteMakefileGuide.html">Test Suite Makefile Guide.</a></p>
 </div>
 
 </div>
diff --git a/include/llvm-c/Analysis.h b/include/llvm-c/Analysis.h
index e1e4487..f0bdddc 100644
--- a/include/llvm-c/Analysis.h
+++ b/include/llvm-c/Analysis.h
@@ -25,6 +25,12 @@
 extern "C" {
 #endif
 
+/**
+ * @defgroup LLVMCAnalysis Analysis
+ * @ingroup LLVMC
+ *
+ * @{
+ */
 
 typedef enum {
   LLVMAbortProcessAction, /* verifier will print to stderr and abort() */
@@ -48,6 +54,10 @@ LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action);
 void LLVMViewFunctionCFG(LLVMValueRef Fn);
 void LLVMViewFunctionCFGOnly(LLVMValueRef Fn);
 
+/**
+ * @}
+ */
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/llvm-c/BitReader.h b/include/llvm-c/BitReader.h
index 6db6607..5228035 100644
--- a/include/llvm-c/BitReader.h
+++ b/include/llvm-c/BitReader.h
@@ -25,6 +25,12 @@
 extern "C" {
 #endif
 
+/**
+ * @defgroup LLVMCBitReader Bit Reader
+ * @ingroup LLVMC
+ *
+ * @{
+ */
 
 /* Builds a module from the bitcode in the specified memory buffer, returning a
    reference to the module via the OutModule parameter. Returns 0 on success.
@@ -59,6 +65,10 @@ LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
                                       LLVMModuleProviderRef *OutMP,
                                       char **OutMessage);
 
+/**
+ * @}
+ */
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/llvm-c/BitWriter.h b/include/llvm-c/BitWriter.h
index bcbfb11..ba5a677 100644
--- a/include/llvm-c/BitWriter.h
+++ b/include/llvm-c/BitWriter.h
@@ -25,6 +25,12 @@
 extern "C" {
 #endif
 
+/**
+ * @defgroup LLVMCBitWriter Bit Writer
+ * @ingroup LLVMC
+ *
+ * @{
+ */
 
 /*===-- Operations on modules ---------------------------------------------===*/
 
@@ -39,6 +45,10 @@ int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose,
     descriptor. Returns 0 on success. Closes the Handle. */ 
 int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int Handle);
 
+/**
+ * @}
+ */
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index e0195e0..52b8b1c 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -10,24 +10,6 @@
 |* This header declares the C interface to libLLVMCore.a, which implements    *|
 |* the LLVM intermediate representation.                                      *|
 |*                                                                            *|
-|* LLVM uses a polymorphic type hierarchy which C cannot represent, therefore *|
-|* parameters must be passed as base types. Despite the declared types, most  *|
-|* of the functions provided operate only on branches of the type hierarchy.  *|
-|* The declared parameter names are descriptive and specify which type is     *|
-|* required. Additionally, each type hierarchy is documented along with the   *|
-|* functions that operate upon it. For more detail, refer to LLVM's C++ code. *|
-|* If in doubt, refer to Core.cpp, which performs paramter downcasts in the   *|
-|* form unwrap<RequiredType>(Param).                                          *|
-|*                                                                            *|
-|* Many exotic languages can interoperate with C code but have a harder time  *|
-|* with C++ due to name mangling. So in addition to C, this interface enables *|
-|* tools written in such languages.                                           *|
-|*                                                                            *|
-|* When included into a C++ source file, also declares 'wrap' and 'unwrap'    *|
-|* helpers to perform opaque reference<-->pointer conversions. These helpers  *|
-|* are shorter and more tightly typed than writing the casts by hand when     *|
-|* authoring bindings. In assert builds, they will do runtime type checking.  *|
-|*                                                                            *|
 \*===----------------------------------------------------------------------===*/
 
 #ifndef LLVM_C_CORE_H
@@ -46,50 +28,121 @@
 extern "C" {
 #endif
 
+/**
+ * @defgroup LLVMC LLVM-C: C interface to LLVM
+ *
+ * This module exposes parts of the LLVM library as a C API.
+ *
+ * @{
+ */
+
+/**
+ * @defgroup LLVMCTransforms Transforms
+ */
+
+/**
+ * @defgroup LLVMCCore Core
+ *
+ * This modules provide an interface to libLLVMCore, which implements
+ * the LLVM intermediate representation as well as other related types
+ * and utilities.
+ *
+ * LLVM uses a polymorphic type hierarchy which C cannot represent, therefore
+ * parameters must be passed as base types. Despite the declared types, most
+ * of the functions provided operate only on branches of the type hierarchy.
+ * The declared parameter names are descriptive and specify which type is
+ * required. Additionally, each type hierarchy is documented along with the
+ * functions that operate upon it. For more detail, refer to LLVM's C++ code.
+ * If in doubt, refer to Core.cpp, which performs paramter downcasts in the
+ * form unwrap<RequiredType>(Param).
+ *
+ * Many exotic languages can interoperate with C code but have a harder time
+ * with C++ due to name mangling. So in addition to C, this interface enables
+ * tools written in such languages.
+ *
+ * When included into a C++ source file, also declares 'wrap' and 'unwrap'
+ * helpers to perform opaque reference<-->pointer conversions. These helpers
+ * are shorter and more tightly typed than writing the casts by hand when
+ * authoring bindings. In assert builds, they will do runtime type checking.
+ *
+ * @{
+ */
+
+/**
+ * @defgroup LLVMCCoreTypes Types and Enumerations
+ *
+ * @{
+ */
 
 typedef int LLVMBool;
 
 /* Opaque types. */
 
 /**
- * The top-level container for all LLVM global data.  See the LLVMContext class.
+ * The top-level container for all LLVM global data. See the LLVMContext class.
  */
 typedef struct LLVMOpaqueContext *LLVMContextRef;
 
 /**
  * The top-level container for all other LLVM Intermediate Representation (IR)
- * objects. See the llvm::Module class.
+ * objects.
+ *
+ * @see llvm::Module
  */
 typedef struct LLVMOpaqueModule *LLVMModuleRef;
 
 /**
- * Each value in the LLVM IR has a type, an LLVMTypeRef. See the llvm::Type
- * class.
+ * Each value in the LLVM IR has a type, an LLVMTypeRef.
+ *
+ * @see llvm::Type
  */
 typedef struct LLVMOpaqueType *LLVMTypeRef;
 
+/**
+ * Represents an individual value in LLVM IR.
+ *
+ * This models llvm::Value.
+ */
 typedef struct LLVMOpaqueValue *LLVMValueRef;
+
+/**
+ * Represents a basic block of instruction in LLVM IR.
+ *
+ * This models llvm::BasicBlock.
+ */
 typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef;
+
+/**
+ * Represents an LLVM basic block builder.
+ *
+ * This models llvm::IRBuilder.
+ */
 typedef struct LLVMOpaqueBuilder *LLVMBuilderRef;
 
-/* Interface used to provide a module to JIT or interpreter.  This is now just a
- * synonym for llvm::Module, but we have to keep using the different type to
- * keep binary compatibility.
+/**
+ * Interface used to provide a module to JIT or interpreter.
+ * This is now just a synonym for llvm::Module, but we have to keep using the
+ * different type to keep binary compatibility.
  */
 typedef struct LLVMOpaqueModuleProvider *LLVMModuleProviderRef;
 
-/* Used to provide a module to JIT or interpreter.
- * See the llvm::MemoryBuffer class.
+/**
+ * Used to provide a module to JIT or interpreter.
+ *
+ * @see llvm::MemoryBuffer
  */
 typedef struct LLVMOpaqueMemoryBuffer *LLVMMemoryBufferRef;
 
-/** See the llvm::PassManagerBase class. */
+/** @see llvm::PassManagerBase */
 typedef struct LLVMOpaquePassManager *LLVMPassManagerRef;
 
-/** See the llvm::PassRegistry class. */
+/** @see llvm::PassRegistry */
 typedef struct LLVMOpaquePassRegistry *LLVMPassRegistryRef;
 
-/** Used to get the users and usees of a Value. See the llvm::Use class. */
+/**
+ * Used to get the users and usees of a Value.
+ *
+ * @see llvm::Use */
 typedef struct LLVMOpaqueUse *LLVMUseRef;
 
 typedef enum {
@@ -298,6 +351,10 @@ typedef enum {
   LLVMLandingPadFilter    /**< A filter clause  */
 } LLVMLandingPadClauseTy;
 
+/**
+ * @}
+ */
+
 void LLVMInitializeCore(LLVMPassRegistryRef R);
 
 
@@ -306,49 +363,233 @@ void LLVMInitializeCore(LLVMPassRegistryRef R);
 void LLVMDisposeMessage(char *Message);
 
 
-/*===-- Contexts ----------------------------------------------------------===*/
+/**
+ * @defgroup LLVMCCoreContext Contexts
+ *
+ * Contexts are execution states for the core LLVM IR system.
+ *
+ * Most types are tied to a context instance. Multiple contexts can
+ * exist simultaneously. A single context is not thread safe. However,
+ * different contexts can execute on different threads simultaneously.
+ *
+ * @{
+ */
 
-/* Create and destroy contexts. */
+/**
+ * Create a new context.
+ *
+ * Every call to this function should be paired with a call to
+ * LLVMContextDispose() or the context will leak memory.
+ */
 LLVMContextRef LLVMContextCreate(void);
+
+/**
+ * Obtain the global context instance.
+ */
 LLVMContextRef LLVMGetGlobalContext(void);
+
+/**
+ * Destroy a context instance.
+ *
+ * This should be called for every call to LLVMContextCreate() or memory
+ * will be leaked.
+ */
 void LLVMContextDispose(LLVMContextRef C);
 
 unsigned LLVMGetMDKindIDInContext(LLVMContextRef C, const char* Name,
                                   unsigned SLen);
 unsigned LLVMGetMDKindID(const char* Name, unsigned SLen);
 
-/*===-- Modules -----------------------------------------------------------===*/
+/**
+ * @}
+ */
 
-/* Create and destroy modules. */ 
-/** See llvm::Module::Module. */
+/**
+ * @defgroup LLVMCCoreModule Modules
+ *
+ * Modules represent the top-level structure in a LLVM program. An LLVM
+ * module is effectively a translation unit or a collection of
+ * translation units merged together.
+ *
+ * @{
+ */
+
+/**
+ * Create a new, empty module in the global context.
+ *
+ * This is equivalent to calling LLVMModuleCreateWithNameInContext with
+ * LLVMGetGlobalContext() as the context parameter.
+ *
+ * Every invocation should be paired with LLVMDisposeModule() or memory
+ * will be leaked.
+ */
 LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID);
+
+/**
+ * Create a new, empty module in a specific context.
+ *
+ * Every invocation should be paired with LLVMDisposeModule() or memory
+ * will be leaked.
+ */
 LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID,
                                                 LLVMContextRef C);
 
-/** See llvm::Module::~Module. */
+/**
+ * Destroy a module instance.
+ *
+ * This must be called for every created module or memory will be
+ * leaked.
+ */
 void LLVMDisposeModule(LLVMModuleRef M);
 
-/** Data layout. See Module::getDataLayout. */
+/**
+ * Obtain the data layout for a module.
+ *
+ * @see Module::getDataLayout()
+ */
 const char *LLVMGetDataLayout(LLVMModuleRef M);
+
+/**
+ * Set the data layout for a module.
+ *
+ * @see Module::setDataLayout()
+ */
 void LLVMSetDataLayout(LLVMModuleRef M, const char *Triple);
 
-/** Target triple. See Module::getTargetTriple. */
+/**
+ * Obtain the target triple for a module.
+ *
+ * @see Module::getTargetTriple()
+ */
 const char *LLVMGetTarget(LLVMModuleRef M);
+
+/**
+ * Set the target triple for a module.
+ *
+ * @see Module::setTargetTriple()
+ */
 void LLVMSetTarget(LLVMModuleRef M, const char *Triple);
 
-/** See Module::dump. */
+/**
+ * Dump a representation of a module to stderr.
+ *
+ * @see Module::dump()
+ */
 void LLVMDumpModule(LLVMModuleRef M);
 
-/** See Module::setModuleInlineAsm. */
+/**
+ * Set inline assembly for a module.
+ *
+ * @see Module::setModuleInlineAsm()
+ */
 void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm);
 
-/** See Module::getContext. */
+/**
+ * Obtain the context to which this module is associated.
+ *
+ * @see Module::getContext()
+ */
 LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M);
 
-/*===-- Types -------------------------------------------------------------===*/
+/**
+ * Obtain a Type from a module by its registered name.
+ */
+LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name);
+
+/**
+ * Obtain the number of operands for named metadata in a module.
+ *
+ * @see llvm::Module::getNamedMetadata()
+ */
+unsigned LLVMGetNamedMetadataNumOperands(LLVMModuleRef M, const char* name);
+
+/**
+ * Obtain the named metadata operands for a module.
+ *
+ * The passed LLVMValueRef pointer should refer to an array of
+ * LLVMValueRef at least LLVMGetNamedMetadataNumOperands long. This
+ * array will be populated with the LLVMValueRef instances. Each
+ * instance corresponds to a llvm::MDNode.
+ *
+ * @see llvm::Module::getNamedMetadata()
+ * @see llvm::MDNode::getOperand()
+ */
+void LLVMGetNamedMetadataOperands(LLVMModuleRef M, const char* name, LLVMValueRef *Dest);
+
+/**
+ * Add an operand to named metadata.
+ *
+ * @see llvm::Module::getNamedMetadata()
+ * @see llvm::MDNode::addOperand()
+ */
+void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char* name,
+                                 LLVMValueRef Val);
 
-/* LLVM types conform to the following hierarchy:
- * 
+/**
+ * Add a function to a module under a specified name.
+ *
+ * @see llvm::Function::Create()
+ */
+LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name,
+                             LLVMTypeRef FunctionTy);
+
+/**
+ * Obtain a Function value from a Module by its name.
+ *
+ * The returned value corresponds to a llvm::Function value.
+ *
+ * @see llvm::Module::getFunction()
+ */
+LLVMValueRef LLVMGetNamedFunction(LLVMModuleRef M, const char *Name);
+
+/**
+ * Obtain an iterator to the first Function in a Module.
+ *
+ * @see llvm::Module::begin()
+ */
+LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M);
+
+/**
+ * Obtain an iterator to the last Function in a Module.
+ *
+ * @see llvm::Module::end()
+ */
+LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M);
+
+/**
+ * Advance a Function iterator to the next Function.
+ *
+ * Returns NULL if the iterator was already at the end and there are no more
+ * functions.
+ */
+LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn);
+
+/**
+ * Decrement a Function iterator to the previous Function.
+ *
+ * Returns NULL if the iterator was already at the beginning and there are
+ * no previous functions.
+ */
+LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn);
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreType Types
+ *
+ * Types represent the type of a value.
+ *
+ * Types are associated with a context instance. The context internally
+ * deduplicates types so there is only 1 instance of a specific type
+ * alive at a time. In other words, a unique type is shared among all
+ * consumers within a context.
+ *
+ * A Type in the C API corresponds to llvm::Type.
+ *
+ * Types have the following hierarchy:
+ *
  *   types:
  *     integer type
  *     real type
@@ -360,16 +601,44 @@ LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M);
  *     void type
  *     label type
  *     opaque type
+ *
+ * @{
  */
 
-/** See llvm::LLVMTypeKind::getTypeID. */
+/**
+ * Obtain the enumerated type of a Type instance.
+ *
+ * @see llvm::Type:getTypeID()
+ */
 LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty);
+
+/**
+ * Whether the type has a known size.
+ *
+ * Things that don't have a size are abstract types, labels, and void.a
+ *
+ * @see llvm::Type::isSized()
+ */
 LLVMBool LLVMTypeIsSized(LLVMTypeRef Ty);
 
-/** See llvm::LLVMType::getContext. */
+/**
+ * Obtain the context to which this type instance is associated.
+ *
+ * @see llvm::Type::getContext()
+ */
 LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty);
 
-/* Operations on integer types */
+/**
+ * @defgroup LLVMCCoreTypeInt Integer Types
+ *
+ * Functions in this section operate on integer types.
+ *
+ * @{
+ */
+
+/**
+ * Obtain an integer type from a context with specified bit width.
+ */
 LLVMTypeRef LLVMInt1TypeInContext(LLVMContextRef C);
 LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C);
 LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C);
@@ -377,6 +646,10 @@ LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C);
 LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C);
 LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits);
 
+/**
+ * Obtain an integer type from the global context with a specified bit
+ * width.
+ */
 LLVMTypeRef LLVMInt1Type(void);
 LLVMTypeRef LLVMInt8Type(void);
 LLVMTypeRef LLVMInt16Type(void);
@@ -385,14 +658,52 @@ LLVMTypeRef LLVMInt64Type(void);
 LLVMTypeRef LLVMIntType(unsigned NumBits);
 unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy);
 
-/* Operations on real types */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreTypeFloat Floating Point Types
+ *
+ * @{
+ */
+
+/**
+ * Obtain a 16-bit floating point type from a context.
+ */
 LLVMTypeRef LLVMHalfTypeInContext(LLVMContextRef C);
+
+/**
+ * Obtain a 32-bit floating point type from a context.
+ */
 LLVMTypeRef LLVMFloatTypeInContext(LLVMContextRef C);
+
+/**
+ * Obtain a 64-bit floating point type from a context.
+ */
 LLVMTypeRef LLVMDoubleTypeInContext(LLVMContextRef C);
+
+/**
+ * Obtain a 80-bit floating point type (X87) from a context.
+ */
 LLVMTypeRef LLVMX86FP80TypeInContext(LLVMContextRef C);
+
+/**
+ * Obtain a 128-bit floating point type (112-bit mantissa) from a
+ * context.
+ */
 LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C);
+
+/**
+ * Obtain a 128-bit floating point type (two 64-bits) from a context.
+ */
 LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C);
 
+/**
+ * Obtain a floating point type from the global context.
+ *
+ * These map to the functions in this group of the same name.
+ */
 LLVMTypeRef LLVMHalfType(void);
 LLVMTypeRef LLVMFloatType(void);
 LLVMTypeRef LLVMDoubleType(void);
@@ -400,55 +711,283 @@ LLVMTypeRef LLVMX86FP80Type(void);
 LLVMTypeRef LLVMFP128Type(void);
 LLVMTypeRef LLVMPPCFP128Type(void);
 
-/* Operations on function types */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreTypeFunction Function Types
+ *
+ * @{
+ */
+
+/**
+ * Obtain a function type consisting of a specified signature.
+ *
+ * The function is defined as a tuple of a return Type, a list of
+ * parameter types, and whether the function is variadic.
+ */
 LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType,
                              LLVMTypeRef *ParamTypes, unsigned ParamCount,
                              LLVMBool IsVarArg);
+
+/**
+ * Returns whether a function type is variadic.
+ */
 LLVMBool LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy);
+
+/**
+ * Obtain the Type this function Type returns.
+ */
 LLVMTypeRef LLVMGetReturnType(LLVMTypeRef FunctionTy);
+
+/**
+ * Obtain the number of parameters this function accepts.
+ */
 unsigned LLVMCountParamTypes(LLVMTypeRef FunctionTy);
+
+/**
+ * Obtain the types of a function's parameters.
+ *
+ * The Dest parameter should point to a pre-allocated array of
+ * LLVMTypeRef at least LLVMCountParamTypes() large. On return, the
+ * first LLVMCountParamTypes() entries in the array will be populated
+ * with LLVMTypeRef instances.
+ *
+ * @param FunctionTy The function type to operate on.
+ * @param Dest Memory address of an array to be filled with result.
+ */
 void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest);
 
-/* Operations on struct types */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreTypeStruct Structure Types
+ *
+ * These functions relate to LLVMTypeRef instances.
+ *
+ * @see llvm::StructType
+ *
+ * @{
+ */
+
+/**
+ * Create a new structure type in a context.
+ *
+ * A structure is specified by a list of inner elements/types and
+ * whether these can be packed together.
+ *
+ * @see llvm::StructType::create()
+ */
 LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
                                     unsigned ElementCount, LLVMBool Packed);
+
+/**
+ * Create a new structure type in the global context.
+ *
+ * @see llvm::StructType::create()
+ */
 LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes, unsigned ElementCount,
                            LLVMBool Packed);
+
+/**
+ * Create an empty structure in a context having a specified name.
+ *
+ * @see llvm::StructType::create()
+ */
 LLVMTypeRef LLVMStructCreateNamed(LLVMContextRef C, const char *Name);
+
+/**
+ * Obtain the name of a structure.
+ *
+ * @see llvm::StructType::getName()
+ */
 const char *LLVMGetStructName(LLVMTypeRef Ty);
+
+/**
+ * Set the contents of a structure type.
+ *
+ * @see llvm::StructType::setBody()
+ */
 void LLVMStructSetBody(LLVMTypeRef StructTy, LLVMTypeRef *ElementTypes,
                        unsigned ElementCount, LLVMBool Packed);
 
+/**
+ * Get the number of elements defined inside the structure.
+ *
+ * @see llvm::StructType::getNumElements()
+ */
 unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy);
+
+/**
+ * Get the elements within a structure.
+ *
+ * The function is passed the address of a pre-allocated array of
+ * LLVMTypeRef at least LLVMCountStructElementTypes() long. After
+ * invocation, this array will be populated with the structure's
+ * elements. The objects in the destination array will have a lifetime
+ * of the structure type itself, which is the lifetime of the context it
+ * is contained in.
+ */
 void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest);
+
+/**
+ * Determine whether a structure is packed.
+ *
+ * @see llvm::StructType::isPacked()
+ */
 LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy);
+
+/**
+ * Determine whether a structure is opaque.
+ *
+ * @see llvm::StructType::isOpaque()
+ */
 LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy);
 
-LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name);
+/**
+ * @}
+ */
 
-/* Operations on array, pointer, and vector types (sequence types) */
-LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount);
-LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace);
-LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount);
 
+/**
+ * @defgroup LLVMCCoreTypeSequential Sequential Types
+ *
+ * Sequential types represents "arrays" of types. This is a super class
+ * for array, vector, and pointer types.
+ *
+ * @{
+ */
+
+/**
+ * Obtain the type of elements within a sequential type.
+ *
+ * This works on array, vector, and pointer types.
+ *
+ * @see llvm::SequentialType::getElementType()
+ */
 LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty);
+
+/**
+ * Create a fixed size array type that refers to a specific type.
+ *
+ * The created type will exist in the context that its element type
+ * exists in.
+ *
+ * @see llvm::ArrayType::get()
+ */
+LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount);
+
+/**
+ * Obtain the length of an array type.
+ *
+ * This only works on types that represent arrays.
+ *
+ * @see llvm::ArrayType::getNumElements()
+ */
 unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy);
+
+/**
+ * Create a pointer type that points to a defined type.
+ *
+ * The created type will exist in the context that its pointee type
+ * exists in.
+ *
+ * @see llvm::PointerType::get()
+ */
+LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace);
+
+/**
+ * Obtain the address space of a pointer type.
+ *
+ * This only works on types that represent pointers.
+ *
+ * @see llvm::PointerType::getAddressSpace()
+ */
 unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy);
+
+/**
+ * Create a vector type that contains a defined type and has a specific
+ * number of elements.
+ *
+ * The created type will exist in the context thats its element type
+ * exists in.
+ *
+ * @see llvm::VectorType::get()
+ */
+LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount);
+
+/**
+ * Obtain the number of elements in a vector type.
+ *
+ * This only works on types that represent vectors.
+ *
+ * @see llvm::VectorType::getNumElements()
+ */
 unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy);
 
-/* Operations on other types */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreTypeOther Other Types
+ *
+ * @{
+ */
+
+/**
+ * Create a void type in a context.
+ */
 LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C);
+
+/**
+ * Create a label type in a context.
+ */
 LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C);
+
+/**
+ * Create a X86 MMX type in a context.
+ */
 LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C);
 
+/**
+ * These are similar to the above functions except they operate on the
+ * global context.
+ */
 LLVMTypeRef LLVMVoidType(void);
 LLVMTypeRef LLVMLabelType(void);
 LLVMTypeRef LLVMX86MMXType(void);
 
-/*===-- Values ------------------------------------------------------------===*/
+/**
+ * @}
+ */
 
-/* The bulk of LLVM's object model consists of values, which comprise a very
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValues Values
+ *
+ * The bulk of LLVM's object model consists of values, which comprise a very
  * rich type hierarchy.
+ *
+ * LLVMValueRef essentially represents llvm::Value. There is a rich
+ * hierarchy of classes within this type. Depending on the instance
+ * obtain, not all APIs are available.
+ *
+ * Callers can determine the type of a LLVMValueRef by calling the
+ * LLVMIsA* family of functions (e.g. LLVMIsAArgument()). These
+ * functions are defined by a macro, so it isn't obvious which are
+ * available by looking at the Doxygen source code. Instead, look at the
+ * source definition of LLVM_FOR_EACH_VALUE_SUBCLASS and note the list
+ * of value names given. These value names also correspond to classes in
+ * the llvm::Value hierarchy.
+ *
+ * @{
  */
 
 #define LLVM_FOR_EACH_VALUE_SUBCLASS(macro) \
@@ -522,92 +1061,399 @@ LLVMTypeRef LLVMX86MMXType(void);
       macro(LoadInst)                       \
       macro(VAArgInst)
 
-/* Operations on all values */
+/**
+ * @defgroup LLVMCCoreValueGeneral General APIs
+ *
+ * Functions in this section work on all LLVMValueRef instances,
+ * regardless of their sub-type. They correspond to functions available
+ * on llvm::Value.
+ *
+ * @{
+ */
+
+/**
+ * Obtain the type of a value.
+ *
+ * @see llvm::Value::getType()
+ */
 LLVMTypeRef LLVMTypeOf(LLVMValueRef Val);
+
+/**
+ * Obtain the string name of a value.
+ *
+ * @see llvm::Value::getName()
+ */
 const char *LLVMGetValueName(LLVMValueRef Val);
+
+/**
+ * Set the string name of a value.
+ *
+ * @see llvm::Value::setName()
+ */
 void LLVMSetValueName(LLVMValueRef Val, const char *Name);
+
+/**
+ * Dump a representation of a value to stderr.
+ *
+ * @see llvm::Value::dump()
+ */
 void LLVMDumpValue(LLVMValueRef Val);
+
+/**
+ * Replace all uses of a value with another one.
+ *
+ * @see llvm::Value::replaceAllUsesWith()
+ */
 void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal);
-int LLVMHasMetadata(LLVMValueRef Val);
-LLVMValueRef LLVMGetMetadata(LLVMValueRef Val, unsigned KindID);
-void LLVMSetMetadata(LLVMValueRef Val, unsigned KindID, LLVMValueRef Node);
 
-/* Conversion functions. Return the input value if it is an instance of the
-   specified class, otherwise NULL. See llvm::dyn_cast_or_null<>. */
+/**
+ * Determine whether the specified constant instance is constant.
+ */
+LLVMBool LLVMIsConstant(LLVMValueRef Val);
+
+/**
+ * Determine whether a value instance is undefined.
+ */
+LLVMBool LLVMIsUndef(LLVMValueRef Val);
+
+/**
+ * Convert value instances between types.
+ *
+ * Internally, a LLVMValueRef is "pinned" to a specific type. This
+ * series of functions allows you to cast an instance to a specific
+ * type.
+ *
+ * If the cast is not valid for the specified type, NULL is returned.
+ *
+ * @see llvm::dyn_cast_or_null<>
+ */
 #define LLVM_DECLARE_VALUE_CAST(name) \
   LLVMValueRef LLVMIsA##name(LLVMValueRef Val);
 LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DECLARE_VALUE_CAST)
 
-/* Operations on Uses */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueUses Usage
+ *
+ * This module defines functions that allow you to inspect the uses of a
+ * LLVMValueRef.
+ *
+ * It is possible to obtain a LLVMUseRef for any LLVMValueRef instance.
+ * Each LLVMUseRef (which corresponds to a llvm::Use instance) holds a
+ * llvm::User and llvm::Value.
+ *
+ * @{
+ */
+
+/**
+ * Obtain the first use of a value.
+ *
+ * Uses are obtained in an iterator fashion. First, call this function
+ * to obtain a reference to the first use. Then, call LLVMGetNextUse()
+ * on that instance and all subsequently obtained instances untl
+ * LLVMGetNextUse() returns NULL.
+ *
+ * @see llvm::Value::use_begin()
+ */
 LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val);
+
+/**
+ * Obtain the next use of a value.
+ *
+ * This effectively advances the iterator. It returns NULL if you are on
+ * the final use and no more are available.
+ */
 LLVMUseRef LLVMGetNextUse(LLVMUseRef U);
+
+/**
+ * Obtain the user value for a user.
+ *
+ * The returned value corresponds to a llvm::User type.
+ *
+ * @see llvm::Use::getUser()
+ */
 LLVMValueRef LLVMGetUser(LLVMUseRef U);
+
+/**
+ * Obtain the value this use corresponds to.
+ *
+ * @see llvm::Use::get().
+ */
 LLVMValueRef LLVMGetUsedValue(LLVMUseRef U);
 
-/* Operations on Users and metadata */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueUser User value
+ *
+ * Function in this group pertain to LLVMValueRef instances that descent
+ * from llvm::User. This includes constants, instructions, and
+ * operators.
+ *
+ * @{
+ */
+
+/**
+ * Obtain an operand at a specific index in a llvm::User value.
+ *
+ * @see llvm::User::getOperand()
+ */
 LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index);
+
+/**
+ * Set an operand at a specific index in a llvm::User value.
+ *
+ * @see llvm::User::setOperand()
+ */
 void LLVMSetOperand(LLVMValueRef User, unsigned Index, LLVMValueRef Val);
+
+/**
+ * Obtain the number of operands in a llvm::User value.
+ *
+ * @see llvm::User::getNumOperands()
+ */
 int LLVMGetNumOperands(LLVMValueRef Val);
 
-/* Operations on constants of any type */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueConstant Constants
+ *
+ * This section contains APIs for interacting with LLVMValueRef that
+ * correspond to llvm::Constant instances.
+ *
+ * These functions will work for any LLVMValueRef in the llvm::Constant
+ * class hierarchy.
+ *
+ * @{
+ */
+
+/**
+ * Obtain a constant value referring to the null instance of a type.
+ *
+ * @see llvm::Constant::getNullValue()
+ */
 LLVMValueRef LLVMConstNull(LLVMTypeRef Ty); /* all zeroes */
-LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty); /* only for int/vector */
+
+/**
+ * Obtain a constant value referring to the instance of a type
+ * consisting of all ones.
+ *
+ * This is only valid for integer types.
+ *
+ * @see llvm::Constant::getAllOnesValue()
+ */
+LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty);
+
+/**
+ * Obtain a constant value referring to an undefined value of a type.
+ *
+ * @see llvm::UndefValue::get()
+ */
 LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty);
-LLVMBool LLVMIsConstant(LLVMValueRef Val);
+
+/**
+ * Determine whether a value instance is null.
+ *
+ * @see llvm::Constant::isNullValue()
+ */
 LLVMBool LLVMIsNull(LLVMValueRef Val);
-LLVMBool LLVMIsUndef(LLVMValueRef Val);
+
+/**
+ * Obtain a constant that is a constant pointer pointing to NULL for a
+ * specified type.
+ */
 LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty);
 
-/* Operations on metadata */
-LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
-                                   unsigned SLen);
-LLVMValueRef LLVMMDString(const char *Str, unsigned SLen);
-LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
-                                 unsigned Count);
-LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count);
-const char  *LLVMGetMDString(LLVMValueRef V, unsigned* Len);
-unsigned LLVMGetNamedMetadataNumOperands(LLVMModuleRef M, const char* name);
-void LLVMGetNamedMetadataOperands(LLVMModuleRef M, const char* name, LLVMValueRef *Dest);
-void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char* name,
-                                 LLVMValueRef Val);
+/**
+ * @defgroup LLVMCCoreValueConstantScalar Scalar constants
+ *
+ * Functions in this group model LLVMValueRef instances that correspond
+ * to constants referring to scalar types.
+ *
+ * For integer types, the LLVMTypeRef parameter should correspond to a
+ * llvm::IntegerType instance and the returned LLVMValueRef will
+ * correspond to a llvm::ConstantInt.
+ *
+ * For floating point types, the LLVMTypeRef returned corresponds to a
+ * llvm::ConstantFP.
+ *
+ * @{
+ */
 
-/* Operations on scalar constants */
+/**
+ * Obtain a constant value for an integer type.
+ *
+ * The returned value corresponds to a llvm::ConstantInt.
+ *
+ * @see llvm::ConstantInt::get()
+ *
+ * @param IntTy Integer type to obtain value of.
+ * @param N The value the returned instance should refer to.
+ * @param SignExtend Whether to sign extend the produced value.
+ */
 LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
                           LLVMBool SignExtend);
+
+/**
+ * Obtain a constant value for an integer of arbitrary precision.
+ *
+ * @see llvm::ConstantInt::get()
+ */
 LLVMValueRef LLVMConstIntOfArbitraryPrecision(LLVMTypeRef IntTy,
                                               unsigned NumWords,
                                               const uint64_t Words[]);
+
+/**
+ * Obtain a constant value for an integer parsed from a string.
+ *
+ * A similar API, LLVMConstIntOfStringAndSize is also available. If the
+ * string's length is available, it is preferred to call that function
+ * instead.
+ *
+ * @see llvm::ConstantInt::get()
+ */
 LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char *Text,
                                   uint8_t Radix);
+
+/**
+ * Obtain a constant value for an integer parsed from a string with
+ * specified length.
+ *
+ * @see llvm::ConstantInt::get()
+ */
 LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char *Text,
                                          unsigned SLen, uint8_t Radix);
+
+/**
+ * Obtain a constant value referring to a double floating point value.
+ */
 LLVMValueRef LLVMConstReal(LLVMTypeRef RealTy, double N);
+
+/**
+ * Obtain a constant for a floating point value parsed from a string.
+ *
+ * A similar API, LLVMConstRealOfStringAndSize is also available. It
+ * should be used if the input string's length is known.
+ */
 LLVMValueRef LLVMConstRealOfString(LLVMTypeRef RealTy, const char *Text);
+
+/**
+ * Obtain a constant for a floating point value parsed from a string.
+ */
 LLVMValueRef LLVMConstRealOfStringAndSize(LLVMTypeRef RealTy, const char *Text,
                                           unsigned SLen);
+
+/**
+ * Obtain the zero extended value for an integer constant value.
+ *
+ * @see llvm::ConstantInt::getZExtValue()
+ */
 unsigned long long LLVMConstIntGetZExtValue(LLVMValueRef ConstantVal);
+
+/**
+ * Obtain the sign extended value for an integer constant value.
+ *
+ * @see llvm::ConstantInt::getSExtValue()
+ */
 long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal);
 
+/**
+ * @}
+ */
 
-/* Operations on composite constants */
+/**
+ * @defgroup LLVMCCoreValueConstantComposite Composite Constants
+ *
+ * Functions in this group operate on composite constants.
+ *
+ * @{
+ */
+
+/**
+ * Create a ConstantDataSequential and initialize it with a string.
+ *
+ * @see llvm::ConstantDataArray::getString()
+ */
 LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
                                       unsigned Length, LLVMBool DontNullTerminate);
-LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, 
-                                      LLVMValueRef *ConstantVals,
-                                      unsigned Count, LLVMBool Packed);
 
+/**
+ * Create a ConstantDataSequential with string content in the global context.
+ *
+ * This is the same as LLVMConstStringInContext except it operates on the
+ * global context.
+ *
+ * @see LLVMConstStringInContext()
+ * @see llvm::ConstantDataArray::getString()
+ */
 LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
                              LLVMBool DontNullTerminate);
-LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
-                            LLVMValueRef *ConstantVals, unsigned Length);
+
+/**
+ * Create an anonymous ConstantStruct with the specified values.
+ *
+ * @see llvm::ConstantStruct::getAnon()
+ */
+LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
+                                      LLVMValueRef *ConstantVals,
+                                      unsigned Count, LLVMBool Packed);
+
+/**
+ * Create a ConstantStruct in the global Context.
+ *
+ * This is the same as LLVMConstStructInContext except it operates on the
+ * global Context.
+ *
+ * @see LLVMConstStructInContext()
+ */
 LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
                              LLVMBool Packed);
+
+/**
+ * Create a ConstantArray from values.
+ *
+ * @see llvm::ConstantArray::get()
+ */
+LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
+                            LLVMValueRef *ConstantVals, unsigned Length);
+
+/**
+ * Create a non-anonymous ConstantStruct from values.
+ *
+ * @see llvm::ConstantStruct::get()
+ */
 LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy,
                                   LLVMValueRef *ConstantVals,
                                   unsigned Count);
+
+/**
+ * Create a ConstantVector from values.
+ *
+ * @see llvm::ConstantVector::get()
+ */
 LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size);
 
-/* Constant expressions */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueConstantExpressions Constant Expressions
+ *
+ * Functions in this group correspond to APIs on llvm::ConstantExpr.
+ *
+ * @see llvm::ConstantExpr.
+ *
+ * @{
+ */
 LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal);
 LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty);
 LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty);
@@ -694,7 +1540,21 @@ LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty,
                                 LLVMBool HasSideEffects, LLVMBool IsAlignStack);
 LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB);
 
-/* Operations on global variables, functions, and aliases (globals) */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueConstantGlobals Global Values
+ *
+ * This group contains functions that operate on global values. Functions in
+ * this group relate to functions in the llvm::GlobalValue class tree.
+ *
+ * @see llvm::GlobalValue
+ *
+ * @{
+ */
+
 LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global);
 LLVMBool LLVMIsDeclaration(LLVMValueRef Global);
 LLVMLinkage LLVMGetLinkage(LLVMValueRef Global);
@@ -706,7 +1566,15 @@ void LLVMSetVisibility(LLVMValueRef Global, LLVMVisibility Viz);
 unsigned LLVMGetAlignment(LLVMValueRef Global);
 void LLVMSetAlignment(LLVMValueRef Global, unsigned Bytes);
 
-/* Operations on global variables */
+/**
+ * @defgroup LLVMCoreValueConstantGlobalVariable Global Variables
+ *
+ * This group contains functions that operate on global variable values.
+ *
+ * @see llvm::GlobalVariable
+ *
+ * @{
+ */
 LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name);
 LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty,
                                          const char *Name,
@@ -724,110 +1592,672 @@ void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal);
 LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar);
 void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant);
 
-/* Operations on aliases */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCoreValueConstantGlobalAlias Global Aliases
+ *
+ * This group contains function that operate on global alias values.
+ *
+ * @see llvm::GlobalAlias
+ *
+ * @{
+ */
 LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
                           const char *Name);
 
-/* Operations on functions */
-LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name,
-                             LLVMTypeRef FunctionTy);
-LLVMValueRef LLVMGetNamedFunction(LLVMModuleRef M, const char *Name);
-LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M);
-LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M);
-LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn);
-LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn);
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueFunction Function values
+ *
+ * Functions in this group operate on LLVMValueRef instances that
+ * correspond to llvm::Function instances.
+ *
+ * @see llvm::Function
+ *
+ * @{
+ */
+
+/**
+ * Remove a function from its containing module and deletes it.
+ *
+ * @see llvm::Function::eraseFromParent()
+ */
 void LLVMDeleteFunction(LLVMValueRef Fn);
+
+/**
+ * Obtain the ID number from a function instance.
+ *
+ * @see llvm::Function::getIntrinsicID()
+ */
 unsigned LLVMGetIntrinsicID(LLVMValueRef Fn);
+
+/**
+ * Obtain the calling function of a function.
+ *
+ * The returned value corresponds to the LLVMCallConv enumeration.
+ *
+ * @see llvm::Function::getCallingConv()
+ */
 unsigned LLVMGetFunctionCallConv(LLVMValueRef Fn);
+
+/**
+ * Set the calling convention of a function.
+ *
+ * @see llvm::Function::setCallingConv()
+ *
+ * @param Fn Function to operate on
+ * @param CC LLVMCallConv to set calling convention to
+ */
 void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC);
+
+/**
+ * Obtain the name of the garbage collector to use during code
+ * generation.
+ *
+ * @see llvm::Function::getGC()
+ */
 const char *LLVMGetGC(LLVMValueRef Fn);
+
+/**
+ * Define the garbage collector to use during code generation.
+ *
+ * @see llvm::Function::setGC()
+ */
 void LLVMSetGC(LLVMValueRef Fn, const char *Name);
+
+/**
+ * Add an attribute to a function.
+ *
+ * @see llvm::Function::addAttribute()
+ */
 void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA);
+
+/**
+ * Obtain an attribute from a function.
+ *
+ * @see llvm::Function::getAttributes()
+ */
 LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn);
+
+/**
+ * Remove an attribute from a function.
+ */
 void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA);
 
-/* Operations on parameters */
+/**
+ * @defgroup LLVMCCoreValueFunctionParameters Function Parameters
+ *
+ * Functions in this group relate to arguments/parameters on functions.
+ *
+ * Functions in this group expect LLVMValueRef instances that correspond
+ * to llvm::Function instances.
+ *
+ * @{
+ */
+
+/**
+ * Obtain the number of parameters in a function.
+ *
+ * @see llvm::Function::arg_size()
+ */
 unsigned LLVMCountParams(LLVMValueRef Fn);
+
+/**
+ * Obtain the parameters in a function.
+ *
+ * The takes a pointer to a pre-allocated array of LLVMValueRef that is
+ * at least LLVMCountParams() long. This array will be filled with
+ * LLVMValueRef instances which correspond to the parameters the
+ * function receives. Each LLVMValueRef corresponds to a llvm::Argument
+ * instance.
+ *
+ * @see llvm::Function::arg_begin()
+ */
 void LLVMGetParams(LLVMValueRef Fn, LLVMValueRef *Params);
+
+/**
+ * Obtain the parameter at the specified index.
+ *
+ * Parameters are indexed from 0.
+ *
+ * @see llvm::Function::arg_begin()
+ */
 LLVMValueRef LLVMGetParam(LLVMValueRef Fn, unsigned Index);
+
+/**
+ * Obtain the function to which this argument belongs.
+ *
+ * Unlike other functions in this group, this one takes a LLVMValueRef
+ * that corresponds to a llvm::Attribute.
+ *
+ * The returned LLVMValueRef is the llvm::Function to which this
+ * argument belongs.
+ */
 LLVMValueRef LLVMGetParamParent(LLVMValueRef Inst);
+
+/**
+ * Obtain the first parameter to a function.
+ *
+ * @see llvm::Function::arg_begin()
+ */
 LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn);
+
+/**
+ * Obtain the last parameter to a function.
+ *
+ * @see llvm::Function::arg_end()
+ */
 LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn);
+
+/**
+ * Obtain the next parameter to a function.
+ *
+ * This takes a LLVMValueRef obtained from LLVMGetFirstParam() (which is
+ * actually a wrapped iterator) and obtains the next parameter from the
+ * underlying iterator.
+ */
 LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg);
+
+/**
+ * Obtain the previous parameter to a function.
+ *
+ * This is the opposite of LLVMGetNextParam().
+ */
 LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg);
+
+/**
+ * Add an attribute to a function argument.
+ *
+ * @see llvm::Argument::addAttr()
+ */
 void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA);
+
+/**
+ * Remove an attribute from a function argument.
+ *
+ * @see llvm::Argument::removeAttr()
+ */
 void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA);
+
+/**
+ * Get an attribute from a function argument.
+ */
 LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg);
+
+/**
+ * Set the alignment for a function parameter.
+ *
+ * @see llvm::Argument::addAttr()
+ * @see llvm::Attribute::constructAlignmentFromInt()
+ */
 void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align);
 
-/* Operations on basic blocks */
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueMetadata Metadata
+ *
+ * @{
+ */
+
+/**
+ * Obtain a MDString value from a context.
+ *
+ * The returned instance corresponds to the llvm::MDString class.
+ *
+ * The instance is specified by string data of a specified length. The
+ * string content is copied, so the backing memory can be freed after
+ * this function returns.
+ */
+LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
+                                   unsigned SLen);
+
+/**
+ * Obtain a MDString value from the global context.
+ */
+LLVMValueRef LLVMMDString(const char *Str, unsigned SLen);
+
+/**
+ * Obtain a MDNode value from a context.
+ *
+ * The returned value corresponds to the llvm::MDNode class.
+ */
+LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
+                                 unsigned Count);
+
+/**
+ * Obtain a MDNode value from the global context.
+ */
+LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count);
+
+/**
+ * Obtain the underlying string from a MDString value.
+ *
+ * @param V Instance to obtain string from.
+ * @param Len Memory address which will hold length of returned string.
+ * @return String data in MDString.
+ */
+const char  *LLVMGetMDString(LLVMValueRef V, unsigned* Len);
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueBasicBlock Basic Block
+ *
+ * A basic block represents a single entry single exit section of code.
+ * Basic blocks contain a list of instructions which form the body of
+ * the block.
+ *
+ * Basic blocks belong to functions. They have the type of label.
+ *
+ * Basic blocks are themselves values. However, the C API models them as
+ * LLVMBasicBlockRef.
+ *
+ * @see llvm::BasicBlock
+ *
+ * @{
+ */
+
+/**
+ * Convert a basic block instance to a value type.
+ */
 LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB);
+
+/**
+ * Determine whether a LLVMValueRef is itself a basic block.
+ */
 LLVMBool LLVMValueIsBasicBlock(LLVMValueRef Val);
+
+/**
+ * Convert a LLVMValueRef to a LLVMBasicBlockRef instance.
+ */
 LLVMBasicBlockRef LLVMValueAsBasicBlock(LLVMValueRef Val);
+
+/**
+ * Obtain the function to which a basic block belongs.
+ *
+ * @see llvm::BasicBlock::getParent()
+ */
 LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB);
+
+/**
+ * Obtain the terminator instruction for a basic block.
+ *
+ * If the basic block does not have a terminator (it is not well-formed
+ * if it doesn't), then NULL is returned.
+ *
+ * The returned LLVMValueRef corresponds to a llvm::TerminatorInst.
+ *
+ * @see llvm::BasicBlock::getTerminator()
+ */
 LLVMValueRef LLVMGetBasicBlockTerminator(LLVMBasicBlockRef BB);
+
+/**
+ * Obtain the number of basic blocks in a function.
+ *
+ * @param Fn Function value to operate on.
+ */
 unsigned LLVMCountBasicBlocks(LLVMValueRef Fn);
+
+/**
+ * Obtain all of the basic blocks in a function.
+ *
+ * This operates on a function value. The BasicBlocks parameter is a
+ * pointer to a pre-allocated array of LLVMBasicBlockRef of at least
+ * LLVMCountBasicBlocks() in length. This array is populated with
+ * LLVMBasicBlockRef instances.
+ */
 void LLVMGetBasicBlocks(LLVMValueRef Fn, LLVMBasicBlockRef *BasicBlocks);
+
+/**
+ * Obtain the first basic block in a function.
+ *
+ * The returned basic block can be used as an iterator. You will likely
+ * eventually call into LLVMGetNextBasicBlock() with it.
+ *
+ * @see llvm::Function::begin()
+ */
 LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn);
+
+/**
+ * Obtain the last basic block in a function.
+ *
+ * @see llvm::Function::end()
+ */
 LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn);
+
+/**
+ * Advance a basic block iterator.
+ */
 LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB);
+
+/**
+ * Go backwards in a basic block iterator.
+ */
 LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB);
+
+/**
+ * Obtain the basic block that corresponds to the entry point of a
+ * function.
+ *
+ * @see llvm::Function::getEntryBlock()
+ */
 LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn);
 
+/**
+ * Append a basic block to the end of a function.
+ *
+ * @see llvm::BasicBlock::Create()
+ */
 LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C,
                                                 LLVMValueRef Fn,
                                                 const char *Name);
+
+/**
+ * Append a basic block to the end of a function using the global
+ * context.
+ *
+ * @see llvm::BasicBlock::Create()
+ */
+LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef Fn, const char *Name);
+
+/**
+ * Insert a basic block in a function before another basic block.
+ *
+ * The function to add to is determined by the function of the
+ * passed basic block.
+ *
+ * @see llvm::BasicBlock::Create()
+ */
 LLVMBasicBlockRef LLVMInsertBasicBlockInContext(LLVMContextRef C,
                                                 LLVMBasicBlockRef BB,
                                                 const char *Name);
 
-LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef Fn, const char *Name);
+/**
+ * Insert a basic block in a function using the global context.
+ *
+ * @see llvm::BasicBlock::Create()
+ */
 LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef InsertBeforeBB,
                                        const char *Name);
+
+/**
+ * Remove a basic block from a function and delete it.
+ *
+ * This deletes the basic block from its containing function and deletes
+ * the basic block itself.
+ *
+ * @see llvm::BasicBlock::eraseFromParent()
+ */
 void LLVMDeleteBasicBlock(LLVMBasicBlockRef BB);
+
+/**
+ * Remove a basic block from a function.
+ *
+ * This deletes the basic block from its containing function but keep
+ * the basic block alive.
+ *
+ * @see llvm::BasicBlock::removeFromParent()
+ */
 void LLVMRemoveBasicBlockFromParent(LLVMBasicBlockRef BB);
 
+/**
+ * Move a basic block to before another one.
+ *
+ * @see llvm::BasicBlock::moveBefore()
+ */
 void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos);
+
+/**
+ * Move a basic block to after another one.
+ *
+ * @see llvm::BasicBlock::moveAfter()
+ */
 void LLVMMoveBasicBlockAfter(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos);
 
+/**
+ * Obtain the first instruction in a basic block.
+ *
+ * The returned LLVMValueRef corresponds to a llvm::Instruction
+ * instance.
+ */
 LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB);
+
+/**
+ * Obtain the last instruction in a basic block.
+ *
+ * The returned LLVMValueRef corresponds to a LLVM:Instruction.
+ */
 LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB);
 
-/* Operations on instructions */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueInstruction Instructions
+ *
+ * Functions in this group relate to the inspection and manipulation of
+ * individual instructions.
+ *
+ * In the C++ API, an instruction is modeled by llvm::Instruction. This
+ * class has a large number of descendents. llvm::Instruction is a
+ * llvm::Value and in the C API, instructions are modeled by
+ * LLVMValueRef.
+ *
+ * This group also contains sub-groups which operate on specific
+ * llvm::Instruction types, e.g. llvm::CallInst.
+ *
+ * @{
+ */
+
+/**
+ * Determine whether an instruction has any metadata attached.
+ */
+int LLVMHasMetadata(LLVMValueRef Val);
+
+/**
+ * Return metadata associated with an instruction value.
+ */
+LLVMValueRef LLVMGetMetadata(LLVMValueRef Val, unsigned KindID);
+
+/**
+ * Set metadata associated with an instruction value.
+ */
+void LLVMSetMetadata(LLVMValueRef Val, unsigned KindID, LLVMValueRef Node);
+
+/**
+ * Obtain the basic block to which an instruction belongs.
+ *
+ * @see llvm::Instruction::getParent()
+ */
 LLVMBasicBlockRef LLVMGetInstructionParent(LLVMValueRef Inst);
+
+/**
+ * Obtain the instruction that occurs after the one specified.
+ *
+ * The next instruction will be from the same basic block.
+ *
+ * If this is the last instruction in a basic block, NULL will be
+ * returned.
+ */
 LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst);
+
+/**
+ * Obtain the instruction that occured before this one.
+ *
+ * If the instruction is the first instruction in a basic block, NULL
+ * will be returned.
+ */
 LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst);
+
+/**
+ * Remove and delete an instruction.
+ *
+ * The instruction specified is removed from its containing building
+ * block and then deleted.
+ *
+ * @see llvm::Instruction::eraseFromParent()
+ */
 void LLVMInstructionEraseFromParent(LLVMValueRef Inst);
+
+/**
+ * Obtain the code opcode for an individual instruction.
+ *
+ * @see llvm::Instruction::getOpCode()
+ */
 LLVMOpcode   LLVMGetInstructionOpcode(LLVMValueRef Inst);
+
+/**
+ * Obtain the predicate of an instruction.
+ *
+ * This is only valid for instructions that correspond to llvm::ICmpInst
+ * or llvm::ConstantExpr whose opcode is llvm::Instruction::ICmp.
+ *
+ * @see llvm::ICmpInst::getPredicate()
+ */
 LLVMIntPredicate LLVMGetICmpPredicate(LLVMValueRef Inst);
 
-/* Operations on call sites */
+/**
+ * @defgroup LLVMCCoreValueInstructionCall Call Sites and Invocations
+ *
+ * Functions in this group apply to instructions that refer to call
+ * sites and invocations. These correspond to C++ types in the
+ * llvm::CallInst class tree.
+ *
+ * @{
+ */
+
+/**
+ * Set the calling convention for a call instruction.
+ *
+ * This expects an LLVMValueRef that corresponds to a llvm::CallInst or
+ * llvm::InvokeInst.
+ *
+ * @see llvm::CallInst::setCallingConv()
+ * @see llvm::InvokeInst::setCallingConv()
+ */
 void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC);
+
+/**
+ * Obtain the calling convention for a call instruction.
+ *
+ * This is the opposite of LLVMSetInstructionCallConv(). Reads its
+ * usage.
+ *
+ * @see LLVMSetInstructionCallConv()
+ */
 unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr);
+
+
 void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index, LLVMAttribute);
-void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index, 
+void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
                               LLVMAttribute);
-void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, 
+void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
                                 unsigned align);
 
-/* Operations on call instructions (only) */
+/**
+ * Obtain whether a call instruction is a tail call.
+ *
+ * This only works on llvm::CallInst instructions.
+ *
+ * @see llvm::CallInst::isTailCall()
+ */
 LLVMBool LLVMIsTailCall(LLVMValueRef CallInst);
+
+/**
+ * Set whether a call instruction is a tail call.
+ *
+ * This only works on llvm::CallInst instructions.
+ *
+ * @see llvm::CallInst::setTailCall()
+ */
 void LLVMSetTailCall(LLVMValueRef CallInst, LLVMBool IsTailCall);
 
-/* Operations on switch instructions (only) */
+/**
+ * @}
+ */
+
+/**
+ * Obtain the default destination basic block of a switch instruction.
+ *
+ * This only works on llvm::SwitchInst instructions.
+ *
+ * @see llvm::SwitchInst::getDefaultDest()
+ */
 LLVMBasicBlockRef LLVMGetSwitchDefaultDest(LLVMValueRef SwitchInstr);
 
-/* Operations on phi nodes */
+/**
+ * @defgroup LLVMCCoreValueInstructionPHINode PHI Nodes
+ *
+ * Functions in this group only apply to instructions that map to
+ * llvm::PHINode instances.
+ *
+ * @{
+ */
+
+/**
+ * Add an incoming value to the end of a PHI list.
+ */
 void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues,
                      LLVMBasicBlockRef *IncomingBlocks, unsigned Count);
+
+/**
+ * Obtain the number of incoming basic blocks to a PHI node.
+ */
 unsigned LLVMCountIncoming(LLVMValueRef PhiNode);
+
+/**
+ * Obtain an incoming value to a PHI node as a LLVMValueRef.
+ */
 LLVMValueRef LLVMGetIncomingValue(LLVMValueRef PhiNode, unsigned Index);
+
+/**
+ * Obtain an incoming value to a PHI node as a LLVMBasicBlockRef.
+ */
 LLVMBasicBlockRef LLVMGetIncomingBlock(LLVMValueRef PhiNode, unsigned Index);
 
-/*===-- Instruction builders ----------------------------------------------===*/
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
 
-/* An instruction builder represents a point within a basic block, and is the
- * exclusive means of building instructions using the C interface.
+/**
+ * @defgroup LLVMCCoreInstructionBuilder Instruction Builders
+ *
+ * An instruction builder represents a point within a basic block and is
+ * the exclusive means of building instructions using the C interface.
+ *
+ * @{
  */
 
 LLVMBuilderRef LLVMCreateBuilderInContext(LLVMContextRef C);
@@ -1048,21 +2478,37 @@ LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef, LLVMValueRef Val,
 LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef, LLVMValueRef LHS,
                               LLVMValueRef RHS, const char *Name);
 
+/**
+ * @}
+ */
 
-/*===-- Module providers --------------------------------------------------===*/
+/**
+ * @defgroup LLVMCCoreModuleProvider Module Providers
+ *
+ * @{
+ */
 
-/* Changes the type of M so it can be passed to FunctionPassManagers and the
+/**
+ * Changes the type of M so it can be passed to FunctionPassManagers and the
  * JIT.  They take ModuleProviders for historical reasons.
  */
 LLVMModuleProviderRef
 LLVMCreateModuleProviderForExistingModule(LLVMModuleRef M);
 
-/* Destroys the module M.
+/**
+ * Destroys the module M.
  */
 void LLVMDisposeModuleProvider(LLVMModuleProviderRef M);
 
+/**
+ * @}
+ */
 
-/*===-- Memory buffers ----------------------------------------------------===*/
+/**
+ * @defgroup LLVMCCoreMemoryBuffers Memory Buffers
+ *
+ * @{
+ */
 
 LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(const char *Path,
                                                   LLVMMemoryBufferRef *OutMemBuf,
@@ -1071,23 +2517,39 @@ LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
                                          char **OutMessage);
 void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf);
 
-/*===-- Pass Registry -----------------------------------------------------===*/
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCorePassRegistry Pass Registry
+ *
+ * @{
+ */
 
 /** Return the global pass registry, for use with initialization functions.
-    See llvm::PassRegistry::getPassRegistry. */
+    @see llvm::PassRegistry::getPassRegistry */
 LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void);
 
-/*===-- Pass Managers -----------------------------------------------------===*/
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCorePassManagers Pass Managers
+ *
+ * @{
+ */
 
 /** Constructs a new whole-module pass pipeline. This type of pipeline is
     suitable for link-time optimization and whole-module transformations.
-    See llvm::PassManager::PassManager. */
+    @see llvm::PassManager::PassManager */
 LLVMPassManagerRef LLVMCreatePassManager(void);
 
 /** Constructs a new function-by-function pass pipeline over the module
     provider. It does not take ownership of the module provider. This type of
     pipeline is suitable for code generation and JIT compilation tasks.
-    See llvm::FunctionPassManager::FunctionPassManager. */
+    @see llvm::FunctionPassManager::FunctionPassManager */
 LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M);
 
 /** Deprecated: Use LLVMCreateFunctionPassManagerForModule instead. */
@@ -1095,30 +2557,42 @@ LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef MP);
 
 /** Initializes, executes on the provided module, and finalizes all of the
     passes scheduled in the pass manager. Returns 1 if any of the passes
-    modified the module, 0 otherwise. See llvm::PassManager::run(Module&). */
+    modified the module, 0 otherwise.
+    @see llvm::PassManager::run(Module&) */
 LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M);
 
 /** Initializes all of the function passes scheduled in the function pass
     manager. Returns 1 if any of the passes modified the module, 0 otherwise.
-    See llvm::FunctionPassManager::doInitialization. */
+    @see llvm::FunctionPassManager::doInitialization */
 LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM);
 
 /** Executes all of the function passes scheduled in the function pass manager
     on the provided function. Returns 1 if any of the passes modified the
     function, false otherwise.
-    See llvm::FunctionPassManager::run(Function&). */
+    @see llvm::FunctionPassManager::run(Function&) */
 LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F);
 
 /** Finalizes all of the function passes scheduled in in the function pass
     manager. Returns 1 if any of the passes modified the module, 0 otherwise.
-    See llvm::FunctionPassManager::doFinalization. */
+    @see llvm::FunctionPassManager::doFinalization */
 LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM);
 
 /** Frees the memory of a pass pipeline. For function pipelines, does not free
     the module provider.
-    See llvm::PassManagerBase::~PassManagerBase. */
+    @see llvm::PassManagerBase::~PassManagerBase. */
 void LLVMDisposePassManager(LLVMPassManagerRef PM);
 
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
 
 #ifdef __cplusplus
 }
diff --git a/include/llvm-c/Disassembler.h b/include/llvm-c/Disassembler.h
index bf2f276..a676e37 100644
--- a/include/llvm-c/Disassembler.h
+++ b/include/llvm-c/Disassembler.h
@@ -19,6 +19,13 @@
 #include <stddef.h>
 
 /**
+ * @defgroup LLVMCDisassembler Disassembler
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
+/**
  * An opaque reference to a disassembler context.
  */
 typedef void *LLVMDisasmContextRef;
@@ -157,6 +164,10 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DC, uint8_t *Bytes,
                              uint64_t BytesSize, uint64_t PC,
                              char *OutString, size_t OutStringSize);
 
+/**
+ * @}
+ */
+
 #ifdef __cplusplus
 }
 #endif /* !defined(__cplusplus) */
diff --git a/include/llvm-c/EnhancedDisassembly.h b/include/llvm-c/EnhancedDisassembly.h
index 0c173c2..71a0d49 100644
--- a/include/llvm-c/EnhancedDisassembly.h
+++ b/include/llvm-c/EnhancedDisassembly.h
@@ -25,6 +25,19 @@
 extern "C" {
 #endif
 
+/**
+ * @defgroup LLVMCEnhancedDisassembly Enhanced Disassembly
+ * @ingroup LLVMC
+ * @deprecated
+ *
+ * This module contains an interface to the Enhanced Disassembly (edis)
+ * library. The edis library is deprecated and will likely disappear in
+ * the near future. You should use the @ref LLVMCDisassembler interface
+ * instead.
+ *
+ * @{
+ */
+
 /*!
  @typedef EDByteReaderCallback
  Interface to memory from which instructions may be read.
@@ -504,6 +517,10 @@ int EDBlockEvaluateOperand(uint64_t *result,
 int EDBlockVisitTokens(EDInstRef inst,
                        EDTokenVisitor_t visitor);
 
+/**
+ * @}
+ */
+
 #endif
 
 #ifdef __cplusplus
diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h
index f5f4061..cb77bb2 100644
--- a/include/llvm-c/ExecutionEngine.h
+++ b/include/llvm-c/ExecutionEngine.h
@@ -26,6 +26,13 @@
 extern "C" {
 #endif
 
+/**
+ * @defgroup LLVMCExecutionEngine Execution Engine
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
 void LLVMLinkInJIT(void);
 void LLVMLinkInInterpreter(void);
 
@@ -125,6 +132,10 @@ void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global,
 
 void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global);
 
+/**
+ * @}
+ */
+
 #ifdef __cplusplus
 }
 
diff --git a/include/llvm-c/Initialization.h b/include/llvm-c/Initialization.h
index cbe60df..cb3ab9e 100644
--- a/include/llvm-c/Initialization.h
+++ b/include/llvm-c/Initialization.h
@@ -22,6 +22,15 @@
 extern "C" {
 #endif
 
+/**
+ * @defgroup LLVMCInitialization Initialization Routines
+ * @ingroup LLVMC
+ *
+ * This module contains routines used to initialize the LLVM system.
+ *
+ * @{
+ */
+
 void LLVMInitializeCore(LLVMPassRegistryRef R);
 void LLVMInitializeTransformUtils(LLVMPassRegistryRef R);
 void LLVMInitializeScalarOpts(LLVMPassRegistryRef R);
@@ -34,6 +43,10 @@ void LLVMInitializeIPA(LLVMPassRegistryRef R);
 void LLVMInitializeCodeGen(LLVMPassRegistryRef R);
 void LLVMInitializeTarget(LLVMPassRegistryRef R);
 
+/**
+ * @}
+ */
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/llvm-c/LinkTimeOptimizer.h b/include/llvm-c/LinkTimeOptimizer.h
index fca3946..5338d3f 100644
--- a/include/llvm-c/LinkTimeOptimizer.h
+++ b/include/llvm-c/LinkTimeOptimizer.h
@@ -20,6 +20,13 @@
 extern "C" {
 #endif
 
+/**
+ * @defgroup LLVMCLinkTimeOptimizer Link Time Optimization
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
   /// This provides a dummy type for pointers to the LTO object.
   typedef void* llvm_lto_t;
 
@@ -51,6 +58,10 @@ extern "C" {
   extern llvm_lto_status_t llvm_optimize_modules
     (llvm_lto_t lto, const char* output_filename);
 
+/**
+ * @}
+ */
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/llvm-c/Object.h b/include/llvm-c/Object.h
index 6b465b3..e2dad62 100644
--- a/include/llvm-c/Object.h
+++ b/include/llvm-c/Object.h
@@ -28,6 +28,13 @@
 extern "C" {
 #endif
 
+/**
+ * @defgroup LLVMCObject Object file reading and writing
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
 // Opaque type wrappers
 typedef struct LLVMOpaqueObjectFile *LLVMObjectFileRef;
 typedef struct LLVMOpaqueSectionIterator *LLVMSectionIteratorRef;
@@ -86,6 +93,9 @@ uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI);
 const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI);
 const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI);
 
+/**
+ * @}
+ */
 
 #ifdef __cplusplus
 }
diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h
index e4f177b..258a2be 100644
--- a/include/llvm-c/Target.h
+++ b/include/llvm-c/Target.h
@@ -26,6 +26,13 @@
 extern "C" {
 #endif
 
+/**
+ * @defgroup LLVMCTarget Target information
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
 enum LLVMByteOrdering { LLVMBigEndian, LLVMLittleEndian };
 
 typedef struct LLVMOpaqueTargetData *LLVMTargetDataRef;
@@ -209,6 +216,9 @@ unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef, LLVMTypeRef StructTy,
     See the destructor llvm::TargetData::~TargetData. */
 void LLVMDisposeTargetData(LLVMTargetDataRef);
 
+/**
+ * @}
+ */
 
 #ifdef __cplusplus
 }
diff --git a/include/llvm-c/Transforms/IPO.h b/include/llvm-c/Transforms/IPO.h
index 710bebe..4480780 100644
--- a/include/llvm-c/Transforms/IPO.h
+++ b/include/llvm-c/Transforms/IPO.h
@@ -21,6 +21,13 @@
 extern "C" {
 #endif
 
+/**
+ * @defgroup LLVMCTransformsIPO Interprocedural transformations
+ * @ingroup LLVMCTransforms
+ *
+ * @{
+ */
+
 /** See llvm::createArgumentPromotionPass function. */
 void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM);
 
@@ -63,6 +70,10 @@ void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM);
 /** See llvm::createStripSymbolsPass function. */
 void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM);
 
+/**
+ * @}
+ */
+
 #ifdef __cplusplus
 }
 #endif /* defined(__cplusplus) */
diff --git a/include/llvm-c/Transforms/PassManagerBuilder.h b/include/llvm-c/Transforms/PassManagerBuilder.h
index fa722c9..cee6e5a 100644
--- a/include/llvm-c/Transforms/PassManagerBuilder.h
+++ b/include/llvm-c/Transforms/PassManagerBuilder.h
@@ -23,6 +23,13 @@ typedef struct LLVMOpaquePassManagerBuilder *LLVMPassManagerBuilderRef;
 extern "C" {
 #endif
 
+/**
+ * @defgroup LLVMCTransformsPassManagerBuilder Pass manager builder
+ * @ingroup LLVMCTransforms
+ *
+ * @{
+ */
+
 /** See llvm::PassManagerBuilder. */
 LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate(void);
 void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB);
@@ -73,6 +80,10 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
                                                   bool Internalize,
                                                   bool RunInliner);
 
+/**
+ * @}
+ */
+
 #ifdef __cplusplus
 }
 
diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h
index 6015ef9..a2c4d61 100644
--- a/include/llvm-c/Transforms/Scalar.h
+++ b/include/llvm-c/Transforms/Scalar.h
@@ -25,6 +25,13 @@
 extern "C" {
 #endif
 
+/**
+ * @defgroup LLVMCTransformsScalar Scalar transformations
+ * @ingroup LLVMCTransforms
+ *
+ * @{
+ */
+
 /** See llvm::createAggressiveDCEPass function. */
 void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM);
 
@@ -116,6 +123,9 @@ void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM);
 /** See llvm::createBasicAliasAnalysisPass function */
 void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM);
 
+/**
+ * @}
+ */
 
 #ifdef __cplusplus
 }
diff --git a/include/llvm-c/Transforms/Vectorize.h b/include/llvm-c/Transforms/Vectorize.h
index ce92eaa..9e7c754 100644
--- a/include/llvm-c/Transforms/Vectorize.h
+++ b/include/llvm-c/Transforms/Vectorize.h
@@ -26,9 +26,20 @@
 extern "C" {
 #endif
 
+/**
+ * @defgroup LLVMCTransformsVectorize Vectorization transformations
+ * @ingroup LLVMCTransforms
+ *
+ * @{
+ */
+
 /** See llvm::createBBVectorizePass function. */
 void LLVMAddBBVectorizePass(LLVMPassManagerRef PM);
 
+/**
+ * @}
+ */
+
 #ifdef __cplusplus
 }
 #endif /* defined(__cplusplus) */
diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index 7ea7ad0..43ef9dc 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -20,6 +20,13 @@
 #include <stddef.h>
 #include <unistd.h>
 
+/**
+ * @defgroup LLVMCLTO LTO
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
 #define LTO_API_VERSION 4
 
 typedef enum {
@@ -289,5 +296,8 @@ lto_codegen_debug_options(lto_code_gen_t cg, const char *);
 }
 #endif
 
+/**
+ * @}
+ */
 
 #endif
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index b08564c..4101989 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -561,7 +561,15 @@ public:
   /// Performs logical negation operation on this APInt.
   /// @returns true if *this is zero, false otherwise.
   /// @brief Logical negation operator.
-  bool operator!() const;
+  bool operator!() const {
+    if (isSingleWord())
+      return !VAL;
+
+    for (unsigned i = 0; i != getNumWords(); ++i)
+      if (pVal[i])
+        return false;
+    return true;
+  }
 
   /// @}
   /// @name Assignment Operators
@@ -834,7 +842,11 @@ public:
 
   /// @returns the bit value at bitPosition
   /// @brief Array-indexing support.
-  bool operator[](unsigned bitPosition) const;
+  bool operator[](unsigned bitPosition) const {
+    assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
+    return (maskBit(bitPosition) &
+            (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != 0;
+  }
 
   /// @}
   /// @name Comparison Operators
diff --git a/include/llvm/ADT/Hashing.h b/include/llvm/ADT/Hashing.h
index 06f4ce2..81a5117 100644
--- a/include/llvm/ADT/Hashing.h
+++ b/include/llvm/ADT/Hashing.h
@@ -113,7 +113,7 @@ public:
 /// differing argument types even if they would implicit promote to a common
 /// type without changing the value.
 template <typename T>
-typename enable_if<is_integral<T>, hash_code>::type hash_value(T value);
+typename enable_if<is_integral_or_enum<T>, hash_code>::type hash_value(T value);
 
 /// \brief Compute a hash_code for a pointer's address.
 ///
@@ -349,14 +349,15 @@ inline size_t get_execution_seed() {
 /// reading the underlying data. It is false if values of this type must
 /// first be passed to hash_value, and the resulting hash_codes combined.
 //
-// FIXME: We want to replace is_integral and is_pointer here with a predicate
-// which asserts that comparing the underlying storage of two values of the
-// type for equality is equivalent to comparing the two values for equality.
-// For all the platforms we care about, this holds for integers and pointers,
-// but there are platforms where it doesn't and we would like to support
-// user-defined types which happen to satisfy this property.
+// FIXME: We want to replace is_integral_or_enum and is_pointer here with
+// a predicate which asserts that comparing the underlying storage of two
+// values of the type for equality is equivalent to comparing the two values
+// for equality. For all the platforms we care about, this holds for integers
+// and pointers, but there are platforms where it doesn't and we would like to
+// support user-defined types which happen to satisfy this property.
 template <typename T> struct is_hashable_data
-  : integral_constant<bool, ((is_integral<T>::value || is_pointer<T>::value) &&
+  : integral_constant<bool, ((is_integral_or_enum<T>::value ||
+                              is_pointer<T>::value) &&
                              64 % sizeof(T) == 0)> {};
 
 // Special case std::pair to detect when both types are viable and when there
@@ -419,8 +420,6 @@ hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) {
   while (first != last && store_and_advance(buffer_ptr, buffer_end,
                                             get_hashable_data(*first)))
     ++first;
-/// \brief Metafunction that determines whether the given type is an integral
-/// type.
   if (first == last)
     return hash_short(buffer, buffer_ptr - buffer, seed);
   assert(buffer_ptr == buffer_end);
@@ -458,7 +457,7 @@ hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) {
 /// and directly reads from the underlying memory.
 template <typename ValueT>
 typename enable_if<is_hashable_data<ValueT>, hash_code>::type
-hash_combine_range_impl(const ValueT *first, const ValueT *last) {
+hash_combine_range_impl(ValueT *first, ValueT *last) {
   const size_t seed = get_execution_seed();
   const char *s_begin = reinterpret_cast<const char *>(first);
   const char *s_end = reinterpret_cast<const char *>(last);
@@ -734,7 +733,8 @@ inline hash_code hash_integer_value(uint64_t value) {
 // Declared and documented above, but defined here so that any of the hashing
 // infrastructure is available.
 template <typename T>
-typename enable_if<is_integral<T>, hash_code>::type hash_value(T value) {
+typename enable_if<is_integral_or_enum<T>, hash_code>::type
+hash_value(T value) {
   return ::llvm::hashing::detail::hash_integer_value(value);
 }
 
diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h
index 2926462..89b1648 100644
--- a/include/llvm/ADT/ImmutableSet.h
+++ b/include/llvm/ADT/ImmutableSet.h
@@ -347,7 +347,7 @@ public:
       if (prev)
         prev->next = next;
       else
-        factory->Cache[computeDigest()] = next;
+        factory->Cache[factory->maskCacheIndex(computeDigest())] = next;
     }
     
     // We need to clear the mutability bit in case we are
@@ -429,6 +429,11 @@ protected:
   TreeTy*         getRight(TreeTy* T) const { return T->getRight(); }
   value_type_ref  getValue(TreeTy* T) const { return T->value; }
 
+  // Make sure the index is not the Tombstone or Entry key of the DenseMap.
+  static inline unsigned maskCacheIndex(unsigned I) {
+	return (I & ~0x02);
+  }
+
   unsigned incrementHeight(TreeTy* L, TreeTy* R) const {
     unsigned hl = getHeight(L);
     unsigned hr = getHeight(R);
@@ -611,7 +616,7 @@ public:
     // Search the hashtable for another tree with the same digest, and
     // if find a collision compare those trees by their contents.
     unsigned digest = TNew->computeDigest();
-    TreeTy *&entry = Cache[digest];
+    TreeTy *&entry = Cache[maskCacheIndex(digest)];
     do {
       if (!entry)
         break;
diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h
index 83e225c..931b67e 100644
--- a/include/llvm/ADT/IntervalMap.h
+++ b/include/llvm/ADT/IntervalMap.h
@@ -1977,7 +1977,7 @@ iterator::overflow(unsigned Level) {
     CurSize[Nodes] = CurSize[NewNode];
     Node[Nodes] = Node[NewNode];
     CurSize[NewNode] = 0;
-    Node[NewNode] = this->map->newNode<NodeT>();
+    Node[NewNode] = this->map->template newNode<NodeT>();
     ++Nodes;
   }
 
diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h
index 85dbba2..ccdcd1a 100644
--- a/include/llvm/ADT/PointerIntPair.h
+++ b/include/llvm/ADT/PointerIntPair.h
@@ -92,10 +92,14 @@ public:
   }
 
   PointerTy const *getAddrOfPointer() const {
+    return const_cast<PointerIntPair *>(this)->getAddrOfPointer();
+  }
+
+  PointerTy *getAddrOfPointer() {
     assert(Value == reinterpret_cast<intptr_t>(getPointer()) &&
            "Can only return the address if IntBits is cleared and "
            "PtrTraits doesn't change the pointer");
-    return reinterpret_cast<PointerTy const *>(&Value);
+    return reinterpret_cast<PointerTy *>(&Value);
   }
 
   void *getOpaqueValue() const { return reinterpret_cast<void*>(Value); }
diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h
index 487096a..614b59c 100644
--- a/include/llvm/ADT/PointerUnion.h
+++ b/include/llvm/ADT/PointerUnion.h
@@ -142,16 +142,19 @@ namespace llvm {
       return T();
     }
 
-    /// \brief If the union is set to the first pointer type we can get an
-    /// address pointing to it.
-    template <typename T>
-    PT1 const *getAddrOf() const {
+    /// \brief If the union is set to the first pointer type get an address
+    /// pointing to it.
+    PT1 const *getAddrOfPtr1() const {
+      return const_cast<PointerUnion *>(this)->getAddrOfPtr1();
+    }
+
+    /// \brief If the union is set to the first pointer type get an address
+    /// pointing to it.
+    PT1 *getAddrOfPtr1() {
       assert(is<PT1>() && "Val is not the first pointer");
       assert(get<PT1>() == Val.getPointer() &&
          "Can't get the address because PointerLikeTypeTraits changes the ptr");
-      T const *can_only_get_address_of_first_pointer_type
-                        = reinterpret_cast<PT1 const *>(Val.getAddrOfPointer());
-      return can_only_get_address_of_first_pointer_type;
+      return (PT1 *)Val.getAddrOfPointer();
     }
     
     /// Assignment operators - Allow assigning into this union from either
@@ -263,7 +266,7 @@ namespace llvm {
         ::llvm::PointerUnionTypeSelector<PT1, T, IsInnerUnion,
           ::llvm::PointerUnionTypeSelector<PT2, T, IsInnerUnion, IsPT3 >
                                                                    >::Return Ty;
-      return Ty(Val).is<T>();
+      return Ty(Val).template is<T>();
     }
     
     /// get<T>() - Return the value of the specified pointer type. If the
@@ -276,7 +279,7 @@ namespace llvm {
         ::llvm::PointerUnionTypeSelector<PT1, T, IsInnerUnion,
           ::llvm::PointerUnionTypeSelector<PT2, T, IsInnerUnion, IsPT3 >
                                                                    >::Return Ty;
-      return Ty(Val).get<T>();
+      return Ty(Val).template get<T>();
     }
     
     /// dyn_cast<T>() - If the current value is of the specified pointer type,
diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h
index 9992858..70693d5 100644
--- a/include/llvm/ADT/SmallPtrSet.h
+++ b/include/llvm/ADT/SmallPtrSet.h
@@ -137,6 +137,10 @@ private:
 
   void operator=(const SmallPtrSetImpl &RHS);  // DO NOT IMPLEMENT.
 protected:
+  /// swap - Swaps the elements of two sets.
+  /// Note: This method assumes that both sets have the same small size.
+  void swap(SmallPtrSetImpl &RHS);
+
   void CopyFrom(const SmallPtrSetImpl &RHS);
 };
 
@@ -287,8 +291,20 @@ public:
     return *this;
   }
 
+  /// swap - Swaps the elements of two sets.
+  void swap(SmallPtrSet<PtrType, SmallSize> &RHS) {
+    SmallPtrSetImpl::swap(RHS);
+  }
 };
 
 }
 
+namespace std {
+  /// Implement std::swap in terms of SmallPtrSet swap.
+  template<class T, unsigned N>
+  inline void swap(llvm::SmallPtrSet<T, N> &LHS, llvm::SmallPtrSet<T, N> &RHS) {
+    LHS.swap(RHS);
+  }
+}
+
 #endif
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index d01cf32..0d9d0d1 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -23,30 +23,6 @@
 #include <iterator>
 #include <memory>
 
-#ifdef _MSC_VER
-namespace std {
-#if _MSC_VER <= 1310
-  // Work around flawed VC++ implementation of std::uninitialized_copy.  Define
-  // additional overloads so that elements with pointer types are recognized as
-  // scalars and not objects, causing bizarre type conversion errors.
-  template<class T1, class T2>
-  inline _Scalar_ptr_iterator_tag _Ptr_cat(T1 **, T2 **) {
-    _Scalar_ptr_iterator_tag _Cat;
-    return _Cat;
-  }
-
-  template<class T1, class T2>
-  inline _Scalar_ptr_iterator_tag _Ptr_cat(T1* const *, T2 **) {
-    _Scalar_ptr_iterator_tag _Cat;
-    return _Cat;
-  }
-#else
-// FIXME: It is not clear if the problem is fixed in VS 2005.  What is clear
-// is that the above hack won't work if it wasn't fixed.
-#endif
-}
-#endif
-
 namespace llvm {
 
 /// SmallVectorBase - This is all the non-templated stuff common to all
diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h
index 7c824ee..89774c3 100644
--- a/include/llvm/ADT/SparseBitVector.h
+++ b/include/llvm/ADT/SparseBitVector.h
@@ -260,15 +260,6 @@ public:
     }
     BecameZero = allzero;
   }
-
-  // Get a hash value for this element;
-  uint64_t getHashValue() const {
-    uint64_t HashVal = 0;
-    for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) {
-      HashVal ^= Bits[i];
-    }
-    return HashVal;
-  }
 };
 
 template <unsigned ElementSize = 128>
@@ -809,18 +800,6 @@ public:
   iterator end() const {
     return iterator(this, true);
   }
-
-  // Get a hash value for this bitmap.
-  uint64_t getHashValue() const {
-    uint64_t HashVal = 0;
-    for (ElementListConstIter Iter = Elements.begin();
-         Iter != Elements.end();
-         ++Iter) {
-      HashVal ^= Iter->index();
-      HashVal ^= Iter->getHashValue();
-    }
-    return HashVal;
-  }
 };
 
 // Convenience functions to allow Or and And without dereferencing in the user
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index a2bb7fe..76ba66e 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -10,16 +10,26 @@
 #ifndef LLVM_ADT_STRINGREF_H
 #define LLVM_ADT_STRINGREF_H
 
+#include "llvm/Support/type_traits.h"
+
 #include <cassert>
 #include <cstring>
-#include <utility>
+#include <limits>
 #include <string>
+#include <utility>
 
 namespace llvm {
   template<typename T>
   class SmallVectorImpl;
   class APInt;
   class hash_code;
+  class StringRef;
+
+  /// Helper functions for StringRef::getAsInteger.
+  bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
+                            unsigned long long &Result);
+
+  bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
 
   /// StringRef - Represent a constant reference to a string, i.e. a character
   /// array and a length, which need not be null terminated.
@@ -305,14 +315,29 @@ namespace llvm {
     ///
     /// If the string is invalid or if only a subset of the string is valid,
     /// this returns true to signify the error.  The string is considered
-    /// erroneous if empty.
+    /// erroneous if empty or if it overflows T.
     ///
-    bool getAsInteger(unsigned Radix, long long &Result) const;
-    bool getAsInteger(unsigned Radix, unsigned long long &Result) const;
-    bool getAsInteger(unsigned Radix, int &Result) const;
-    bool getAsInteger(unsigned Radix, unsigned &Result) const;
+    template <typename T>
+    typename enable_if_c<std::numeric_limits<T>::is_signed, bool>::type
+    getAsInteger(unsigned Radix, T &Result) const {
+      long long LLVal;
+      if (getAsSignedInteger(*this, Radix, LLVal) ||
+            static_cast<T>(LLVal) != LLVal)
+        return true;
+      Result = LLVal;
+      return false;
+    }
 
-    // TODO: Provide overloads for int/unsigned that check for overflow.
+    template <typename T>
+    typename enable_if_c<!std::numeric_limits<T>::is_signed, bool>::type
+    getAsInteger(unsigned Radix, T &Result) const {
+      unsigned long long ULLVal;
+      if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
+            static_cast<T>(ULLVal) != ULLVal)
+        return true;
+      Result = ULLVal;
+      return false;
+    }
 
     /// getAsInteger - Parse the current string as an integer of the
     /// specified radix, or of an autosensed radix if the radix given
diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h
index e27dd4b..5014517 100644
--- a/include/llvm/ADT/TinyPtrVector.h
+++ b/include/llvm/ADT/TinyPtrVector.h
@@ -42,7 +42,7 @@ public:
     if (Val.isNull())
       return ArrayRef<EltTy>();
     if (Val.template is<EltTy>())
-      return *Val.template getAddrOf<EltTy>();
+      return *Val.getAddrOfPtr1();
     return *Val.template get<VecTy*>();
   }
   
@@ -63,18 +63,20 @@ public:
     return Val.template get<VecTy*>()->size();
   }
   
-  typedef const EltTy *iterator;
-  iterator begin() const {
+  typedef const EltTy *const_iterator;
+  typedef EltTy *iterator;
+
+  iterator begin() {
     if (empty())
       return 0;
     
     if (Val.template is<EltTy>())
-      return Val.template getAddrOf<EltTy>();
+      return Val.getAddrOfPtr1();
     
     return Val.template get<VecTy *>()->begin();
 
   }
-  iterator end() const {
+  iterator end() {
     if (empty())
       return 0;
     
@@ -84,7 +86,14 @@ public:
     return Val.template get<VecTy *>()->end();
   }
 
-  
+  const_iterator begin() const {
+    return (const_iterator)const_cast<TinyPtrVector*>(this)->begin();
+  }
+
+  const_iterator end() const {
+    return (const_iterator)const_cast<TinyPtrVector*>(this)->end();
+  }
+
   EltTy operator[](unsigned i) const {
     assert(!Val.isNull() && "can't index into an empty vector");
     if (EltTy V = Val.template dyn_cast<EltTy>()) {
@@ -133,6 +142,20 @@ public:
     }
     // Otherwise, we're already empty.
   }
+
+  iterator erase(iterator I) {
+    // If we have a single value, convert to empty.
+    if (Val.template is<EltTy>()) {
+      if (I == begin())
+        Val = (EltTy)0;
+    } else if (VecTy *Vec = Val.template dyn_cast<VecTy*>()) {
+      // multiple items in a vector; just do the erase, there is no
+      // benefit to collapsing back to a pointer
+      return Vec->erase(I);
+    }
+
+    return 0;
+  }
   
 private:
   void operator=(const TinyPtrVector&); // NOT IMPLEMENTED YET.
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index 98f0b62..102c95a 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -53,6 +53,7 @@ public:
     msp430,  // MSP430: msp430
     ppc,     // PPC: powerpc
     ppc64,   // PPC64: powerpc64, ppu
+    r600,    // R600: AMD GPUs HD2XXX - HD6XXX
     sparc,   // Sparc: sparc
     sparcv9, // Sparcv9: Sparcv9
     tce,     // TCE (http://tce.cs.tut.fi/): tce
diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h
index bcacfd9..ba9864a 100644
--- a/include/llvm/ADT/ilist.h
+++ b/include/llvm/ADT/ilist.h
@@ -652,10 +652,6 @@ struct ilist : public iplist<NodeTy> {
   void push_front(const NodeTy &val) { insert(this->begin(), val); }
   void push_back(const NodeTy &val) { insert(this->end(), val); }
 
-  // Special forms of insert...
-  template<class InIt> void insert(iterator where, InIt first, InIt last) {
-    for (; first != last; ++first) insert(where, *first);
-  }
   void insert(iterator where, size_type count, const NodeTy &val) {
     for (; count != 0; --count) insert(where, val);
   }
diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h
index 2cde838..4704a92 100644
--- a/include/llvm/Analysis/CFGPrinter.h
+++ b/include/llvm/Analysis/CFGPrinter.h
@@ -95,8 +95,9 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
       
       std::string Str;
       raw_string_ostream OS(Str);
-      unsigned Case = SI->resolveCaseIndex(SuccNo);
-      OS << SI->getCaseValue(Case)->getValue();
+      SwitchInst::ConstCaseIt Case =
+          SwitchInst::ConstCaseIt::fromSuccessorIndex(SI, SuccNo); 
+      OS << Case.getCaseValue()->getValue();
       return OS.str();
     }    
     return "";
diff --git a/include/llvm/Analysis/CodeMetrics.h b/include/llvm/Analysis/CodeMetrics.h
index 6d34781..033e19b 100644
--- a/include/llvm/Analysis/CodeMetrics.h
+++ b/include/llvm/Analysis/CodeMetrics.h
@@ -1,4 +1,4 @@
-//===- CodeMetrics.h - Measures the weight of a function---------*- C++ -*-===//
+//===- CodeMetrics.h - Code cost measurements -------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,53 +18,58 @@
 #include "llvm/ADT/DenseMap.h"
 
 namespace llvm {
-
   class BasicBlock;
   class Function;
   class TargetData;
   class Value;
 
-  // CodeMetrics - Calculate size and a few similar metrics for a set of
-  // basic blocks.
-  struct CodeMetrics {
-    /// NeverInline - True if this callee should never be inlined into a
-    /// caller.
-    // bool NeverInline;
+  /// \brief Check whether a call will lower to something small.
+  ///
+  /// This tests checks whether calls to this function will lower to something
+  /// significantly cheaper than a traditional call, often a single
+  /// instruction.
+  bool callIsSmall(const Function *F);
 
-    // True if this function contains a call to setjmp or other functions
-    // with attribute "returns twice" without having the attribute itself.
+  /// \brief Utility to calculate the size and a few similar metrics for a set
+  /// of basic blocks.
+  struct CodeMetrics {
+    /// \brief True if this function contains a call to setjmp or other functions
+    /// with attribute "returns twice" without having the attribute itself.
     bool exposesReturnsTwice;
 
-    // True if this function calls itself
+    /// \brief True if this function calls itself.
     bool isRecursive;
 
-    // True if this function contains one or more indirect branches
+    /// \brief True if this function contains one or more indirect branches.
     bool containsIndirectBr;
 
-    /// usesDynamicAlloca - True if this function calls alloca (in the C sense).
+    /// \brief True if this function calls alloca (in the C sense).
     bool usesDynamicAlloca;
 
-    /// NumInsts, NumBlocks - Keep track of how large each function is, which
-    /// is used to estimate the code size cost of inlining it.
-    unsigned NumInsts, NumBlocks;
+    /// \brief Number of instructions in the analyzed blocks.
+    unsigned NumInsts;
 
-    /// NumBBInsts - Keeps track of basic block code size estimates.
+    /// \brief Number of analyzed blocks.
+    unsigned NumBlocks;
+
+    /// \brief Keeps track of basic block code size estimates.
     DenseMap<const BasicBlock *, unsigned> NumBBInsts;
 
-    /// NumCalls - Keep track of the number of calls to 'big' functions.
+    /// \brief Keep track of the number of calls to 'big' functions.
     unsigned NumCalls;
 
-    /// NumInlineCandidates - Keep track of the number of calls to internal
-    /// functions with only a single caller.  These are likely targets for
-    /// future inlining, likely exposed by interleaved devirtualization.
+    /// \brief The number of calls to internal functions with a single caller.
+    ///
+    /// These are likely targets for future inlining, likely exposed by
+    /// interleaved devirtualization.
     unsigned NumInlineCandidates;
 
-    /// NumVectorInsts - Keep track of how many instructions produce vector
-    /// values.  The inliner is being more aggressive with inlining vector
-    /// kernels.
+    /// \brief How many instructions produce vector values.
+    ///
+    /// The inliner is more aggressive with inlining vector kernels.
     unsigned NumVectorInsts;
 
-    /// NumRets - Keep track of how many Ret instructions the block contains.
+    /// \brief How many 'ret' instructions the blocks contain.
     unsigned NumRets;
 
     CodeMetrics() : exposesReturnsTwice(false), isRecursive(false),
@@ -73,29 +78,11 @@ namespace llvm {
                     NumInlineCandidates(0), NumVectorInsts(0),
                     NumRets(0) {}
 
-    /// analyzeBasicBlock - Add information about the specified basic block
-    /// to the current structure.
+    /// \brief Add information about a block to the current state.
     void analyzeBasicBlock(const BasicBlock *BB, const TargetData *TD = 0);
 
-    /// analyzeFunction - Add information about the specified function
-    /// to the current structure.
+    /// \brief Add information about a function to the current state.
     void analyzeFunction(Function *F, const TargetData *TD = 0);
-
-    /// CountCodeReductionForConstant - Figure out an approximation for how
-    /// many instructions will be constant folded if the specified value is
-    /// constant.
-    unsigned CountCodeReductionForConstant(Value *V);
-
-    /// CountBonusForConstant - Figure out an approximation for how much
-    /// per-call performance boost we can expect if the specified value is
-    /// constant.
-    unsigned CountBonusForConstant(Value *V);
-
-    /// CountCodeReductionForAlloca - Figure out an approximation of how much
-    /// smaller the function will be if it is inlined into a context where an
-    /// argument becomes an alloca.
-    ///
-    unsigned CountCodeReductionForAlloca(Value *V);
   };
 }
 
diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h
index 2a3bb9b..11d2cf0 100644
--- a/include/llvm/Analysis/IVUsers.h
+++ b/include/llvm/Analysis/IVUsers.h
@@ -145,7 +145,8 @@ public:
   /// AddUsersIfInteresting - Inspect the specified Instruction.  If it is a
   /// reducible SCEV, recursively add its users to the IVUsesByStride set and
   /// return true.  Otherwise, return false.
-  bool AddUsersIfInteresting(Instruction *I);
+  bool AddUsersIfInteresting(Instruction *I,
+                             SmallPtrSet<Loop*,16> &SimpleLoopNests);
 
   IVStrideUse &AddUser(Instruction *User, Value *Operand);
 
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index fdcd5a8..c804c46 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -110,6 +110,34 @@ namespace llvm {
       /// entry here.
       std::vector<ArgInfo> ArgumentWeights;
 
+      /// PointerArgPairWeights - Weights to use when giving an inline bonus to
+      /// a call site due to correlated pairs of pointers.
+      DenseMap<std::pair<unsigned, unsigned>, unsigned> PointerArgPairWeights;
+
+      /// countCodeReductionForConstant - Figure out an approximation for how
+      /// many instructions will be constant folded if the specified value is
+      /// constant.
+      unsigned countCodeReductionForConstant(const CodeMetrics &Metrics,
+                                             Value *V);
+
+      /// countCodeReductionForAlloca - Figure out an approximation of how much
+      /// smaller the function will be if it is inlined into a context where an
+      /// argument becomes an alloca.
+      unsigned countCodeReductionForAlloca(const CodeMetrics &Metrics,
+                                           Value *V);
+
+      /// countCodeReductionForPointerPair - Count the bonus to apply to an
+      /// inline call site where a pair of arguments are pointers and one
+      /// argument is a constant offset from the other. The idea is to
+      /// recognize a common C++ idiom where a begin and end iterator are
+      /// actually pointers, and many operations on the pair of them will be
+      /// constants if the function is called with arguments that have
+      /// a constant offset.
+      void countCodeReductionForPointerPair(
+          const CodeMetrics &Metrics,
+          DenseMap<Value *, unsigned> &PointerArgs,
+          Value *V, unsigned ArgIdx);
+
       /// analyzeFunction - Add information about the specified function
       /// to the current structure.
       void analyzeFunction(Function *F, const TargetData *TD);
@@ -138,28 +166,13 @@ namespace llvm {
     /// getInlineCost - The heuristic used to determine if we should inline the
     /// function call or not.
     ///
-    InlineCost getInlineCost(CallSite CS,
-                             SmallPtrSet<const Function *, 16> &NeverInline);
+    InlineCost getInlineCost(CallSite CS);
     /// getCalledFunction - The heuristic used to determine if we should inline
     /// the function call or not.  The callee is explicitly specified, to allow
     /// you to calculate the cost of inlining a function via a pointer.  The
     /// result assumes that the inlined version will always be used.  You should
     /// weight it yourself in cases where this callee will not always be called.
-    InlineCost getInlineCost(CallSite CS,
-                             Function *Callee,
-                             SmallPtrSet<const Function *, 16> &NeverInline);
-
-    /// getSpecializationBonus - The heuristic used to determine the per-call
-    /// performance boost for using a specialization of Callee with argument
-    /// SpecializedArgNos replaced by a constant.
-    int getSpecializationBonus(Function *Callee,
-             SmallVectorImpl<unsigned> &SpecializedArgNo);
-
-    /// getSpecializationCost - The heuristic used to determine the code-size
-    /// impact of creating a specialized version of Callee with argument
-    /// SpecializedArgNo replaced by a constant.
-    InlineCost getSpecializationCost(Function *Callee,
-               SmallVectorImpl<unsigned> &SpecializedArgNo);
+    InlineCost getInlineCost(CallSite CS, Function *Callee);
 
     /// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
     /// higher threshold to determine if the function call should be inlined.
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index 3dd194c..fb07b03 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -20,13 +20,14 @@
 #define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
 
 namespace llvm {
+  template<typename T>
+  class ArrayRef;
   class DominatorTree;
   class Instruction;
-  class Value;
   class TargetData;
   class TargetLibraryInfo;
-  template<typename T>
-  class ArrayRef;
+  class Type;
+  class Value;
 
   /// SimplifyAddInst - Given operands for an Add, see if we can
   /// fold the result.  If not, this returns null.
@@ -141,11 +142,13 @@ namespace llvm {
   /// the result.  If not, this returns null.
   Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
                             const TargetData *TD = 0,
+                            const TargetLibraryInfo *TLI = 0,
                             const DominatorTree *DT = 0);
 
   /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD = 0,
+                         const TargetLibraryInfo *TLI = 0,
                          const DominatorTree *DT = 0);
 
   /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
@@ -153,8 +156,15 @@ namespace llvm {
   Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
                                  ArrayRef<unsigned> Idxs,
                                  const TargetData *TD = 0,
+                                 const TargetLibraryInfo *TLI = 0,
                                  const DominatorTree *DT = 0);
 
+  /// SimplifyTruncInst - Given operands for an TruncInst, see if we can fold
+  /// the result.  If not, this returns null.
+  Value *SimplifyTruncInst(Value *Op, Type *Ty, const TargetData *TD = 0,
+                           const TargetLibraryInfo *TLI = 0,
+                           const DominatorTree *DT = 0);
+
   //=== Helper functions for higher up the class hierarchy.
 
 
diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h
index 1574262..5f0aefb 100644
--- a/include/llvm/Analysis/Loads.h
+++ b/include/llvm/Analysis/Loads.h
@@ -20,6 +20,7 @@ namespace llvm {
 
 class AliasAnalysis;
 class TargetData;
+class MDNode;
 
 /// isSafeToLoadUnconditionally - Return true if we know that executing a load
 /// from this value cannot trap.  If it is not obviously safe to load from the
@@ -41,10 +42,15 @@ bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
 /// MaxInstsToScan specifies the maximum instructions to scan in the block.
 /// If it is set to 0, it will scan the whole block. You can also optionally
 /// specify an alias analysis implementation, which makes this more precise.
+///
+/// If TBAATag is non-null and a load or store is found, the TBAA tag from the
+/// load or store is recorded there.  If there is no TBAA tag or if no access
+/// is found, it is left unmodified.
 Value *FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
                                 BasicBlock::iterator &ScanFrom,
                                 unsigned MaxInstsToScan = 6,
-                                AliasAnalysis *AA = 0);
+                                AliasAnalysis *AA = 0,
+                                MDNode **TBAATag = 0);
 
 }
 
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
index 42c68aa..475da13 100644
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -501,7 +501,7 @@ public:
   /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK.
   void EnterBlockInfoBlock(unsigned CodeWidth) {
     EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth);
-    BlockInfoCurBID = -1U;
+    BlockInfoCurBID = ~0U;
   }
 private:
   /// SwitchToBlockID - If we aren't already talking about the specified block
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index 77dc644..3afe309 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -229,7 +229,7 @@ public:
 
   /// getFirstUnallocated - Return the first unallocated register in the set, or
   /// NumRegs if they are all allocated.
-  unsigned getFirstUnallocated(const unsigned *Regs, unsigned NumRegs) const {
+  unsigned getFirstUnallocated(const uint16_t *Regs, unsigned NumRegs) const {
     for (unsigned i = 0; i != NumRegs; ++i)
       if (!isAllocated(Regs[i]))
         return i;
@@ -256,7 +256,7 @@ public:
   /// AllocateReg - Attempt to allocate one of the specified registers.  If none
   /// are available, return zero.  Otherwise, return the first one available,
   /// marking it and any aliases as allocated.
-  unsigned AllocateReg(const unsigned *Regs, unsigned NumRegs) {
+  unsigned AllocateReg(const uint16_t *Regs, unsigned NumRegs) {
     unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
     if (FirstUnalloc == NumRegs)
       return 0;    // Didn't find the reg.
@@ -268,7 +268,7 @@ public:
   }
 
   /// Version of AllocateReg with list of registers to be shadowed.
-  unsigned AllocateReg(const unsigned *Regs, const unsigned *ShadowRegs,
+  unsigned AllocateReg(const uint16_t *Regs, const uint16_t *ShadowRegs,
                        unsigned NumRegs) {
     unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
     if (FirstUnalloc == NumRegs)
@@ -306,12 +306,12 @@ public:
 
   // First GPR that carries part of a byval aggregate that's split
   // between registers and memory.
-  unsigned getFirstByValReg() { return FirstByValRegValid ? FirstByValReg : 0; }
+  unsigned getFirstByValReg() const { return FirstByValRegValid ? FirstByValReg : 0; }
   void setFirstByValReg(unsigned r) { FirstByValReg = r; FirstByValRegValid = true; }
   void clearFirstByValReg() { FirstByValReg = 0; FirstByValRegValid = false; }
-  bool isFirstByValRegValid() { return FirstByValRegValid; }
+  bool isFirstByValRegValid() const { return FirstByValRegValid; }
 
-  ParmContext getCallOrPrologue() { return CallOrPrologue; }
+  ParmContext getCallOrPrologue() const { return CallOrPrologue; }
 
 private:
   /// MarkAllocated - Mark a register and all of its aliases as allocated.
diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h
index 0694caa..ee1ed07 100644
--- a/include/llvm/CodeGen/DFAPacketizer.h
+++ b/include/llvm/CodeGen/DFAPacketizer.h
@@ -36,6 +36,7 @@ class MachineInstr;
 class MachineLoopInfo;
 class MachineDominatorTree;
 class InstrItineraryData;
+class ScheduleDAGInstrs;
 class SUnit;
 
 class DFAPacketizer {
@@ -91,7 +92,7 @@ class VLIWPacketizerList {
   const TargetInstrInfo *TII;
 
   // Encapsulate data types not exposed to the target interface.
-  void *SchedulerImpl;
+  ScheduleDAGInstrs *SchedulerImpl;
 
 protected:
   // Vector of instructions assigned to the current packet.
diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h
index 1ed2547..8fb31aa 100644
--- a/include/llvm/CodeGen/LatencyPriorityQueue.h
+++ b/include/llvm/CodeGen/LatencyPriorityQueue.h
@@ -85,11 +85,11 @@ namespace llvm {
 
     virtual void dump(ScheduleDAG* DAG) const;
 
-    // ScheduledNode - As nodes are scheduled, we look to see if there are any
+    // scheduledNode - As nodes are scheduled, we look to see if there are any
     // successor nodes that have a single unscheduled predecessor.  If so, that
     // single predecessor has a higher priority, since scheduling it will make
     // the node available.
-    void ScheduledNode(SUnit *Node);
+    void scheduledNode(SUnit *Node);
 
 private:
     void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 576ce94..ef9c0c2 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -117,6 +117,10 @@ public:
   /// "(null)".
   StringRef getName() const;
 
+  /// getFullName - Return a formatted string to identify this block and its
+  /// parent function.
+  std::string getFullName() const;
+
   /// hasAddressTaken - Test whether this block is potentially the target
   /// of an indirect branch.
   bool hasAddressTaken() const { return AddressTaken; }
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index 7f29d31..65093d7 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -74,9 +74,10 @@ private:
                                         // anything other than to convey comment
                                         // information to AsmPrinter.
 
+  uint16_t NumMemRefs;                  // information on memory references
+  mmo_iterator MemRefs;
+
   std::vector<MachineOperand> Operands; // the operands
-  mmo_iterator MemRefs;                 // information on memory references
-  mmo_iterator MemRefsEnd;
   MachineBasicBlock *Parent;            // Pointer to the owning basic block.
   DebugLoc debugLoc;                    // Source line information.
 
@@ -284,13 +285,13 @@ public:
 
   /// Access to memory operands of the instruction
   mmo_iterator memoperands_begin() const { return MemRefs; }
-  mmo_iterator memoperands_end() const { return MemRefsEnd; }
-  bool memoperands_empty() const { return MemRefsEnd == MemRefs; }
+  mmo_iterator memoperands_end() const { return MemRefs + NumMemRefs; }
+  bool memoperands_empty() const { return NumMemRefs == 0; }
 
   /// hasOneMemOperand - Return true if this instruction has exactly one
   /// MachineMemOperand.
   bool hasOneMemOperand() const {
-    return MemRefsEnd - MemRefs == 1;
+    return NumMemRefs == 1;
   }
 
   /// API for querying MachineInstr properties. They are the same as MCInstrDesc
@@ -307,7 +308,14 @@ public:
   /// The first argument is the property being queried.
   /// The second argument indicates whether the query should look inside
   /// instruction bundles.
-  bool hasProperty(unsigned Flag, QueryType Type = AnyInBundle) const;
+  bool hasProperty(unsigned MCFlag, QueryType Type = AnyInBundle) const {
+    // Inline the fast path.
+    if (Type == IgnoreBundle || !isBundle())
+      return getDesc().getFlags() & (1 << MCFlag);
+
+    // If we have a bundle, take the slow path.
+    return hasPropertyInBundle(1 << MCFlag, Type);
+  }
 
   /// isVariadic - Return true if this instruction can have a variable number of
   /// operands.  In this case, the variable operands will be after the normal
@@ -888,7 +896,7 @@ public:
   /// list. This does not transfer ownership.
   void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
     MemRefs = NewMemRefs;
-    MemRefsEnd = NewMemRefsEnd;
+    NumMemRefs = NewMemRefsEnd - NewMemRefs;
   }
 
 private:
@@ -910,6 +918,10 @@ private:
   /// this instruction from their respective use lists.  This requires that the
   /// operands not be on their use lists yet.
   void AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo);
+
+  /// hasPropertyInBundle - Slow path for hasProperty when we're dealing with a
+  /// bundle.
+  bool hasPropertyInBundle(unsigned Mask, QueryType Type) const;
 };
 
 /// MachineInstrExpressionTrait - Special DenseMapInfo traits to compare
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
new file mode 100644
index 0000000..e852009
--- /dev/null
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -0,0 +1,91 @@
+//==- MachineScheduler.h - MachineInstr Scheduling Pass ----------*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a MachineSchedRegistry for registering alternative machine
+// schedulers. A Target may provide an alternative scheduler implementation by
+// implementing the following boilerplate:
+//
+// static ScheduleDAGInstrs *createCustomMachineSched(MachineSchedContext *C) {
+//  return new CustomMachineScheduler(C);
+// }
+// static MachineSchedRegistry
+// SchedCustomRegistry("custom", "Run my target's custom scheduler",
+//                     createCustomMachineSched);
+//
+// Inside <Target>PassConfig:
+//   enablePass(MachineSchedulerID);
+//   MachineSchedRegistry::setDefault(createCustomMachineSched);
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MACHINESCHEDULER_H
+#define MACHINESCHEDULER_H
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+namespace llvm {
+
+class AliasAnalysis;
+class LiveIntervals;
+class MachineDominatorTree;
+class MachineLoopInfo;
+class ScheduleDAGInstrs;
+
+/// MachineSchedContext provides enough context from the MachineScheduler pass
+/// for the target to instantiate a scheduler.
+struct MachineSchedContext {
+  MachineFunction *MF;
+  const MachineLoopInfo *MLI;
+  const MachineDominatorTree *MDT;
+  const TargetPassConfig *PassConfig;
+  AliasAnalysis *AA;
+  LiveIntervals *LIS;
+
+  MachineSchedContext(): MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {}
+};
+
+/// MachineSchedRegistry provides a selection of available machine instruction
+/// schedulers.
+class MachineSchedRegistry : public MachinePassRegistryNode {
+public:
+  typedef ScheduleDAGInstrs *(*ScheduleDAGCtor)(MachineSchedContext *);
+
+  // RegisterPassParser requires a (misnamed) FunctionPassCtor type.
+  typedef ScheduleDAGCtor FunctionPassCtor;
+
+  static MachinePassRegistry Registry;
+
+  MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C)
+    : MachinePassRegistryNode(N, D, (MachinePassCtor)C) {
+    Registry.Add(this);
+  }
+  ~MachineSchedRegistry() { Registry.Remove(this); }
+
+  // Accessors.
+  //
+  MachineSchedRegistry *getNext() const {
+    return (MachineSchedRegistry *)MachinePassRegistryNode::getNext();
+  }
+  static MachineSchedRegistry *getList() {
+    return (MachineSchedRegistry *)Registry.getList();
+  }
+  static ScheduleDAGCtor getDefault() {
+    return (ScheduleDAGCtor)Registry.getDefault();
+  }
+  static void setDefault(ScheduleDAGCtor C) {
+    Registry.setDefault((MachinePassCtor)C);
+  }
+  static void setListener(MachinePassRegistryListener *L) {
+    Registry.setListener(L);
+  }
+};
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 8ce339d..d6ca148e 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -101,7 +101,10 @@ public:
   /// point where StadardID is expected, add TargetID in its place.
   void substitutePass(char &StandardID, char &TargetID);
 
-  /// Allow the target to disable a specific standard pass.
+  /// Allow the target to enable a specific standard pass by default.
+  void enablePass(char &ID) { substitutePass(ID, ID); }
+
+  /// Allow the target to disable a specific standard pass by default.
   void disablePass(char &ID) { substitutePass(ID, NoPassID); }
 
   /// Return the pass ssubtituted for StandardID by the target.
@@ -420,10 +423,10 @@ namespace llvm {
   /// adapted to code generation.  Required if using dwarf exception handling.
   FunctionPass *createDwarfEHPass(const TargetMachine *tm);
 
-  /// createSjLjEHPass - This pass adapts exception handling code to use
+  /// createSjLjEHPreparePass - This pass adapts exception handling code to use
   /// the GCC-style builtin setjmp/longjmp (sjlj) to handling EH control flow.
   ///
-  FunctionPass *createSjLjEHPass(const TargetLowering *tli);
+  FunctionPass *createSjLjEHPreparePass(const TargetLowering *tli);
 
   /// LocalStackSlotAllocation - This pass assigns local frame indices to stack
   /// slots relative to one another and allocates base registers to access them
diff --git a/include/llvm/CodeGen/ResourcePriorityQueue.h b/include/llvm/CodeGen/ResourcePriorityQueue.h
index fa7011d..56b5855 100644
--- a/include/llvm/CodeGen/ResourcePriorityQueue.h
+++ b/include/llvm/CodeGen/ResourcePriorityQueue.h
@@ -76,7 +76,7 @@ namespace llvm {
 
   public:
     ResourcePriorityQueue(SelectionDAGISel *IS);
-    
+
     ~ResourcePriorityQueue() {
       delete ResourcesModel;
     }
@@ -126,8 +126,8 @@ namespace llvm {
 
     virtual void dump(ScheduleDAG* DAG) const;
 
-    /// ScheduledNode - Main resource tracking point.
-    void ScheduledNode(SUnit *Node);
+    /// scheduledNode - Main resource tracking point.
+    void scheduledNode(SUnit *Node);
     bool isResourceAvailable(SUnit *SU);
     void reserveResources(SUnit *SU);
 
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 72eadd9..f4de693 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -8,7 +8,8 @@
 //===----------------------------------------------------------------------===//
 //
 // This file implements the ScheduleDAG class, which is used as the common
-// base class for instruction schedulers.
+// base class for instruction schedulers. This encapsulates the scheduling DAG,
+// which is shared between SelectionDAG and MachineInstr scheduling.
 //
 //===----------------------------------------------------------------------===//
 
@@ -409,6 +410,13 @@ namespace llvm {
       return false;
     }
 
+    bool isTopReady() const {
+      return NumPredsLeft == 0;
+    }
+    bool isBottomReady() const {
+      return NumSuccsLeft == 0;
+    }
+
     void dump(const ScheduleDAG *G) const;
     void dumpAll(const ScheduleDAG *G) const;
     void print(raw_ostream &O, const ScheduleDAG *G) const;
@@ -466,13 +474,13 @@ namespace llvm {
 
     virtual void dump(ScheduleDAG *) const {}
 
-    /// ScheduledNode - As each node is scheduled, this method is invoked.  This
+    /// scheduledNode - As each node is scheduled, this method is invoked.  This
     /// allows the priority function to adjust the priority of related
     /// unscheduled nodes, for example.
     ///
-    virtual void ScheduledNode(SUnit *) {}
+    virtual void scheduledNode(SUnit *) {}
 
-    virtual void UnscheduledNode(SUnit *) {}
+    virtual void unscheduledNode(SUnit *) {}
 
     void setCurCycle(unsigned Cycle) {
       CurCycle = Cycle;
@@ -485,15 +493,11 @@ namespace llvm {
 
   class ScheduleDAG {
   public:
-    MachineBasicBlock *BB;          // The block in which to insert instructions
-    MachineBasicBlock::iterator InsertPos;// The position to insert instructions
     const TargetMachine &TM;              // Target processor
     const TargetInstrInfo *TII;           // Target instruction information
     const TargetRegisterInfo *TRI;        // Target processor register info
     MachineFunction &MF;                  // Machine function
     MachineRegisterInfo &MRI;             // Virtual/real register map
-    std::vector<SUnit*> Sequence;         // The schedule. Null SUnit*'s
-                                          // represent noop instructions.
     std::vector<SUnit> SUnits;            // The scheduling units.
     SUnit EntrySU;                        // Special node for the region entry.
     SUnit ExitSU;                         // Special node for the region exit.
@@ -508,6 +512,9 @@ namespace llvm {
 
     virtual ~ScheduleDAG();
 
+    /// clearDAG - clear the DAG state (between regions).
+    void clearDAG();
+
     /// getInstrDesc - Return the MCInstrDesc of this SUnit.
     /// Return NULL for SDNodes without a machine opcode.
     const MCInstrDesc *getInstrDesc(const SUnit *SU) const {
@@ -518,66 +525,43 @@ namespace llvm {
     /// viewGraph - Pop up a GraphViz/gv window with the ScheduleDAG rendered
     /// using 'dot'.
     ///
+    void viewGraph(const Twine &Name, const Twine &Title);
     void viewGraph();
 
-    /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
-    /// according to the order specified in Sequence.
-    ///
-    virtual MachineBasicBlock *EmitSchedule() = 0;
-
-    void dumpSchedule() const;
-
     virtual void dumpNode(const SUnit *SU) const = 0;
 
     /// getGraphNodeLabel - Return a label for an SUnit node in a visualization
     /// of the ScheduleDAG.
     virtual std::string getGraphNodeLabel(const SUnit *SU) const = 0;
 
+    /// getDAGLabel - Return a label for the region of code covered by the DAG.
+    virtual std::string getDAGName() const = 0;
+
     /// addCustomGraphFeatures - Add custom features for a visualization of
     /// the ScheduleDAG.
     virtual void addCustomGraphFeatures(GraphWriter<ScheduleDAG*> &) const {}
 
 #ifndef NDEBUG
-    /// VerifySchedule - Verify that all SUnits were scheduled and that
-    /// their state is consistent.
-    void VerifySchedule(bool isBottomUp);
+    /// VerifyScheduledDAG - Verify that all SUnits were scheduled and that
+    /// their state is consistent. Return the number of scheduled SUnits.
+    unsigned VerifyScheduledDAG(bool isBottomUp);
 #endif
 
   protected:
-    /// Run - perform scheduling.
-    ///
-    void Run(MachineBasicBlock *bb, MachineBasicBlock::iterator insertPos);
-
-    /// BuildSchedGraph - Build SUnits and set up their Preds and Succs
-    /// to form the scheduling dependency graph.
-    ///
-    virtual void BuildSchedGraph(AliasAnalysis *AA) = 0;
-
     /// ComputeLatency - Compute node latency.
     ///
-    virtual void ComputeLatency(SUnit *SU) = 0;
+    virtual void computeLatency(SUnit *SU) = 0;
 
     /// ComputeOperandLatency - Override dependence edge latency using
     /// operand use/def information
     ///
-    virtual void ComputeOperandLatency(SUnit *, SUnit *,
+    virtual void computeOperandLatency(SUnit *, SUnit *,
                                        SDep&) const { }
 
-    /// Schedule - Order nodes according to selected style, filling
-    /// in the Sequence member.
-    ///
-    virtual void Schedule() = 0;
-
     /// ForceUnitLatencies - Return true if all scheduling edges should be given
     /// a latency value of one.  The default is to return false; schedulers may
     /// override this as needed.
-    virtual bool ForceUnitLatencies() const { return false; }
-
-    /// EmitNoop - Emit a noop instruction.
-    ///
-    void EmitNoop();
-
-    void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
+    virtual bool forceUnitLatencies() const { return false; }
 
   private:
     // Return the MCInstrDesc of this SDNode or NULL.
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index c7ffed9..a08f6cc 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -34,7 +34,7 @@ namespace llvm {
   /// For example, loop induction variable increments should be
   /// scheduled as soon as possible after the variable's last use.
   ///
-  class LLVM_LIBRARY_VISIBILITY LoopDependencies {
+  class LoopDependencies {
     const MachineLoopInfo &MLI;
     const MachineDominatorTree &MDT;
 
@@ -98,71 +98,111 @@ namespace llvm {
     }
   };
 
+  /// An individual mapping from virtual register number to SUnit.
+  struct VReg2SUnit {
+    unsigned VirtReg;
+    SUnit *SU;
+
+    VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {}
+
+    unsigned getSparseSetKey() const {
+      return TargetRegisterInfo::virtReg2Index(VirtReg);
+    }
+  };
+
+  /// Combine a SparseSet with a 1x1 vector to track physical registers.
+  /// The SparseSet allows iterating over the (few) live registers for quickly
+  /// comparing against a regmask or clearing the set.
+  ///
+  /// Storage for the map is allocated once for the pass. The map can be
+  /// cleared between scheduling regions without freeing unused entries.
+  class Reg2SUnitsMap {
+    SparseSet<unsigned> PhysRegSet;
+    std::vector<std::vector<SUnit*> > SUnits;
+  public:
+    typedef SparseSet<unsigned>::const_iterator const_iterator;
+
+    // Allow iteration over register numbers (keys) in the map. If needed, we
+    // can provide an iterator over SUnits (values) as well.
+    const_iterator reg_begin() const { return PhysRegSet.begin(); }
+    const_iterator reg_end() const { return PhysRegSet.end(); }
+
+    /// Initialize the map with the number of registers.
+    /// If the map is already large enough, no allocation occurs.
+    /// For simplicity we expect the map to be empty().
+    void setRegLimit(unsigned Limit);
+
+    /// Returns true if the map is empty.
+    bool empty() const { return PhysRegSet.empty(); }
+
+    /// Clear the map without deallocating storage.
+    void clear();
+
+    bool contains(unsigned Reg) const { return PhysRegSet.count(Reg); }
+
+    /// If this register is mapped, return its existing SUnits vector.
+    /// Otherwise map the register and return an empty SUnits vector.
+    std::vector<SUnit *> &operator[](unsigned Reg) {
+      bool New = PhysRegSet.insert(Reg).second;
+      assert((!New || SUnits[Reg].empty()) && "stale SUnits vector");
+      (void)New;
+      return SUnits[Reg];
+    }
+
+    /// Erase an existing element without freeing memory.
+    void erase(unsigned Reg) {
+      PhysRegSet.erase(Reg);
+      SUnits[Reg].clear();
+    }
+  };
+
+  /// Use SparseSet as a SparseMap by relying on the fact that it never
+  /// compares ValueT's, only unsigned keys. This allows the set to be cleared
+  /// between scheduling regions in constant time as long as ValueT does not
+  /// require a destructor.
+  typedef SparseSet<VReg2SUnit> VReg2SUnitMap;
+
   /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
   /// MachineInstrs.
-  class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG {
+  class ScheduleDAGInstrs : public ScheduleDAG {
+  protected:
     const MachineLoopInfo &MLI;
     const MachineDominatorTree &MDT;
     const MachineFrameInfo *MFI;
     const InstrItineraryData *InstrItins;
 
-    /// isPostRA flag indicates vregs cannot be present.
-    bool IsPostRA;
-
     /// Live Intervals provides reaching defs in preRA scheduling.
     LiveIntervals *LIS;
 
-    DenseMap<MachineInstr*, SUnit*> MISUnitMap;
+    /// isPostRA flag indicates vregs cannot be present.
+    bool IsPostRA;
 
     /// UnitLatencies (misnamed) flag avoids computing def-use latencies, using
     /// the def-side latency only.
     bool UnitLatencies;
 
-    /// Combine a SparseSet with a 1x1 vector to track physical registers.
-    /// The SparseSet allows iterating over the (few) live registers for quickly
-    /// comparing against a regmask or clearing the set.
-    ///
-    /// Storage for the map is allocated once for the pass. The map can be
-    /// cleared between scheduling regions without freeing unused entries.
-    class Reg2SUnitsMap {
-      SparseSet<unsigned> PhysRegSet;
-      std::vector<std::vector<SUnit*> > SUnits;
-    public:
-      typedef SparseSet<unsigned>::const_iterator const_iterator;
-
-      // Allow iteration over register numbers (keys) in the map. If needed, we
-      // can provide an iterator over SUnits (values) as well.
-      const_iterator reg_begin() const { return PhysRegSet.begin(); }
-      const_iterator reg_end() const { return PhysRegSet.end(); }
-
-      /// Initialize the map with the number of registers.
-      /// If the map is already large enough, no allocation occurs.
-      /// For simplicity we expect the map to be empty().
-      void setRegLimit(unsigned Limit);
-
-      /// Returns true if the map is empty.
-      bool empty() const { return PhysRegSet.empty(); }
-
-      /// Clear the map without deallocating storage.
-      void clear();
-
-      bool contains(unsigned Reg) const { return PhysRegSet.count(Reg); }
-
-      /// If this register is mapped, return its existing SUnits vector.
-      /// Otherwise map the register and return an empty SUnits vector.
-      std::vector<SUnit *> &operator[](unsigned Reg) {
-        bool New = PhysRegSet.insert(Reg).second;
-        assert((!New || SUnits[Reg].empty()) && "stale SUnits vector");
-        (void)New;
-        return SUnits[Reg];
-      }
+    /// State specific to the current scheduling region.
+    /// ------------------------------------------------
+
+    /// The block in which to insert instructions
+    MachineBasicBlock *BB;
+
+    /// The beginning of the range to be scheduled.
+    MachineBasicBlock::iterator RegionBegin;
+
+    /// The end of the range to be scheduled.
+    MachineBasicBlock::iterator RegionEnd;
+
+    /// The index in BB of RegionEnd.
+    unsigned EndIndex;
+
+    /// After calling BuildSchedGraph, each machine instruction in the current
+    /// scheduling region is mapped to an SUnit.
+    DenseMap<MachineInstr*, SUnit*> MISUnitMap;
+
+    /// State internal to DAG building.
+    /// -------------------------------
 
-      /// Erase an existing element without freeing memory.
-      void erase(unsigned Reg) {
-        PhysRegSet.erase(Reg);
-        SUnits[Reg].clear();
-      }
-    };
     /// Defs, Uses - Remember where defs and uses of each register are as we
     /// iterate upward through the instructions. This is allocated here instead
     /// of inside BuildSchedGraph to avoid the need for it to be initialized and
@@ -170,22 +210,6 @@ namespace llvm {
     Reg2SUnitsMap Defs;
     Reg2SUnitsMap Uses;
 
-    /// An individual mapping from virtual register number to SUnit.
-    struct VReg2SUnit {
-      unsigned VirtReg;
-      SUnit *SU;
-
-      VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {}
-
-      unsigned getSparseSetKey() const {
-        return TargetRegisterInfo::virtReg2Index(VirtReg);
-      }
-    };
-    /// Use SparseSet as a SparseMap by relying on the fact that it never
-    /// compares ValueT's, only unsigned keys. This allows the set to be cleared
-    /// between scheduling regions in constant time as long as ValueT does not
-    /// require a destructor.
-    typedef SparseSet<VReg2SUnit> VReg2SUnitMap;
     /// Track the last instructon in this region defining each virtual register.
     VReg2SUnitMap VRegDefs;
 
@@ -198,20 +222,15 @@ namespace llvm {
     ///
     LoopDependencies LoopRegs;
 
-  protected:
-
     /// DbgValues - Remember instruction that preceeds DBG_VALUE.
+    /// These are generated by buildSchedGraph but persist so they can be
+    /// referenced when emitting the final schedule.
     typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
       DbgValueVector;
     DbgValueVector DbgValues;
     MachineInstr *FirstDbgValue;
 
   public:
-    MachineBasicBlock::iterator Begin;    // The beginning of the range to
-                                          // be scheduled. The range extends
-                                          // to InsertPos.
-    unsigned InsertPosIndex;              // The index in BB of InsertPos.
-
     explicit ScheduleDAGInstrs(MachineFunction &mf,
                                const MachineLoopInfo &mli,
                                const MachineDominatorTree &mdt,
@@ -220,77 +239,72 @@ namespace llvm {
 
     virtual ~ScheduleDAGInstrs() {}
 
-    /// NewSUnit - Creates a new SUnit and return a ptr to it.
-    ///
-    SUnit *NewSUnit(MachineInstr *MI) {
-#ifndef NDEBUG
-      const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0];
-#endif
-      SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
-      assert((Addr == 0 || Addr == &SUnits[0]) &&
-             "SUnits std::vector reallocated on the fly!");
-      SUnits.back().OrigNode = &SUnits.back();
-      return &SUnits.back();
-    }
+    /// begin - Return an iterator to the top of the current scheduling region.
+    MachineBasicBlock::iterator begin() const { return RegionBegin; }
 
+    /// end - Return an iterator to the bottom of the current scheduling region.
+    MachineBasicBlock::iterator end() const { return RegionEnd; }
 
-    /// Run - perform scheduling.
-    ///
-    void Run(MachineBasicBlock *bb,
-             MachineBasicBlock::iterator begin,
-             MachineBasicBlock::iterator end,
-             unsigned endindex);
+    /// newSUnit - Creates a new SUnit and return a ptr to it.
+    SUnit *newSUnit(MachineInstr *MI);
+
+    /// getSUnit - Return an existing SUnit for this MI, or NULL.
+    SUnit *getSUnit(MachineInstr *MI) const;
 
-    /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are
+    /// startBlock - Prepare to perform scheduling in the given block.
+    virtual void startBlock(MachineBasicBlock *BB);
+
+    /// finishBlock - Clean up after scheduling in the given block.
+    virtual void finishBlock();
+
+    /// Initialize the scheduler state for the next scheduling region.
+    virtual void enterRegion(MachineBasicBlock *bb,
+                             MachineBasicBlock::iterator begin,
+                             MachineBasicBlock::iterator end,
+                             unsigned endcount);
+
+    /// Notify that the scheduler has finished scheduling the current region.
+    virtual void exitRegion();
+
+    /// buildSchedGraph - Build SUnits from the MachineBasicBlock that we are
     /// input.
-    virtual void BuildSchedGraph(AliasAnalysis *AA);
+    void buildSchedGraph(AliasAnalysis *AA);
 
-    /// AddSchedBarrierDeps - Add dependencies from instructions in the current
+    /// addSchedBarrierDeps - Add dependencies from instructions in the current
     /// list of instructions being scheduled to scheduling barrier. We want to
     /// make sure instructions which define registers that are either used by
     /// the terminator or are live-out are properly scheduled. This is
     /// especially important when the definition latency of the return value(s)
     /// are too high to be hidden by the branch or when the liveout registers
     /// used by instructions in the fallthrough block.
-    void AddSchedBarrierDeps();
+    void addSchedBarrierDeps();
 
-    /// ComputeLatency - Compute node latency.
+    /// computeLatency - Compute node latency.
     ///
-    virtual void ComputeLatency(SUnit *SU);
+    virtual void computeLatency(SUnit *SU);
 
-    /// ComputeOperandLatency - Override dependence edge latency using
+    /// computeOperandLatency - Override dependence edge latency using
     /// operand use/def information
     ///
-    virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+    virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
                                        SDep& dep) const;
 
-    virtual MachineBasicBlock *EmitSchedule();
-
-    /// StartBlock - Prepare to perform scheduling in the given block.
-    ///
-    virtual void StartBlock(MachineBasicBlock *BB);
-
-    /// Schedule - Order nodes according to selected style, filling
+    /// schedule - Order nodes according to selected style, filling
     /// in the Sequence member.
     ///
-    virtual void Schedule() = 0;
-
-    /// FinishBlock - Clean up after scheduling in the given block.
-    ///
-    virtual void FinishBlock();
+    /// Typically, a scheduling algorithm will implement schedule() without
+    /// overriding enterRegion() or exitRegion().
+    virtual void schedule() = 0;
 
     virtual void dumpNode(const SUnit *SU) const;
 
+    /// Return a label for a DAG node that points to an instruction.
     virtual std::string getGraphNodeLabel(const SUnit *SU) const;
 
-  protected:
-    SUnit *getSUnit(MachineInstr *MI) const {
-      DenseMap<MachineInstr*, SUnit*>::const_iterator I = MISUnitMap.find(MI);
-      if (I == MISUnitMap.end())
-        return 0;
-      return I->second;
-    }
+    /// Return a label for the region of code covered by the DAG.
+    virtual std::string getDAGName() const;
 
+  protected:
     void initSUnits();
     void addPhysRegDataDeps(SUnit *SU, const MachineOperand &MO);
     void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
@@ -301,6 +315,26 @@ namespace llvm {
       return VRegDefs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
     }
   };
-}
+
+  /// newSUnit - Creates a new SUnit and return a ptr to it.
+  inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) {
+#ifndef NDEBUG
+    const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0];
+#endif
+    SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
+    assert((Addr == 0 || Addr == &SUnits[0]) &&
+           "SUnits std::vector reallocated on the fly!");
+    SUnits.back().OrigNode = &SUnits.back();
+    return &SUnits.back();
+  }
+
+  /// getSUnit - Return an existing SUnit for this MI, or NULL.
+  inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const {
+    DenseMap<MachineInstr*, SUnit*>::const_iterator I = MISUnitMap.find(MI);
+    if (I == MISUnitMap.end())
+      return 0;
+    return I->second;
+  }
+} // namespace llvm
 
 #endif
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index c475014..e50bff5 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -677,9 +677,6 @@
 /* Define to 1 if your <sys/time.h> declares `struct tm'. */
 #undef TM_IN_SYS_TIME
 
-/* Define if we have the oprofile JIT-support library */
-#undef USE_OPROFILE
-
 /* Define if use udis86 library */
 #undef USE_UDIS86
 
@@ -716,4 +713,10 @@
 /* Added by Kevin -- Maximum path length */
 #cmakedefine MAXPATHLEN ${MAXPATHLEN}
 
+/* Support for Intel JIT Events API is enabled */
+#cmakedefine LLVM_USE_INTEL_JITEVENTS 1
+
+/* Support for OProfile JIT API is enabled */
+#cmakedefine LLVM_USE_OPROFILE 1
+
 #endif
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index 1a996a2..723a5f6 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -615,6 +615,12 @@
 /* Installation prefix directory */
 #undef LLVM_PREFIX
 
+/* Define if we have the Intel JIT API runtime support library */
+#undef LLVM_USE_INTEL_JITEVENTS
+
+/* Define if we have the oprofile JIT-support library */
+#undef LLVM_USE_OPROFILE
+
 /* Major version of the LLVM API */
 #undef LLVM_VERSION_MAJOR
 
@@ -675,9 +681,6 @@
 /* Define to 1 if your <sys/time.h> declares `struct tm'. */
 #undef TM_IN_SYS_TIME
 
-/* Define if we have the oprofile JIT-support library */
-#undef USE_OPROFILE
-
 /* Define if use udis86 library */
 #undef USE_UDIS86
 
diff --git a/include/llvm/DerivedTypes.h b/include/llvm/DerivedTypes.h
index b9ade51..da5ad27 100644
--- a/include/llvm/DerivedTypes.h
+++ b/include/llvm/DerivedTypes.h
@@ -195,9 +195,10 @@ class StructType : public CompositeType {
     // This is the contents of the SubClassData field.
     SCDB_HasBody = 1,
     SCDB_Packed = 2,
-    SCDB_IsLiteral = 4
+    SCDB_IsLiteral = 4,
+    SCDB_IsSized = 8
   };
-  
+
   /// SymbolTableEntry - For a named struct that actually has a name, this is a
   /// pointer to the symbol table entry (maintained by LLVMContext) for the
   /// struct.  This is null if the type is an literal struct or if it is
@@ -248,6 +249,9 @@ public:
   /// isOpaque - Return true if this is a type with an identity that has no body
   /// specified yet.  These prints as 'opaque' in .ll files.
   bool isOpaque() const { return (getSubclassData() & SCDB_HasBody) == 0; }
+
+  /// isSized - Return true if this is a sized type.
+  bool isSized() const;
   
   /// hasName - Return true if this is a named struct that has a non-empty name.
   bool hasName() const { return SymbolTableEntry != 0; }
diff --git a/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h b/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h
new file mode 100644
index 0000000..ca87342
--- /dev/null
+++ b/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h
@@ -0,0 +1,102 @@
+//===-- IntelJITEventsWrapper.h - Intel JIT Events API Wrapper --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a wrapper for the Intel JIT Events API. It allows for the
+// implementation of the jitprofiling library to be swapped with an alternative
+// implementation (for testing). To include this file, you must have the
+// jitprofiling.h header available; it is available in Intel(R) VTune(TM)
+// Amplifier XE 2011.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INTEL_JIT_EVENTS_WRAPPER_H
+#define INTEL_JIT_EVENTS_WRAPPER_H
+
+#include <jitprofiling.h>
+
+namespace llvm {
+
+class IntelJITEventsWrapper {
+  // Function pointer types for testing implementation of Intel jitprofiling
+  // library
+  typedef int (*NotifyEventPtr)(iJIT_JVM_EVENT, void*);
+  typedef void (*RegisterCallbackExPtr)(void *, iJIT_ModeChangedEx );
+  typedef iJIT_IsProfilingActiveFlags (*IsProfilingActivePtr)(void);
+  typedef void (*FinalizeThreadPtr)(void);
+  typedef void (*FinalizeProcessPtr)(void);
+  typedef unsigned int (*GetNewMethodIDPtr)(void);
+
+  NotifyEventPtr NotifyEventFunc;
+  RegisterCallbackExPtr RegisterCallbackExFunc;
+  IsProfilingActivePtr IsProfilingActiveFunc;
+  FinalizeThreadPtr FinalizeThreadFunc;
+  FinalizeProcessPtr FinalizeProcessFunc;
+  GetNewMethodIDPtr GetNewMethodIDFunc;
+
+public:
+  bool isAmplifierRunning() {
+    return iJIT_IsProfilingActive() == iJIT_SAMPLING_ON;
+  }
+
+  IntelJITEventsWrapper()
+  : NotifyEventFunc(::iJIT_NotifyEvent),
+    RegisterCallbackExFunc(::iJIT_RegisterCallbackEx),
+    IsProfilingActiveFunc(::iJIT_IsProfilingActive),
+    FinalizeThreadFunc(::FinalizeThread),
+    FinalizeProcessFunc(::FinalizeProcess),
+    GetNewMethodIDFunc(::iJIT_GetNewMethodID) {
+  }
+
+  IntelJITEventsWrapper(NotifyEventPtr NotifyEventImpl,
+                   RegisterCallbackExPtr RegisterCallbackExImpl,
+                   IsProfilingActivePtr IsProfilingActiveImpl,
+                   FinalizeThreadPtr FinalizeThreadImpl,
+                   FinalizeProcessPtr FinalizeProcessImpl,
+                   GetNewMethodIDPtr GetNewMethodIDImpl)
+  : NotifyEventFunc(NotifyEventImpl),
+    RegisterCallbackExFunc(RegisterCallbackExImpl),
+    IsProfilingActiveFunc(IsProfilingActiveImpl),
+    FinalizeThreadFunc(FinalizeThreadImpl),
+    FinalizeProcessFunc(FinalizeProcessImpl),
+    GetNewMethodIDFunc(GetNewMethodIDImpl) {
+  }
+
+  // Sends an event anncouncing that a function has been emitted
+  //   return values are event-specific.  See Intel documentation for details.
+  int  iJIT_NotifyEvent(iJIT_JVM_EVENT EventType, void *EventSpecificData) {
+    if (!NotifyEventFunc)
+      return -1;
+    return NotifyEventFunc(EventType, EventSpecificData);
+  }
+
+  // Registers a callback function to receive notice of profiling state changes
+  void iJIT_RegisterCallbackEx(void *UserData,
+                               iJIT_ModeChangedEx NewModeCallBackFuncEx) {
+    if (RegisterCallbackExFunc)
+      RegisterCallbackExFunc(UserData, NewModeCallBackFuncEx);
+  }
+
+  // Returns the current profiler mode
+  iJIT_IsProfilingActiveFlags iJIT_IsProfilingActive(void) {
+    if (!IsProfilingActiveFunc)
+      return iJIT_NOTHING_RUNNING;
+    return IsProfilingActiveFunc();
+  }
+
+  // Generates a locally unique method ID for use in code registration
+  unsigned int iJIT_GetNewMethodID(void) {
+    if (!GetNewMethodIDFunc)
+      return -1;
+    return GetNewMethodIDFunc();
+  }
+};
+
+} //namespace llvm
+
+#endif //INTEL_JIT_EVENTS_WRAPPER_H
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
index 94212e1..eea603f 100644
--- a/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
 #define LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
 
+#include "llvm/Config/config.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/DebugLoc.h"
 
@@ -23,6 +24,8 @@
 namespace llvm {
 class Function;
 class MachineFunction;
+class OProfileWrapper;
+class IntelJITEventsWrapper;
 
 /// JITEvent_EmittedFunctionDetails - Helper struct for containing information
 /// about a generated machine code function.
@@ -72,11 +75,42 @@ public:
   /// to NotifyFunctionEmitted may have been destroyed by the time of the
   /// matching NotifyFreeingMachineCode call.
   virtual void NotifyFreeingMachineCode(void *) {}
-};
 
-// This returns NULL if support isn't available.
-JITEventListener *createOProfileJITEventListener();
+#if LLVM_USE_INTEL_JITEVENTS
+  // Construct an IntelJITEventListener
+  static JITEventListener *createIntelJITEventListener();
+
+  // Construct an IntelJITEventListener with a test Intel JIT API implementation
+  static JITEventListener *createIntelJITEventListener(
+                                      IntelJITEventsWrapper* AlternativeImpl);
+#else
+  static JITEventListener *createIntelJITEventListener() { return 0; }
+
+  static JITEventListener *createIntelJITEventListener(
+                                      IntelJITEventsWrapper* AlternativeImpl) {
+    return 0;
+  }
+#endif // USE_INTEL_JITEVENTS
+
+#if LLVM_USE_OPROFILE
+  // Construct an OProfileJITEventListener
+  static JITEventListener *createOProfileJITEventListener();
+
+  // Construct an OProfileJITEventListener with a test opagent implementation
+  static JITEventListener *createOProfileJITEventListener(
+                                      OProfileWrapper* AlternativeImpl);
+#else
+
+  static JITEventListener *createOProfileJITEventListener() { return 0; }
+
+  static JITEventListener *createOProfileJITEventListener(
+                                      OProfileWrapper* AlternativeImpl) {
+    return 0;
+  }
+#endif // USE_OPROFILE
+
+};
 
 } // end namespace llvm.
 
-#endif
+#endif // defined LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h
index 8eb7590..3587e38 100644
--- a/include/llvm/ExecutionEngine/JITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -47,6 +47,17 @@ public:
   /// debugging, and may be turned on by default in debug mode.
   virtual void setPoisonMemory(bool poison) = 0;
 
+  /// getPointerToNamedFunction - This method returns the address of the
+  /// specified function by using the dlsym function call.  As such it is only
+  /// useful for resolving library symbols, not code generated symbols.
+  ///
+  /// If AbortOnFailure is false and no function with the given name is
+  /// found, this function silently returns a null pointer. Otherwise,
+  /// it prints a message to stderr and aborts.
+  ///
+  virtual void *getPointerToNamedFunction(const std::string &Name,
+                                          bool AbortOnFailure = true) = 0;
+
   //===--------------------------------------------------------------------===//
   // Global Offset Table Management
   //===--------------------------------------------------------------------===//
diff --git a/include/llvm/ExecutionEngine/OProfileWrapper.h b/include/llvm/ExecutionEngine/OProfileWrapper.h
new file mode 100644
index 0000000..ab7f25e
--- /dev/null
+++ b/include/llvm/ExecutionEngine/OProfileWrapper.h
@@ -0,0 +1,124 @@
+//===-- OProfileWrapper.h - OProfile JIT API Wrapper ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file defines a OProfileWrapper object that detects if the oprofile
+// daemon is running, and provides wrappers for opagent functions used to
+// communicate with the oprofile JIT interface. The dynamic library libopagent
+// does not need to be linked directly as this object lazily loads the library
+// when the first op_ function is called.
+//
+// See http://oprofile.sourceforge.net/doc/devel/jit-interface.html for the
+// definition of the interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OPROFILE_WRAPPER_H
+#define OPROFILE_WRAPPER_H
+
+#include "llvm/Support/DataTypes.h"
+#include <opagent.h>
+
+namespace llvm {
+
+
+class OProfileWrapper {
+  typedef  op_agent_t    (*op_open_agent_ptr_t)();
+  typedef  int           (*op_close_agent_ptr_t)(op_agent_t);
+  typedef  int           (*op_write_native_code_ptr_t)(op_agent_t,
+                                                const char*,
+                                                uint64_t,
+                                                void const*,
+                                                const unsigned int);
+  typedef  int           (*op_write_debug_line_info_ptr_t)(op_agent_t,
+                                                void const*,
+                                                size_t,
+                                                struct debug_line_info const*);
+  typedef  int           (*op_unload_native_code_ptr_t)(op_agent_t, uint64_t);
+
+  // Also used for op_minor_version function which has the same signature
+  typedef  int           (*op_major_version_ptr_t)(void);
+
+  // This is not a part of the opagent API, but is useful nonetheless
+  typedef  bool          (*IsOProfileRunningPtrT)(void);
+
+
+  op_agent_t                      Agent;
+  op_open_agent_ptr_t             OpenAgentFunc;
+  op_close_agent_ptr_t            CloseAgentFunc;
+  op_write_native_code_ptr_t      WriteNativeCodeFunc;
+  op_write_debug_line_info_ptr_t  WriteDebugLineInfoFunc;
+  op_unload_native_code_ptr_t     UnloadNativeCodeFunc;
+  op_major_version_ptr_t          MajorVersionFunc;
+  op_major_version_ptr_t          MinorVersionFunc;
+  IsOProfileRunningPtrT           IsOProfileRunningFunc;
+
+  bool Initialized;
+
+public:
+  OProfileWrapper();
+
+  // For testing with a mock opagent implementation, skips the dynamic load and
+  // the function resolution.
+  OProfileWrapper(op_open_agent_ptr_t OpenAgentImpl,
+                  op_close_agent_ptr_t CloseAgentImpl,
+                  op_write_native_code_ptr_t WriteNativeCodeImpl,
+                  op_write_debug_line_info_ptr_t WriteDebugLineInfoImpl,
+                  op_unload_native_code_ptr_t UnloadNativeCodeImpl,
+                  op_major_version_ptr_t MajorVersionImpl,
+                  op_major_version_ptr_t MinorVersionImpl,
+                  IsOProfileRunningPtrT MockIsOProfileRunningImpl = 0)
+  : OpenAgentFunc(OpenAgentImpl),
+    CloseAgentFunc(CloseAgentImpl),
+    WriteNativeCodeFunc(WriteNativeCodeImpl),
+    WriteDebugLineInfoFunc(WriteDebugLineInfoImpl),
+    UnloadNativeCodeFunc(UnloadNativeCodeImpl),
+    MajorVersionFunc(MajorVersionImpl),
+    MinorVersionFunc(MinorVersionImpl),
+    IsOProfileRunningFunc(MockIsOProfileRunningImpl),
+    Initialized(true)
+  {
+  }
+
+  // Calls op_open_agent in the oprofile JIT library and saves the returned
+  // op_agent_t handle internally so it can be used when calling all the other
+  // op_* functions. Callers of this class do not need to keep track of
+  // op_agent_t objects.
+  bool op_open_agent();
+
+  int op_close_agent();
+  int op_write_native_code(const char* name,
+                           uint64_t addr,
+                           void const* code,
+                           const unsigned int size);
+  int op_write_debug_line_info(void const* code,
+                               size_t num_entries,
+                               struct debug_line_info const* info);
+  int op_unload_native_code(uint64_t addr);
+  int op_major_version(void);
+  int op_minor_version(void);
+
+  // Returns true if the oprofiled process is running, the opagent library is
+  // loaded and a connection to the agent has been established, and false
+  // otherwise.
+  bool isAgentAvailable();
+
+private:
+  // Loads the libopagent library and initializes this wrapper if the oprofile
+  // daemon is running
+  bool initialize();
+
+  // Searches /proc for the oprofile daemon and returns true if the process if
+  // found, or false otherwise.
+  bool checkForOProfileProcEntry();
+
+  bool isOProfileRunning();
+};
+
+} // namespace llvm
+
+#endif //OPROFILE_WRAPPER_H
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
index 8ad316b..aabfd03 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -45,15 +45,9 @@ public:
   virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
                                        unsigned SectionID) = 0;
 
-  // Allocate ActualSize bytes, or more, for the named function. Return
-  // a pointer to the allocated memory and update Size to reflect how much
-  // memory was acutally allocated.
-  virtual uint8_t *startFunctionBody(const char *Name, uintptr_t &Size) = 0;
-
-  // Mark the end of the function, including how much of the allocated
-  // memory was actually used.
-  virtual void endFunctionBody(const char *Name, uint8_t *FunctionStart,
-                               uint8_t *FunctionEnd) = 0;
+  virtual void *getPointerToNamedFunction(const std::string &Name,
+                                          bool AbortOnFailure = true) = 0;
+
 };
 
 class RuntimeDyld {
diff --git a/include/llvm/Function.h b/include/llvm/Function.h
index b5916b7..e17cd87 100644
--- a/include/llvm/Function.h
+++ b/include/llvm/Function.h
@@ -146,7 +146,7 @@ public:
   /// The particular intrinsic functions which correspond to this value are
   /// defined in llvm/Intrinsics.h.
   ///
-  unsigned getIntrinsicID() const LLVM_ATTRIBUTE_READONLY;
+  unsigned getIntrinsicID() const LLVM_READONLY;
   bool isIntrinsic() const { return getIntrinsicID() != 0; }
 
   /// getCallingConv()/setCallingConv(CC) - These method get and set the
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
index 52fa8d7..e3676fe 100644
--- a/include/llvm/Instructions.h
+++ b/include/llvm/Instructions.h
@@ -24,7 +24,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <iterator>
-#include <limits.h>
 
 namespace llvm {
 
@@ -2468,8 +2467,120 @@ class SwitchInst : public TerminatorInst {
 protected:
   virtual SwitchInst *clone_impl() const;
 public:
+  
+  // -2
+  static const unsigned DefaultPseudoIndex = static_cast<unsigned>(~0L-1);
+  
+  template <class SwitchInstTy, class ConstantIntTy, class BasicBlockTy> 
+  class CaseIteratorT {
+  protected:
+    
+    SwitchInstTy *SI;
+    unsigned Index;
+    
+  public:
+    
+    typedef CaseIteratorT<SwitchInstTy, ConstantIntTy, BasicBlockTy> Self;
+    
+    /// Initializes case iterator for given SwitchInst and for given
+    /// case number.    
+    CaseIteratorT(SwitchInstTy *SI, unsigned CaseNum) {
+      this->SI = SI;
+      Index = CaseNum;
+    }
+    
+    /// Initializes case iterator for given SwitchInst and for given
+    /// TerminatorInst's successor index.
+    static Self fromSuccessorIndex(SwitchInstTy *SI, unsigned SuccessorIndex) {
+      assert(SuccessorIndex < SI->getNumSuccessors() &&
+             "Successor index # out of range!");    
+      return SuccessorIndex != 0 ? 
+             Self(SI, SuccessorIndex - 1) :
+             Self(SI, DefaultPseudoIndex);       
+    }
+    
+    /// Resolves case value for current case.
+    ConstantIntTy *getCaseValue() {
+      assert(Index < SI->getNumCases() && "Index out the number of cases.");
+      return reinterpret_cast<ConstantIntTy*>(SI->getOperand(2 + Index*2));
+    }
+    
+    /// Resolves successor for current case.
+    BasicBlockTy *getCaseSuccessor() {
+      assert((Index < SI->getNumCases() || DefaultPseudoIndex) &&
+             "Index out the number of cases.");
+      return SI->getSuccessor(getSuccessorIndex());      
+    }
+    
+    /// Returns number of current case.
+    unsigned getCaseIndex() const { return Index; }
+    
+    /// Returns TerminatorInst's successor index for current case successor.
+    unsigned getSuccessorIndex() const {
+      assert((Index == DefaultPseudoIndex || Index < SI->getNumCases()) &&
+             "Index out the number of cases.");
+      return Index != DefaultPseudoIndex ? Index + 1 : 0;
+    }
+    
+    Self operator++() {
+      // Check index correctness after increment.
+      // Note: Index == getNumCases() means end().      
+      assert(Index+1 <= SI->getNumCases() && "Index out the number of cases.");
+      ++Index;
+      return *this;
+    }
+    Self operator++(int) {
+      Self tmp = *this;
+      ++(*this);
+      return tmp;
+    }
+    Self operator--() { 
+      // Check index correctness after decrement.
+      // Note: Index == getNumCases() means end().
+      // Also allow "-1" iterator here. That will became valid after ++.
+      assert((Index == 0 || Index-1 <= SI->getNumCases()) &&
+             "Index out the number of cases.");
+      --Index;
+      return *this;
+    }
+    Self operator--(int) {
+      Self tmp = *this;
+      --(*this);
+      return tmp;
+    }
+    bool operator==(const Self& RHS) const {
+      assert(RHS.SI == SI && "Incompatible operators.");
+      return RHS.Index == Index;
+    }
+    bool operator!=(const Self& RHS) const {
+      assert(RHS.SI == SI && "Incompatible operators.");
+      return RHS.Index != Index;
+    }
+  };
+  
+  typedef CaseIteratorT<const SwitchInst, const ConstantInt, const BasicBlock>
+    ConstCaseIt;
 
-  enum { ErrorIndex = UINT_MAX };
+  class CaseIt : public CaseIteratorT<SwitchInst, ConstantInt, BasicBlock> {
+    
+    typedef CaseIteratorT<SwitchInst, ConstantInt, BasicBlock> ParentTy;
+  
+  public:
+    
+    CaseIt(const ParentTy& Src) : ParentTy(Src) {}
+    CaseIt(SwitchInst *SI, unsigned CaseNum) : ParentTy(SI, CaseNum) {}
+
+    /// Sets the new value for current case.    
+    void setValue(ConstantInt *V) {
+      assert(Index < SI->getNumCases() && "Index out the number of cases.");
+      SI->setOperand(2 + Index*2, reinterpret_cast<Value*>(V));
+    }
+    
+    /// Sets the new successor for current case.
+    void setSuccessor(BasicBlock *S) {
+      SI->setSuccessor(getSuccessorIndex(), S);      
+    }
+  };
 
   static SwitchInst *Create(Value *Value, BasicBlock *Default,
                             unsigned NumCases, Instruction *InsertBefore = 0) {
@@ -2479,6 +2590,7 @@ public:
                             unsigned NumCases, BasicBlock *InsertAtEnd) {
     return new SwitchInst(Value, Default, NumCases, InsertAtEnd);
   }
+  
   ~SwitchInst();
 
   /// Provide fast operand accessors
@@ -2502,78 +2614,84 @@ public:
     return getNumOperands()/2 - 1;
   }
 
-  /// getCaseValue - Return the specified case value. Note that case #0, means
-  /// first case, not a default case.
-  ConstantInt *getCaseValue(unsigned i) {
-    assert(i < getNumCases() && "Illegal case value to get!");
-    return reinterpret_cast<ConstantInt*>(getOperand(2 + i*2));
+  /// Returns a read/write iterator that points to the first
+  /// case in SwitchInst.
+  CaseIt case_begin() {
+    return CaseIt(this, 0);
   }
-
-  /// getCaseValue - Return the specified case value. Note that case #0, means
-  /// first case, not a default case.
-  const ConstantInt *getCaseValue(unsigned i) const {
-    assert(i < getNumCases() && "Illegal case value to get!");
-    return reinterpret_cast<const ConstantInt*>(getOperand(2 + i*2));
+  /// Returns a read-only iterator that points to the first
+  /// case in the SwitchInst.
+  ConstCaseIt case_begin() const {
+    return ConstCaseIt(this, 0);
   }
-
-  // setSuccessorValue - Updates the value associated with the specified
-  // case.
-  void setCaseValue(unsigned i, ConstantInt *CaseValue) {
-    assert(i < getNumCases() && "Case index # out of range!");
-    setOperand(2 + i*2, reinterpret_cast<Value*>(CaseValue));
+  
+  /// Returns a read/write iterator that points one past the last
+  /// in the SwitchInst.
+  CaseIt case_end() {
+    return CaseIt(this, getNumCases());
+  }
+  /// Returns a read-only iterator that points one past the last
+  /// in the SwitchInst.
+  ConstCaseIt case_end() const {
+    return ConstCaseIt(this, getNumCases());
+  }
+  /// Returns an iterator that points to the default case.
+  /// Note: this iterator allows to resolve successor only. Attempt
+  /// to resolve case value causes an assertion.
+  /// Also note, that increment and decrement also causes an assertion and
+  /// makes iterator invalid. 
+  CaseIt case_default() {
+    return CaseIt(this, DefaultPseudoIndex);
+  }
+  ConstCaseIt case_default() const {
+    return ConstCaseIt(this, DefaultPseudoIndex);
   }
-
+  
   /// findCaseValue - Search all of the case values for the specified constant.
-  /// If it is explicitly handled, return the case number of it, otherwise
-  /// return ErrorIndex to indicate that it is handled by the default handler.
-  unsigned findCaseValue(const ConstantInt *C) const {
-    for (unsigned i = 0, e = getNumCases(); i != e; ++i)
-      if (getCaseValue(i) == C)
+  /// If it is explicitly handled, return the case iterator of it, otherwise
+  /// return default case iterator to indicate
+  /// that it is handled by the default handler.
+  CaseIt findCaseValue(const ConstantInt *C) {
+    for (CaseIt i = case_begin(), e = case_end(); i != e; ++i)
+      if (i.getCaseValue() == C)
         return i;
-    return ErrorIndex;
-  }
-  
-  /// resolveSuccessorIndex - Converts case index to index of its successor
-  /// index in TerminatorInst successors collection.
-  /// If CaseIndex == ErrorIndex, "default" successor will returned then. 
-  unsigned resolveSuccessorIndex(unsigned CaseIndex) const {
-    assert((CaseIndex == ErrorIndex || CaseIndex < getNumCases()) &&
-           "Case index # out of range!");
-    return CaseIndex != ErrorIndex ? CaseIndex + 1 : 0;
+    return case_default();
   }
+  ConstCaseIt findCaseValue(const ConstantInt *C) const {
+    for (ConstCaseIt i = case_begin(), e = case_end(); i != e; ++i)
+      if (i.getCaseValue() == C)
+        return i;
+    return case_default();
+  }    
   
-  /// resolveCaseIndex - Converts index of successor in TerminatorInst
-  /// collection to index of case that corresponds to this successor.
-  unsigned resolveCaseIndex(unsigned SuccessorIndex) const {
-    assert(SuccessorIndex < getNumSuccessors() &&
-           "Successor index # out of range!");    
-    return SuccessorIndex != 0 ? SuccessorIndex - 1 : ErrorIndex; 
-  }
-
   /// findCaseDest - Finds the unique case value for a given successor. Returns
   /// null if the successor is not found, not unique, or is the default case.
   ConstantInt *findCaseDest(BasicBlock *BB) {
     if (BB == getDefaultDest()) return NULL;
 
     ConstantInt *CI = NULL;
-    for (unsigned i = 0, e = getNumCases(); i != e; ++i) {
-      if (getSuccessor(i + 1) == BB) {
+    for (CaseIt i = case_begin(), e = case_end(); i != e; ++i) {
+      if (i.getCaseSuccessor() == BB) {
         if (CI) return NULL;   // Multiple cases lead to BB.
-        else CI = getCaseValue(i);
+        else CI = i.getCaseValue();
       }
     }
     return CI;
   }
 
   /// addCase - Add an entry to the switch instruction...
-  ///
+  /// Note:
+  /// This action invalidates case_end(). Old case_end() iterator will
+  /// point to the added case.
   void addCase(ConstantInt *OnVal, BasicBlock *Dest);
 
   /// removeCase - This method removes the specified case and its successor
   /// from the switch instruction. Note that this operation may reorder the
   /// remaining cases at index idx and above.
-  ///
-  void removeCase(unsigned idx);
+  /// Note:
+  /// This action invalidates iterators for all cases following the one removed,
+  /// including the case_end() iterator.
+  void removeCase(CaseIt i);
 
   unsigned getNumSuccessors() const { return getNumOperands()/2; }
   BasicBlock *getSuccessor(unsigned idx) const {
@@ -2585,36 +2703,6 @@ public:
     setOperand(idx*2+1, (Value*)NewSucc);
   }
 
-  /// Resolves successor for idx-th case.
-  /// Use getCaseSuccessor instead of TerminatorInst::getSuccessor,
-  /// since internal SwitchInst organization of operands/successors is
-  /// hidden and may be changed in any moment.
-  BasicBlock *getCaseSuccessor(unsigned idx) const {
-    return getSuccessor(resolveSuccessorIndex(idx));
-  }
-
-  /// Set new successor for idx-th case.
-  /// Use setCaseSuccessor instead of TerminatorInst::setSuccessor,
-  /// since internal SwitchInst organization of operands/successors is
-  /// hidden and may be changed in any moment.
-  void setCaseSuccessor(unsigned idx, BasicBlock *NewSucc) {
-    setSuccessor(resolveSuccessorIndex(idx), NewSucc);
-  }
-
-  // getSuccessorValue - Return the value associated with the specified
-  // successor.
-  ConstantInt *getSuccessorValue(unsigned idx) const {
-    assert(idx < getNumSuccessors() && "Successor # out of range!");
-    return reinterpret_cast<ConstantInt*>(getOperand(idx*2));
-  }
-
-  // setSuccessorValue - Updates the value associated with the specified
-  // successor.
-  void setSuccessorValue(unsigned idx, ConstantInt* SuccessorValue) {
-    assert(idx < getNumSuccessors() && "Successor # out of range!");
-    setOperand(idx*2, reinterpret_cast<Value*>(SuccessorValue));
-  }
-
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static inline bool classof(const SwitchInst *) { return true; }
   static inline bool classof(const Instruction *I) {
diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h
index ca497b8..186612d 100644
--- a/include/llvm/MC/MCInstrDesc.h
+++ b/include/llvm/MC/MCInstrDesc.h
@@ -58,17 +58,17 @@ public:
   /// if the operand is a register.  If isLookupPtrRegClass is set, then this is
   /// an index that is passed to TargetRegisterInfo::getPointerRegClass(x) to
   /// get a dynamic register class.
-  short RegClass;
+  int16_t RegClass;
 
   /// Flags - These are flags from the MCOI::OperandFlags enum.
-  unsigned short Flags;
+  uint8_t Flags;
+
+  /// OperandType - Information about the type of the operand.
+  uint8_t OperandType;
 
   /// Lower 16 bits are used to specify which constraints are set. The higher 16
   /// bits are used to specify the value of constraints (4 bits each).
-  unsigned Constraints;
-
-  /// OperandType - Information about the type of the operand.
-  MCOI::OperandType OperandType;
+  uint32_t Constraints;
   /// Currently no other information.
 
   /// isLookupPtrRegClass - Set if this operand is a pointer value and it
@@ -139,8 +139,8 @@ public:
   unsigned short  Size;          // Number of bytes in encoding.
   unsigned        Flags;         // Flags identifying machine instr class
   uint64_t        TSFlags;       // Target Specific Flag values
-  const unsigned *ImplicitUses;  // Registers implicitly read by this instr
-  const unsigned *ImplicitDefs;  // Registers implicitly defined by this instr
+  const uint16_t *ImplicitUses;  // Registers implicitly read by this instr
+  const uint16_t *ImplicitDefs;  // Registers implicitly defined by this instr
   const MCOperandInfo *OpInfo;   // 'NumOperands' entries about operands
 
   /// getOperandConstraint - Returns the value of the specific constraint if
@@ -448,7 +448,7 @@ public:
   /// does.
   ///
   /// This method returns null if the instruction has no implicit uses.
-  const unsigned *getImplicitUses() const {
+  const uint16_t *getImplicitUses() const {
     return ImplicitUses;
   }
 
@@ -471,7 +471,7 @@ public:
   /// EAX/EDX/EFLAGS registers.
   ///
   /// This method returns null if the instruction has no implicit defs.
-  const unsigned *getImplicitDefs() const {
+  const uint16_t *getImplicitDefs() const {
     return ImplicitDefs;
   }
 
@@ -487,7 +487,7 @@ public:
   /// hasImplicitUseOfPhysReg - Return true if this instruction implicitly
   /// uses the specified physical register.
   bool hasImplicitUseOfPhysReg(unsigned Reg) const {
-    if (const unsigned *ImpUses = ImplicitUses)
+    if (const uint16_t *ImpUses = ImplicitUses)
       for (; *ImpUses; ++ImpUses)
         if (*ImpUses == Reg) return true;
     return false;
@@ -496,7 +496,7 @@ public:
   /// hasImplicitDefOfPhysReg - Return true if this instruction implicitly
   /// defines the specified physical register.
   bool hasImplicitDefOfPhysReg(unsigned Reg) const {
-    if (const unsigned *ImpDefs = ImplicitDefs)
+    if (const uint16_t *ImpDefs = ImplicitDefs)
       for (; *ImpDefs; ++ImpDefs)
         if (*ImpDefs == Reg) return true;
     return false;
diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h
index 762558d..1937fdc 100644
--- a/include/llvm/MC/MCRegisterInfo.h
+++ b/include/llvm/MC/MCRegisterInfo.h
@@ -31,10 +31,10 @@ public:
   const char *Name;
   const iterator RegsBegin;
   const uint8_t *const RegSet;
-  const uint8_t RegsSize;
-  const uint8_t RegSetSize;
-  const uint8_t ID;
-  const uint8_t RegSize, Alignment; // Size & Alignment of register in bytes
+  const uint16_t RegsSize;
+  const uint16_t RegSetSize;
+  const uint16_t ID;
+  const uint16_t RegSize, Alignment; // Size & Alignment of register in bytes
   const int8_t CopyCost;
   const bool Allocatable;
 
@@ -107,9 +107,9 @@ public:
 ///
 struct MCRegisterDesc {
   const char *Name;         // Printable name for the reg (for debugging)
-  uint16_t   Overlaps;      // Overlapping registers, described above
-  uint16_t   SubRegs;       // Sub-register set, described above
-  uint16_t   SuperRegs;     // Super-register set, described above
+  uint32_t   Overlaps;      // Overlapping registers, described above
+  uint32_t   SubRegs;       // Sub-register set, described above
+  uint32_t   SuperRegs;     // Super-register set, described above
 };
 
 /// MCRegisterInfo base class - We assume that the target defines a static
diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h
index e6df856..358b27a 100644
--- a/include/llvm/Object/Archive.h
+++ b/include/llvm/Object/Archive.h
@@ -131,7 +131,7 @@ public:
   // Cast methods.
   static inline bool classof(Archive const *v) { return true; }
   static inline bool classof(Binary const *v) {
-    return v->getType() == Binary::isArchive;
+    return v->isArchive();
   }
 
 private:
diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h
index cd092fd..77a08d5 100644
--- a/include/llvm/Object/Binary.h
+++ b/include/llvm/Object/Binary.h
@@ -37,16 +37,25 @@ protected:
   Binary(unsigned int Type, MemoryBuffer *Source);
 
   enum {
-    isArchive,
-
+    ID_Archive,
     // Object and children.
-    isObject,
-    isCOFF,
-    isELF,
-    isMachO,
-    lastObject
+    ID_StartObjects,
+    ID_COFF,
+    ID_ELF32L, // ELF 32-bit, little endian
+    ID_ELF32B, // ELF 32-bit, big endian
+    ID_ELF64L, // ELF 64-bit, little endian
+    ID_ELF64B, // ELF 64-bit, big endian
+    ID_MachO,
+    ID_EndObjects
   };
 
+  static inline unsigned int getELFType(bool isLittleEndian, bool is64Bits) {
+    if (isLittleEndian)
+      return is64Bits ? ID_ELF64L : ID_ELF32L;
+    else
+      return is64Bits ? ID_ELF64B : ID_ELF32B;
+  }
+
 public:
   virtual ~Binary();
 
@@ -56,9 +65,37 @@ public:
   // Cast methods.
   unsigned int getType() const { return TypeID; }
   static inline bool classof(const Binary *v) { return true; }
+
+  // Convenience methods
+  bool isObject() const {
+    return TypeID > ID_StartObjects && TypeID < ID_EndObjects;
+  }
+
+  bool isArchive() const {
+    return TypeID == ID_Archive;
+  }
+
+  bool isELF() const {
+    return TypeID >= ID_ELF32L && TypeID <= ID_ELF64B;
+  }
+
+  bool isMachO() const {
+    return TypeID == ID_MachO;
+  }
+
+  bool isCOFF() const {
+    return TypeID == ID_COFF;
+  }
 };
 
+/// @brief Create a Binary from Source, autodetecting the file type.
+///
+/// @param Source The data to create the Binary from. Ownership is transfered
+///        to Result if successful. If an error is returned, Source is destroyed
+///        by createBinary before returning.
+/// @param Result A pointer to the resulting Binary if no error occured.
 error_code createBinary(MemoryBuffer *Source, OwningPtr<Binary> &Result);
+
 error_code createBinary(StringRef Path, OwningPtr<Binary> &Result);
 
 }
diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h
index 4f90187..91971bb 100644
--- a/include/llvm/Object/COFF.h
+++ b/include/llvm/Object/COFF.h
@@ -19,6 +19,9 @@
 #include "llvm/Support/Endian.h"
 
 namespace llvm {
+  template <typename T>
+  class ArrayRef;
+
 namespace object {
 
 struct coff_file_header {
@@ -177,9 +180,12 @@ public:
     return ec;
   }
   error_code getSymbolName(const coff_symbol *symbol, StringRef &Res) const;
+  error_code getSectionName(const coff_section *Sec, StringRef &Res) const;
+  error_code getSectionContents(const coff_section *Sec,
+                                ArrayRef<uint8_t> &Res) const;
 
   static inline bool classof(const Binary *v) {
-    return v->getType() == isCOFF;
+    return v->isCOFF();
   }
   static inline bool classof(const COFFObjectFile *v) { return true; }
 };
diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h
index e746b0a..d33f317 100644
--- a/include/llvm/Object/ELF.h
+++ b/include/llvm/Object/ELF.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ELF.h"
@@ -176,7 +177,72 @@ struct Elf_Sym_Impl : Elf_Sym_Base<target_endianness, is64Bits> {
   }
 };
 
-// Elf_Dyn: Entry in the dynamic table
+/// Elf_Versym: This is the structure of entries in the SHT_GNU_versym section
+/// (.gnu.version). This structure is identical for ELF32 and ELF64.
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Versym_Impl {
+  LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+  Elf_Half vs_index;   // Version index with flags (e.g. VERSYM_HIDDEN)
+};
+
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Verdaux_Impl;
+
+/// Elf_Verdef: This is the structure of entries in the SHT_GNU_verdef section
+/// (.gnu.version_d). This structure is identical for ELF32 and ELF64.
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Verdef_Impl {
+  LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+  typedef Elf_Verdaux_Impl<target_endianness, is64Bits> Elf_Verdaux;
+  Elf_Half vd_version; // Version of this structure (e.g. VER_DEF_CURRENT)
+  Elf_Half vd_flags;   // Bitwise flags (VER_DEF_*)
+  Elf_Half vd_ndx;     // Version index, used in .gnu.version entries
+  Elf_Half vd_cnt;     // Number of Verdaux entries
+  Elf_Word vd_hash;    // Hash of name
+  Elf_Word vd_aux;     // Offset to the first Verdaux entry (in bytes)
+  Elf_Word vd_next;    // Offset to the next Verdef entry (in bytes)
+
+  /// Get the first Verdaux entry for this Verdef.
+  const Elf_Verdaux *getAux() const {
+    return reinterpret_cast<const Elf_Verdaux*>((const char*)this + vd_aux);
+  }
+};
+
+/// Elf_Verdaux: This is the structure of auxilary data in the SHT_GNU_verdef
+/// section (.gnu.version_d). This structure is identical for ELF32 and ELF64.
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Verdaux_Impl {
+  LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+  Elf_Word vda_name; // Version name (offset in string table)
+  Elf_Word vda_next; // Offset to next Verdaux entry (in bytes)
+};
+
+/// Elf_Verneed: This is the structure of entries in the SHT_GNU_verneed
+/// section (.gnu.version_r). This structure is identical for ELF32 and ELF64.
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Verneed_Impl {
+  LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+  Elf_Half vn_version; // Version of this structure (e.g. VER_NEED_CURRENT)
+  Elf_Half vn_cnt;     // Number of associated Vernaux entries
+  Elf_Word vn_file;    // Library name (string table offset)
+  Elf_Word vn_aux;     // Offset to first Vernaux entry (in bytes)
+  Elf_Word vn_next;    // Offset to next Verneed entry (in bytes)
+};
+
+/// Elf_Vernaux: This is the structure of auxiliary data in SHT_GNU_verneed
+/// section (.gnu.version_r). This structure is identical for ELF32 and ELF64.
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Vernaux_Impl {
+  LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+  Elf_Word vna_hash;  // Hash of dependency name
+  Elf_Half vna_flags; // Bitwise Flags (VER_FLAG_*)
+  Elf_Half vna_other; // Version index, used in .gnu.version entries
+  Elf_Word vna_name;  // Dependency name
+  Elf_Word vna_next;  // Offset to next Vernaux entry (in bytes)
+};
+
+/// Elf_Dyn_Base: This structure matches the form of entries in the dynamic
+///               table section (.dynamic) look like.
 template<support::endianness target_endianness, bool is64Bits>
 struct Elf_Dyn_Base;
 
@@ -200,6 +266,7 @@ struct Elf_Dyn_Base<target_endianness, true> {
   } d_un;
 };
 
+/// Elf_Dyn_Impl: This inherits from Elf_Dyn_Base, adding getters and setters.
 template<support::endianness target_endianness, bool is64Bits>
 struct Elf_Dyn_Impl : Elf_Dyn_Base<target_endianness, is64Bits> {
   using Elf_Dyn_Base<target_endianness, is64Bits>::d_tag;
@@ -323,6 +390,11 @@ class ELFObjectFile : public ObjectFile {
   typedef Elf_Dyn_Impl<target_endianness, is64Bits> Elf_Dyn;
   typedef Elf_Rel_Impl<target_endianness, is64Bits, false> Elf_Rel;
   typedef Elf_Rel_Impl<target_endianness, is64Bits, true> Elf_Rela;
+  typedef Elf_Verdef_Impl<target_endianness, is64Bits> Elf_Verdef;
+  typedef Elf_Verdaux_Impl<target_endianness, is64Bits> Elf_Verdaux;
+  typedef Elf_Verneed_Impl<target_endianness, is64Bits> Elf_Verneed;
+  typedef Elf_Vernaux_Impl<target_endianness, is64Bits> Elf_Vernaux;
+  typedef Elf_Versym_Impl<target_endianness, is64Bits> Elf_Versym;
   typedef DynRefImpl<target_endianness, is64Bits> DynRef;
   typedef content_iterator<DynRef> dyn_iterator;
 
@@ -364,15 +436,48 @@ private:
   const Elf_Shdr *dot_shstrtab_sec; // Section header string table.
   const Elf_Shdr *dot_strtab_sec;   // Symbol header string table.
   const Elf_Shdr *dot_dynstr_sec;   // Dynamic symbol string table.
+
+  // SymbolTableSections[0] always points to the dynamic string table section
+  // header, or NULL if there is no dynamic string table.
   Sections_t SymbolTableSections;
   IndexMap_t SymbolTableSectionsIndexMap;
   DenseMap<const Elf_Sym*, ELF::Elf64_Word> ExtendedSymbolTable;
 
-  const Elf_Shdr *dot_dynamic_sec; // .dynamic
+  const Elf_Shdr *dot_dynamic_sec;       // .dynamic
+  const Elf_Shdr *dot_gnu_version_sec;   // .gnu.version
+  const Elf_Shdr *dot_gnu_version_r_sec; // .gnu.version_r
+  const Elf_Shdr *dot_gnu_version_d_sec; // .gnu.version_d
+
   // Pointer to SONAME entry in dynamic string table
   // This is set the first time getLoadName is called.
   mutable const char *dt_soname;
 
+  // Records for each version index the corresponding Verdef or Vernaux entry.
+  // This is filled the first time LoadVersionMap() is called.
+  class VersionMapEntry : public PointerIntPair<const void*, 1> {
+    public:
+    // If the integer is 0, this is an Elf_Verdef*.
+    // If the integer is 1, this is an Elf_Vernaux*.
+    VersionMapEntry() : PointerIntPair<const void*, 1>(NULL, 0) { }
+    VersionMapEntry(const Elf_Verdef *verdef)
+        : PointerIntPair<const void*, 1>(verdef, 0) { }
+    VersionMapEntry(const Elf_Vernaux *vernaux)
+        : PointerIntPair<const void*, 1>(vernaux, 1) { }
+    bool isNull() const { return getPointer() == NULL; }
+    bool isVerdef() const { return !isNull() && getInt() == 0; }
+    bool isVernaux() const { return !isNull() && getInt() == 1; }
+    const Elf_Verdef *getVerdef() const {
+      return isVerdef() ? (const Elf_Verdef*)getPointer() : NULL;
+    }
+    const Elf_Vernaux *getVernaux() const {
+      return isVernaux() ? (const Elf_Vernaux*)getPointer() : NULL;
+    }
+  };
+  mutable SmallVector<VersionMapEntry, 16> VersionMap;
+  void LoadVersionDefs(const Elf_Shdr *sec) const;
+  void LoadVersionNeeds(const Elf_Shdr *ec) const;
+  void LoadVersionMap() const;
+
   /// @brief Map sections to an array of relocation sections that reference
   ///        them sorted by section index.
   RelocMap_t SectionRelocMap;
@@ -396,6 +501,10 @@ private:
   error_code      getSymbolName(const Elf_Shdr *section,
                                 const Elf_Sym *Symb,
                                 StringRef &Res) const;
+  error_code      getSymbolVersion(const Elf_Shdr *section,
+                                   const Elf_Sym *Symb,
+                                   StringRef &Version,
+                                   bool &IsDefault) const;
   void VerifyStrTab(const Elf_Shdr *sh) const;
 
 protected:
@@ -404,7 +513,8 @@ protected:
 
 public:
   const Elf_Dyn  *getDyn(DataRefImpl DynData) const;
-
+  error_code getSymbolVersion(SymbolRef Symb, StringRef &Version,
+                              bool &IsDefault) const;
 protected:
   virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
   virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
@@ -473,6 +583,7 @@ public:
 
   virtual uint8_t getBytesInAddress() const;
   virtual StringRef getFileFormatName() const;
+  virtual StringRef getObjectType() const { return "ELF"; }
   virtual unsigned getArch() const;
   virtual StringRef getLoadName() const;
 
@@ -484,11 +595,95 @@ public:
   // Methods for type inquiry through isa, cast, and dyn_cast
   bool isDyldType() const { return isDyldELFObject; }
   static inline bool classof(const Binary *v) {
-    return v->getType() == Binary::isELF;
+    return v->getType() == getELFType(target_endianness == support::little,
+                                      is64Bits);
   }
   static inline bool classof(const ELFObjectFile *v) { return true; }
 };
 
+// Iterate through the version definitions, and place each Elf_Verdef
+// in the VersionMap according to its index.
+template<support::endianness target_endianness, bool is64Bits>
+void ELFObjectFile<target_endianness, is64Bits>::
+                  LoadVersionDefs(const Elf_Shdr *sec) const {
+  unsigned vd_size = sec->sh_size; // Size of section in bytes
+  unsigned vd_count = sec->sh_info; // Number of Verdef entries
+  const char *sec_start = (const char*)base() + sec->sh_offset;
+  const char *sec_end = sec_start + vd_size;
+  // The first Verdef entry is at the start of the section.
+  const char *p = sec_start;
+  for (unsigned i = 0; i < vd_count; i++) {
+    if (p + sizeof(Elf_Verdef) > sec_end)
+      report_fatal_error("Section ended unexpectedly while scanning "
+                         "version definitions.");
+    const Elf_Verdef *vd = reinterpret_cast<const Elf_Verdef *>(p);
+    if (vd->vd_version != ELF::VER_DEF_CURRENT)
+      report_fatal_error("Unexpected verdef version");
+    size_t index = vd->vd_ndx & ELF::VERSYM_VERSION;
+    if (index >= VersionMap.size())
+      VersionMap.resize(index+1);
+    VersionMap[index] = VersionMapEntry(vd);
+    p += vd->vd_next;
+  }
+}
+
+// Iterate through the versions needed section, and place each Elf_Vernaux
+// in the VersionMap according to its index.
+template<support::endianness target_endianness, bool is64Bits>
+void ELFObjectFile<target_endianness, is64Bits>::
+                  LoadVersionNeeds(const Elf_Shdr *sec) const {
+  unsigned vn_size = sec->sh_size; // Size of section in bytes
+  unsigned vn_count = sec->sh_info; // Number of Verneed entries
+  const char *sec_start = (const char*)base() + sec->sh_offset;
+  const char *sec_end = sec_start + vn_size;
+  // The first Verneed entry is at the start of the section.
+  const char *p = sec_start;
+  for (unsigned i = 0; i < vn_count; i++) {
+    if (p + sizeof(Elf_Verneed) > sec_end)
+      report_fatal_error("Section ended unexpectedly while scanning "
+                         "version needed records.");
+    const Elf_Verneed *vn = reinterpret_cast<const Elf_Verneed *>(p);
+    if (vn->vn_version != ELF::VER_NEED_CURRENT)
+      report_fatal_error("Unexpected verneed version");
+    // Iterate through the Vernaux entries
+    const char *paux = p + vn->vn_aux;
+    for (unsigned j = 0; j < vn->vn_cnt; j++) {
+      if (paux + sizeof(Elf_Vernaux) > sec_end)
+        report_fatal_error("Section ended unexpected while scanning auxiliary "
+                           "version needed records.");
+      const Elf_Vernaux *vna = reinterpret_cast<const Elf_Vernaux *>(paux);
+      size_t index = vna->vna_other & ELF::VERSYM_VERSION;
+      if (index >= VersionMap.size())
+        VersionMap.resize(index+1);
+      VersionMap[index] = VersionMapEntry(vna);
+      paux += vna->vna_next;
+    }
+    p += vn->vn_next;
+  }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+void ELFObjectFile<target_endianness, is64Bits>::LoadVersionMap() const {
+  // If there is no dynamic symtab or version table, there is nothing to do.
+  if (SymbolTableSections[0] == NULL || dot_gnu_version_sec == NULL)
+    return;
+
+  // Has the VersionMap already been loaded?
+  if (VersionMap.size() > 0)
+    return;
+
+  // The first two version indexes are reserved.
+  // Index 0 is LOCAL, index 1 is GLOBAL.
+  VersionMap.push_back(VersionMapEntry());
+  VersionMap.push_back(VersionMapEntry());
+
+  if (dot_gnu_version_d_sec)
+    LoadVersionDefs(dot_gnu_version_d_sec);
+
+  if (dot_gnu_version_r_sec)
+    LoadVersionNeeds(dot_gnu_version_r_sec);
+}
+
 template<support::endianness target_endianness, bool is64Bits>
 void ELFObjectFile<target_endianness, is64Bits>
                   ::validateSymbol(DataRefImpl Symb) const {
@@ -546,6 +741,18 @@ error_code ELFObjectFile<target_endianness, is64Bits>
 }
 
 template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+                        ::getSymbolVersion(SymbolRef SymRef,
+                                           StringRef &Version,
+                                           bool &IsDefault) const {
+  DataRefImpl Symb = SymRef.getRawDataRefImpl();
+  validateSymbol(Symb);
+  const Elf_Sym *symb = getSymbol(Symb);
+  return getSymbolVersion(SymbolTableSections[Symb.d.b], symb,
+                          Version, IsDefault);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
 ELF::Elf64_Word ELFObjectFile<target_endianness, is64Bits>
                       ::getSymbolTableIndex(const Elf_Sym *symb) const {
   if (symb->st_shndx == ELF::SHN_XINDEX)
@@ -1257,14 +1464,19 @@ void ELFObjectFile<target_endianness, is64Bits>
 template<support::endianness target_endianness, bool is64Bits>
 ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
                                                           , error_code &ec)
-  : ObjectFile(Binary::isELF, Object, ec)
+  : ObjectFile(getELFType(target_endianness == support::little, is64Bits),
+               Object, ec)
   , isDyldELFObject(false)
   , SectionHeaderTable(0)
   , dot_shstrtab_sec(0)
   , dot_strtab_sec(0)
   , dot_dynstr_sec(0)
   , dot_dynamic_sec(0)
-  , dt_soname(0) {
+  , dot_gnu_version_sec(0)
+  , dot_gnu_version_r_sec(0)
+  , dot_gnu_version_d_sec(0)
+  , dt_soname(0)
+ {
 
   const uint64_t FileSize = Data->getBufferSize();
 
@@ -1300,31 +1512,60 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
   SymbolTableSections.push_back(NULL);
 
   for (uint64_t i = 0, e = getNumSections(); i != e; ++i) {
-    if (sh->sh_type == ELF::SHT_SYMTAB_SHNDX) {
+    switch (sh->sh_type) {
+    case ELF::SHT_SYMTAB_SHNDX: {
       if (SymbolTableSectionHeaderIndex)
         // FIXME: Proper error handling.
         report_fatal_error("More than one .symtab_shndx!");
       SymbolTableSectionHeaderIndex = sh;
+      break;
     }
-    if (sh->sh_type == ELF::SHT_SYMTAB) {
+    case ELF::SHT_SYMTAB: {
       SymbolTableSectionsIndexMap[i] = SymbolTableSections.size();
       SymbolTableSections.push_back(sh);
+      break;
     }
-    if (sh->sh_type == ELF::SHT_DYNSYM) {
+    case ELF::SHT_DYNSYM: {
       if (SymbolTableSections[0] != NULL)
         // FIXME: Proper error handling.
         report_fatal_error("More than one .dynsym!");
       SymbolTableSectionsIndexMap[i] = 0;
       SymbolTableSections[0] = sh;
+      break;
     }
-    if (sh->sh_type == ELF::SHT_REL || sh->sh_type == ELF::SHT_RELA) {
+    case ELF::SHT_REL:
+    case ELF::SHT_RELA: {
       SectionRelocMap[getSection(sh->sh_info)].push_back(i);
+      break;
     }
-    if (sh->sh_type == ELF::SHT_DYNAMIC) {
+    case ELF::SHT_DYNAMIC: {
       if (dot_dynamic_sec != NULL)
         // FIXME: Proper error handling.
         report_fatal_error("More than one .dynamic!");
       dot_dynamic_sec = sh;
+      break;
+    }
+    case ELF::SHT_GNU_versym: {
+      if (dot_gnu_version_sec != NULL)
+        // FIXME: Proper error handling.
+        report_fatal_error("More than one .gnu.version section!");
+      dot_gnu_version_sec = sh;
+      break;
+    }
+    case ELF::SHT_GNU_verdef: {
+      if (dot_gnu_version_d_sec != NULL)
+        // FIXME: Proper error handling.
+        report_fatal_error("More than one .gnu.version_d section!");
+      dot_gnu_version_d_sec = sh;
+      break;
+    }
+    case ELF::SHT_GNU_verneed: {
+      if (dot_gnu_version_r_sec != NULL)
+        // FIXME: Proper error handling.
+        report_fatal_error("More than one .gnu.version_r section!");
+      dot_gnu_version_r_sec = sh;
+      break;
+    }
     }
     ++sh;
   }
@@ -1774,6 +2015,89 @@ error_code ELFObjectFile<target_endianness, is64Bits>
 }
 
 template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+                        ::getSymbolVersion(const Elf_Shdr *section,
+                                           const Elf_Sym *symb,
+                                           StringRef &Version,
+                                           bool &IsDefault) const {
+  // Handle non-dynamic symbols.
+  if (section != SymbolTableSections[0]) {
+    // Non-dynamic symbols can have versions in their names
+    // A name of the form 'foo@V1' indicates version 'V1', non-default.
+    // A name of the form 'foo@@V2' indicates version 'V2', default version.
+    StringRef Name;
+    error_code ec = getSymbolName(section, symb, Name);
+    if (ec != object_error::success)
+      return ec;
+    size_t atpos = Name.find('@');
+    if (atpos == StringRef::npos) {
+      Version = "";
+      IsDefault = false;
+      return object_error::success;
+    }
+    ++atpos;
+    if (atpos < Name.size() && Name[atpos] == '@') {
+      IsDefault = true;
+      ++atpos;
+    } else {
+      IsDefault = false;
+    }
+    Version = Name.substr(atpos);
+    return object_error::success;
+  }
+
+  // This is a dynamic symbol. Look in the GNU symbol version table.
+  if (dot_gnu_version_sec == NULL) {
+    // No version table.
+    Version = "";
+    IsDefault = false;
+    return object_error::success;
+  }
+
+  // Determine the position in the symbol table of this entry.
+  const char *sec_start = (const char*)base() + section->sh_offset;
+  size_t entry_index = ((const char*)symb - sec_start)/section->sh_entsize;
+
+  // Get the corresponding version index entry
+  const Elf_Versym *vs = getEntry<Elf_Versym>(dot_gnu_version_sec, entry_index);
+  size_t version_index = vs->vs_index & ELF::VERSYM_VERSION;
+
+  // Special markers for unversioned symbols.
+  if (version_index == ELF::VER_NDX_LOCAL ||
+      version_index == ELF::VER_NDX_GLOBAL) {
+    Version = "";
+    IsDefault = false;
+    return object_error::success;
+  }
+
+  // Lookup this symbol in the version table
+  LoadVersionMap();
+  if (version_index >= VersionMap.size() || VersionMap[version_index].isNull())
+    report_fatal_error("Symbol has version index without corresponding "
+                       "define or reference entry");
+  const VersionMapEntry &entry = VersionMap[version_index];
+
+  // Get the version name string
+  size_t name_offset;
+  if (entry.isVerdef()) {
+    // The first Verdaux entry holds the name.
+    name_offset = entry.getVerdef()->getAux()->vda_name;
+  } else {
+    name_offset = entry.getVernaux()->vna_name;
+  }
+  Version = getString(dot_dynstr_sec, name_offset);
+
+  // Set IsDefault
+  if (entry.isVerdef()) {
+    IsDefault = !(vs->vs_index & ELF::VERSYM_HIDDEN);
+  } else {
+    IsDefault = false;
+  }
+
+  return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
 inline DynRefImpl<target_endianness, is64Bits>
                  ::DynRefImpl(DataRefImpl DynP, const OwningType *Owner)
   : DynPimpl(DynP)
@@ -1821,6 +2145,35 @@ inline DataRefImpl DynRefImpl<target_endianness, is64Bits>
   return DynPimpl;
 }
 
+/// This is a generic interface for retrieving GNU symbol version
+/// information from an ELFObjectFile.
+static inline error_code GetELFSymbolVersion(const ObjectFile *Obj,
+                                             const SymbolRef &Sym,
+                                             StringRef &Version,
+                                             bool &IsDefault) {
+  // Little-endian 32-bit
+  if (const ELFObjectFile<support::little, false> *ELFObj =
+          dyn_cast<ELFObjectFile<support::little, false> >(Obj))
+    return ELFObj->getSymbolVersion(Sym, Version, IsDefault);
+
+  // Big-endian 32-bit
+  if (const ELFObjectFile<support::big, false> *ELFObj =
+          dyn_cast<ELFObjectFile<support::big, false> >(Obj))
+    return ELFObj->getSymbolVersion(Sym, Version, IsDefault);
+
+  // Little-endian 64-bit
+  if (const ELFObjectFile<support::little, true> *ELFObj =
+          dyn_cast<ELFObjectFile<support::little, true> >(Obj))
+    return ELFObj->getSymbolVersion(Sym, Version, IsDefault);
+
+  // Big-endian 64-bit
+  if (const ELFObjectFile<support::big, true> *ELFObj =
+          dyn_cast<ELFObjectFile<support::big, true> >(Obj))
+    return ELFObj->getSymbolVersion(Sym, Version, IsDefault);
+
+  llvm_unreachable("Object passed to GetELFSymbolVersion() is not ELF");
+}
+
 }
 }
 
diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index 1aae85a..1e409b2 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -47,7 +47,7 @@ public:
   MachOObject *getObject() { return MachOObj; }
 
   static inline bool classof(const Binary *v) {
-    return v->getType() == isMachO;
+    return v->isMachO();
   }
   static inline bool classof(const MachOObjectFile *v) { return true; }
 
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index 1e9d895..09eb7fc 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -372,8 +372,7 @@ public:
   static ObjectFile *createObjectFile(MemoryBuffer *Object);
 
   static inline bool classof(const Binary *v) {
-    return v->getType() >= isObject &&
-           v->getType() < lastObject;
+    return v->isObject();
   }
   static inline bool classof(const ObjectFile *v) { return true; }
 
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index 9c5e7ec..d0b186e 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -49,22 +49,22 @@
 #define LLVM_ATTRIBUTE_UNUSED
 #endif
 
-#ifdef __GNUC__ // aka 'ATTRIBUTE_CONST' but following LLVM Conventions.
-#define LLVM_ATTRIBUTE_READNONE __attribute__((__const__))
+#if (__GNUC__ >= 4) && !defined(__MINGW32__) && !defined(__CYGWIN__)
+#define LLVM_ATTRIBUTE_WEAK __attribute__((__weak__))
 #else
-#define LLVM_ATTRIBUTE_READNONE
+#define LLVM_ATTRIBUTE_WEAK
 #endif
 
-#ifdef __GNUC__  // aka 'ATTRIBUTE_PURE' but following LLVM Conventions.
-#define LLVM_ATTRIBUTE_READONLY __attribute__((__pure__))
+#ifdef __GNUC__ // aka 'CONST' but following LLVM Conventions.
+#define LLVM_READNONE __attribute__((__const__))
 #else
-#define LLVM_ATTRIBUTE_READONLY
+#define LLVM_READNONE
 #endif
 
-#if (__GNUC__ >= 4) && !defined(__MINGW32__) && !defined(__CYGWIN__)
-#define LLVM_ATTRIBUTE_WEAK __attribute__((__weak__))
+#ifdef __GNUC__  // aka 'PURE' but following LLVM Conventions.
+#define LLVM_READONLY __attribute__((__pure__))
 #else
-#define LLVM_ATTRIBUTE_WEAK
+#define LLVM_READONLY
 #endif
 
 #if (__GNUC__ >= 4)
diff --git a/include/llvm/Support/DataTypes.h.cmake b/include/llvm/Support/DataTypes.h.cmake
index d17f35f..a3a6489 100644
--- a/include/llvm/Support/DataTypes.h.cmake
+++ b/include/llvm/Support/DataTypes.h.cmake
@@ -94,6 +94,9 @@ typedef u_int64_t uint64_t;
 #else /* _MSC_VER */
 /* Visual C++ doesn't provide standard integer headers, but it does provide
    built-in data types. */
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
 #include <stdlib.h>
 #include <stddef.h>
 #include <sys/types.h>
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index 7ee363b..7123432 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -734,6 +734,9 @@ enum {
   SHT_GROUP         = 17, // Section group.
   SHT_SYMTAB_SHNDX  = 18, // Indices for SHN_XINDEX entries.
   SHT_LOOS          = 0x60000000, // Lowest operating system-specific type.
+  SHT_GNU_verdef    = 0x6ffffffd, // GNU version definitions.
+  SHT_GNU_verneed   = 0x6ffffffe, // GNU version references.
+  SHT_GNU_versym    = 0x6fffffff, // GNU symbol versions table.
   SHT_HIOS          = 0x6fffffff, // Highest operating system-specific type.
   SHT_LOPROC        = 0x70000000, // Lowest processor architecture-specific type.
 
@@ -1109,6 +1112,33 @@ enum {
   DF_STATIC_TLS = 0x10  // Reject attempts to load dynamically.
 };
 
+// ElfXX_VerDef structure version (GNU versioning)
+enum {
+  VER_DEF_NONE    = 0,
+  VER_DEF_CURRENT = 1
+};
+
+// VerDef Flags (ElfXX_VerDef::vd_flags)
+enum {
+  VER_FLG_BASE = 0x1,
+  VER_FLG_WEAK = 0x2,
+  VER_FLG_INFO = 0x4
+};
+
+// Special constants for the version table. (SHT_GNU_versym/.gnu.version)
+enum {
+  VER_NDX_LOCAL  = 0,      // Unversioned local symbol
+  VER_NDX_GLOBAL = 1,      // Unversioned global symbol
+  VERSYM_VERSION = 0x7fff, // Version Index mask
+  VERSYM_HIDDEN  = 0x8000  // Hidden bit (non-default version)
+};
+
+// ElfXX_VerNeed structure version (GNU versioning)
+enum {
+  VER_NEED_NONE = 0,
+  VER_NEED_CURRENT = 1
+};
+
 } // end namespace ELF
 
 } // end namespace llvm
diff --git a/include/llvm/Support/InstVisitor.h b/include/llvm/Support/InstVisitor.h
index 4b1f3c9..2001456 100644
--- a/include/llvm/Support/InstVisitor.h
+++ b/include/llvm/Support/InstVisitor.h
@@ -157,53 +157,54 @@ public:
   // Specific Instruction type classes... note that all of the casts are
   // necessary because we use the instruction classes as opaque types...
   //
-  RetTy visitReturnInst(ReturnInst &I)              { DELEGATE(TerminatorInst);}
-  RetTy visitBranchInst(BranchInst &I)              { DELEGATE(TerminatorInst);}
-  RetTy visitSwitchInst(SwitchInst &I)              { DELEGATE(TerminatorInst);}
-  RetTy visitIndirectBrInst(IndirectBrInst &I)      { DELEGATE(TerminatorInst);}
-  RetTy visitInvokeInst(InvokeInst &I)              { DELEGATE(TerminatorInst);}
-  RetTy visitResumeInst(ResumeInst &I)              { DELEGATE(TerminatorInst);}
-  RetTy visitUnreachableInst(UnreachableInst &I)    { DELEGATE(TerminatorInst);}
-  RetTy visitICmpInst(ICmpInst &I)                  { DELEGATE(CmpInst);}
-  RetTy visitFCmpInst(FCmpInst &I)                  { DELEGATE(CmpInst);}
-  RetTy visitAllocaInst(AllocaInst &I)              { DELEGATE(Instruction); }
-  RetTy visitLoadInst(LoadInst     &I)              { DELEGATE(Instruction); }
-  RetTy visitStoreInst(StoreInst   &I)              { DELEGATE(Instruction); }
-  RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I){ DELEGATE(Instruction); }
-  RetTy visitAtomicRMWInst(AtomicRMWInst &I)        { DELEGATE(Instruction); }
-  RetTy visitFenceInst(FenceInst   &I)              { DELEGATE(Instruction); }
-  RetTy visitGetElementPtrInst(GetElementPtrInst &I){ DELEGATE(Instruction); }
-  RetTy visitPHINode(PHINode       &I)              { DELEGATE(Instruction); }
-  RetTy visitTruncInst(TruncInst &I)                { DELEGATE(CastInst); }
-  RetTy visitZExtInst(ZExtInst &I)                  { DELEGATE(CastInst); }
-  RetTy visitSExtInst(SExtInst &I)                  { DELEGATE(CastInst); }
-  RetTy visitFPTruncInst(FPTruncInst &I)            { DELEGATE(CastInst); }
-  RetTy visitFPExtInst(FPExtInst &I)                { DELEGATE(CastInst); }
-  RetTy visitFPToUIInst(FPToUIInst &I)              { DELEGATE(CastInst); }
-  RetTy visitFPToSIInst(FPToSIInst &I)              { DELEGATE(CastInst); }
-  RetTy visitUIToFPInst(UIToFPInst &I)              { DELEGATE(CastInst); }
-  RetTy visitSIToFPInst(SIToFPInst &I)              { DELEGATE(CastInst); }
-  RetTy visitPtrToIntInst(PtrToIntInst &I)          { DELEGATE(CastInst); }
-  RetTy visitIntToPtrInst(IntToPtrInst &I)          { DELEGATE(CastInst); }
-  RetTy visitBitCastInst(BitCastInst &I)            { DELEGATE(CastInst); }
-  RetTy visitSelectInst(SelectInst &I)              { DELEGATE(Instruction); }
-  RetTy visitCallInst(CallInst     &I)              { DELEGATE(Instruction); }
-  RetTy visitVAArgInst(VAArgInst   &I)              { DELEGATE(Instruction); }
+  RetTy visitReturnInst(ReturnInst &I)            { DELEGATE(TerminatorInst);}
+  RetTy visitBranchInst(BranchInst &I)            { DELEGATE(TerminatorInst);}
+  RetTy visitSwitchInst(SwitchInst &I)            { DELEGATE(TerminatorInst);}
+  RetTy visitIndirectBrInst(IndirectBrInst &I)    { DELEGATE(TerminatorInst);}
+  RetTy visitInvokeInst(InvokeInst &I)            { DELEGATE(TerminatorInst);}
+  RetTy visitResumeInst(ResumeInst &I)            { DELEGATE(TerminatorInst);}
+  RetTy visitUnreachableInst(UnreachableInst &I)  { DELEGATE(TerminatorInst);}
+  RetTy visitICmpInst(ICmpInst &I)                { DELEGATE(CmpInst);}
+  RetTy visitFCmpInst(FCmpInst &I)                { DELEGATE(CmpInst);}
+  RetTy visitAllocaInst(AllocaInst &I)            { DELEGATE(UnaryInstruction);}
+  RetTy visitLoadInst(LoadInst     &I)            { DELEGATE(UnaryInstruction);}
+  RetTy visitStoreInst(StoreInst   &I)            { DELEGATE(Instruction);}
+  RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { DELEGATE(Instruction);}
+  RetTy visitAtomicRMWInst(AtomicRMWInst &I)      { DELEGATE(Instruction);}
+  RetTy visitFenceInst(FenceInst   &I)            { DELEGATE(Instruction);}
+  RetTy visitGetElementPtrInst(GetElementPtrInst &I){ DELEGATE(Instruction);}
+  RetTy visitPHINode(PHINode       &I)            { DELEGATE(Instruction);}
+  RetTy visitTruncInst(TruncInst &I)              { DELEGATE(CastInst);}
+  RetTy visitZExtInst(ZExtInst &I)                { DELEGATE(CastInst);}
+  RetTy visitSExtInst(SExtInst &I)                { DELEGATE(CastInst);}
+  RetTy visitFPTruncInst(FPTruncInst &I)          { DELEGATE(CastInst);}
+  RetTy visitFPExtInst(FPExtInst &I)              { DELEGATE(CastInst);}
+  RetTy visitFPToUIInst(FPToUIInst &I)            { DELEGATE(CastInst);}
+  RetTy visitFPToSIInst(FPToSIInst &I)            { DELEGATE(CastInst);}
+  RetTy visitUIToFPInst(UIToFPInst &I)            { DELEGATE(CastInst);}
+  RetTy visitSIToFPInst(SIToFPInst &I)            { DELEGATE(CastInst);}
+  RetTy visitPtrToIntInst(PtrToIntInst &I)        { DELEGATE(CastInst);}
+  RetTy visitIntToPtrInst(IntToPtrInst &I)        { DELEGATE(CastInst);}
+  RetTy visitBitCastInst(BitCastInst &I)          { DELEGATE(CastInst);}
+  RetTy visitSelectInst(SelectInst &I)            { DELEGATE(Instruction);}
+  RetTy visitCallInst(CallInst     &I)            { DELEGATE(Instruction);}
+  RetTy visitVAArgInst(VAArgInst   &I)            { DELEGATE(UnaryInstruction);}
   RetTy visitExtractElementInst(ExtractElementInst &I) { DELEGATE(Instruction);}
-  RetTy visitInsertElementInst(InsertElementInst &I) { DELEGATE(Instruction); }
-  RetTy visitShuffleVectorInst(ShuffleVectorInst &I) { DELEGATE(Instruction); }
-  RetTy visitExtractValueInst(ExtractValueInst &I)  { DELEGATE(Instruction);}
-  RetTy visitInsertValueInst(InsertValueInst &I)    { DELEGATE(Instruction); }
-  RetTy visitLandingPadInst(LandingPadInst &I)      { DELEGATE(Instruction); }
+  RetTy visitInsertElementInst(InsertElementInst &I) { DELEGATE(Instruction);}
+  RetTy visitShuffleVectorInst(ShuffleVectorInst &I) { DELEGATE(Instruction);}
+  RetTy visitExtractValueInst(ExtractValueInst &I){ DELEGATE(UnaryInstruction);}
+  RetTy visitInsertValueInst(InsertValueInst &I)  { DELEGATE(Instruction); }
+  RetTy visitLandingPadInst(LandingPadInst &I)    { DELEGATE(Instruction); }
 
   // Next level propagators: If the user does not overload a specific
   // instruction type, they can overload one of these to get the whole class
   // of instructions...
   //
-  RetTy visitTerminatorInst(TerminatorInst &I) { DELEGATE(Instruction); }
-  RetTy visitBinaryOperator(BinaryOperator &I) { DELEGATE(Instruction); }
-  RetTy visitCmpInst(CmpInst &I)               { DELEGATE(Instruction); }
-  RetTy visitCastInst(CastInst &I)             { DELEGATE(Instruction); }
+  RetTy visitCastInst(CastInst &I)                { DELEGATE(UnaryInstruction);}
+  RetTy visitBinaryOperator(BinaryOperator &I)    { DELEGATE(Instruction);}
+  RetTy visitCmpInst(CmpInst &I)                  { DELEGATE(Instruction);}
+  RetTy visitTerminatorInst(TerminatorInst &I)    { DELEGATE(Instruction);}
+  RetTy visitUnaryInstruction(UnaryInstruction &I){ DELEGATE(Instruction);}
 
   // If the user wants a 'default' case, they can choose to override this
   // function.  If this function is not overloaded in the user's subclass, then
diff --git a/include/llvm/Support/MachO.h b/include/llvm/Support/MachO.h
index 493491a..44a7a79 100644
--- a/include/llvm/Support/MachO.h
+++ b/include/llvm/Support/MachO.h
@@ -114,6 +114,10 @@ namespace llvm {
       LoadCommandVersionMinIPhoneOS       = 0x00000025u, // LC_VERSION_MIN_IPHONEOS
       LoadCommandFunctionStarts           = 0x00000026u, // LC_FUNCTION_STARTS
       LoadCommandDyldEnvironment          = 0x00000027u, // LC_DYLD_ENVIRONMENT
+      LoadCommandMain                     = 0x80000028u, // LC_MAIN
+      LoadCommandDataInCode               = 0x00000029u, // LC_DATA_IN_CODE
+      LoadCommandSourceVersion            = 0x0000002Au, // LC_SOURCE_VERSION
+      LoadCommandCodeSignDRs              = 0x0000002Bu, // LC_DYLIB_CODE_SIGN_DRS
 
       // Constant bits for the "flags" field in llvm::MachO::segment_command
       SegmentCommandFlagBitHighVM             = 0x1u, // SG_HIGHVM
diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h
index 85d90b1..0cb8e97 100644
--- a/include/llvm/Support/type_traits.h
+++ b/include/llvm/Support/type_traits.h
@@ -121,12 +121,44 @@ template <> struct is_integral_impl<unsigned long long> : true_type {};
 template <typename T>
 struct is_integral : is_integral_impl<T> {};
 
+/// \brief Metafunction to remove reference from a type.
+template <typename T> struct remove_reference { typedef T type; };
+template <typename T> struct remove_reference<T&> { typedef T type; };
+
 /// \brief Metafunction that determines whether the given type is a pointer
 /// type.
 template <typename T> struct is_pointer : false_type {};
 template <typename T> struct is_pointer<T*> : true_type {};
+template <typename T> struct is_pointer<T* const> : true_type {};
+template <typename T> struct is_pointer<T* volatile> : true_type {};
+template <typename T> struct is_pointer<T* const volatile> : true_type {};
+
+/// \brief Metafunction that determines whether the given type is either an
+/// integral type or an enumeration type.
+///
+/// Note that this accepts potentially more integral types than we whitelist
+/// above for is_integral because it is based on merely being convertible
+/// implicitly to an integral type.
+template <typename T> class is_integral_or_enum {
+  // Provide an overload which can be called with anything implicitly
+  // convertible to an unsigned long long. This should catch integer types and
+  // enumeration types at least. We blacklist classes with conversion operators
+  // below.
+  static double check_int_convertible(unsigned long long);
+  static char check_int_convertible(...);
+
+  typedef typename remove_reference<T>::type UnderlyingT;
+  static UnderlyingT &nonce_instance;
+
+public:
+  enum {
+    value = (!is_class<UnderlyingT>::value && !is_pointer<UnderlyingT>::value &&
+             !is_same<UnderlyingT, float>::value &&
+             !is_same<UnderlyingT, double>::value &&
+             sizeof(char) != sizeof(check_int_convertible(nonce_instance)))
+  };
+};
 
-  
 // enable_if_c - Enable/disable a template based on a metafunction
 template<bool Cond, typename T = void>
 struct enable_if_c {
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 4f3e432..793b080 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -41,6 +41,7 @@ namespace llvm {
   class FastISel;
   class FunctionLoweringInfo;
   class ImmutableCallSite;
+  class IntrinsicInst;
   class MachineBasicBlock;
   class MachineFunction;
   class MachineInstr;
@@ -1539,6 +1540,17 @@ public:
     AddrMode() : BaseGV(0), BaseOffs(0), HasBaseReg(false), Scale(0) {}
   };
 
+  /// GetAddrModeArguments - CodeGenPrepare sinks address calculations into the
+  /// same BB as Load/Store instructions reading the address.  This allows as
+  /// much computation as possible to be done in the address mode for that
+  /// operand.  This hook lets targets also pass back when this should be done
+  /// on intrinsics which load/store.
+  virtual bool GetAddrModeArguments(IntrinsicInst *I,
+                                    SmallVectorImpl<Value*> &Ops,
+                                    Type *&AccessTy) const {
+    return false;
+  }
+
   /// isLegalAddressingMode - Return true if the addressing mode represented by
   /// AM is legal for this target, for a load/store of the specified type.
   /// The type may be VoidTy, in which case only return true if the addressing
diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h
index 1feaaa4..f59479d 100644
--- a/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/include/llvm/Transforms/IPO/InlinerPass.h
@@ -79,10 +79,14 @@ struct Inliner : public CallGraphSCCPass {
   /// has been inlined.
   virtual void growCachedCostInfo(Function *Caller, Function *Callee) = 0;
 
-  /// removeDeadFunctions - Remove dead functions that are not included in
-  /// DNR (Do Not Remove) list.
-  bool removeDeadFunctions(CallGraph &CG, 
-                           SmallPtrSet<const Function *, 16> *DNR = NULL);
+  /// removeDeadFunctions - Remove dead functions.
+  ///
+  /// This also includes a hack in the form of the 'AlwaysInlineOnly' flag
+  /// which restricts it to deleting functions with an 'AlwaysInline'
+  /// attribute. This is useful for the InlineAlways pass that only wants to
+  /// deal with that subset of the functions.
+  bool removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly = false);
+
 private:
   // InlineThreshold - Cache the value here for easy access.
   unsigned InlineThreshold;
diff --git a/include/llvm/Transforms/Utils/BasicInliner.h b/include/llvm/Transforms/Utils/BasicInliner.h
deleted file mode 100644
index 4bca6b8..0000000
--- a/include/llvm/Transforms/Utils/BasicInliner.h
+++ /dev/null
@@ -1,55 +0,0 @@
-//===- BasicInliner.h - Basic function level inliner ------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a simple function based inliner that does not use
-// call graph information. 
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BASICINLINER_H
-#define BASICINLINER_H
-
-#include "llvm/Analysis/InlineCost.h"
-
-namespace llvm {
-
-  class Function;
-  class TargetData;
-  struct BasicInlinerImpl;
-
-  /// BasicInliner - BasicInliner provides function level inlining interface.
-  /// Clients provide list of functions which are inline without using
-  /// module level call graph information. Note that the BasicInliner is
-  /// free to delete a function if it is inlined into all call sites.
-  class BasicInliner {
-  public:
-    
-    explicit BasicInliner(TargetData *T = NULL);
-    ~BasicInliner();
-
-    /// addFunction - Add function into the list of functions to process.
-    /// All functions must be inserted using this interface before invoking
-    /// inlineFunctions().
-    void addFunction(Function *F);
-
-    /// neverInlineFunction - Sometimes a function is never to be inlined 
-    /// because of one or other reason. 
-    void neverInlineFunction(Function *F);
-
-    /// inlineFuctions - Walk all call sites in all functions supplied by
-    /// client. Inline as many call sites as possible. Delete completely
-    /// inlined functions.
-    void inlineFunctions();
-
-  private:
-    BasicInlinerImpl *Impl;
-  };
-}
-
-#endif
diff --git a/include/llvm/Value.h b/include/llvm/Value.h
index 9c658cf..380af0b 100644
--- a/include/llvm/Value.h
+++ b/include/llvm/Value.h
@@ -263,14 +263,32 @@ public:
     return true; // Values are always values.
   }
 
-  /// stripPointerCasts - This method strips off any unneeded pointer
-  /// casts from the specified value, returning the original uncasted value.
-  /// Note that the returned value has pointer type if the specified value does.
+  /// stripPointerCasts - This method strips off any unneeded pointer casts and
+  /// all-zero GEPs from the specified value, returning the original uncasted
+  /// value. If this is called on a non-pointer value, it returns 'this'.
   Value *stripPointerCasts();
   const Value *stripPointerCasts() const {
     return const_cast<Value*>(this)->stripPointerCasts();
   }
 
+  /// stripInBoundsConstantOffsets - This method strips off unneeded pointer casts and
+  /// all-constant GEPs from the specified value, returning the original
+  /// pointer value. If this is called on a non-pointer value, it returns
+  /// 'this'.
+  Value *stripInBoundsConstantOffsets();
+  const Value *stripInBoundsConstantOffsets() const {
+    return const_cast<Value*>(this)->stripInBoundsConstantOffsets();
+  }
+
+  /// stripInBoundsOffsets - This method strips off unneeded pointer casts and
+  /// any in-bounds Offsets from the specified value, returning the original
+  /// pointer value. If this is called on a non-pointer value, it returns
+  /// 'this'.
+  Value *stripInBoundsOffsets();
+  const Value *stripInBoundsOffsets() const {
+    return const_cast<Value*>(this)->stripInBoundsOffsets();
+  }
+
   /// isDereferenceablePointer - Test if this value is always a pointer to
   /// allocated and suitably aligned memory for a simple load or store.
   bool isDereferenceablePointer() const;
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index cb1e1eb..2e3ec8b 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -10,6 +10,7 @@ add_llvm_library(LLVMAnalysis
   BranchProbabilityInfo.cpp
   CFGPrinter.cpp
   CaptureTracking.cpp
+  CodeMetrics.cpp
   ConstantFolding.cpp
   DIBuilder.cpp
   DbgInfoPrinter.cpp
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
new file mode 100644
index 0000000..6c93f78
--- /dev/null
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -0,0 +1,176 @@
+//===- CodeMetrics.cpp - Code cost measurements ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements code cost measurement utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Target/TargetData.h"
+
+using namespace llvm;
+
+/// callIsSmall - If a call is likely to lower to a single target instruction,
+/// or is otherwise deemed small return true.
+/// TODO: Perhaps calls like memcpy, strcpy, etc?
+bool llvm::callIsSmall(const Function *F) {
+  if (!F) return false;
+
+  if (F->hasLocalLinkage()) return false;
+
+  if (!F->hasName()) return false;
+
+  StringRef Name = F->getName();
+
+  // These will all likely lower to a single selection DAG node.
+  if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
+      Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
+      Name == "sin" || Name == "sinf" || Name == "sinl" ||
+      Name == "cos" || Name == "cosf" || Name == "cosl" ||
+      Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
+    return true;
+
+  // These are all likely to be optimized into something smaller.
+  if (Name == "pow" || Name == "powf" || Name == "powl" ||
+      Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
+      Name == "floor" || Name == "floorf" || Name == "ceil" ||
+      Name == "round" || Name == "ffs" || Name == "ffsl" ||
+      Name == "abs" || Name == "labs" || Name == "llabs")
+    return true;
+
+  return false;
+}
+
+/// analyzeBasicBlock - Fill in the current structure with information gleaned
+/// from the specified block.
+void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
+                                    const TargetData *TD) {
+  ++NumBlocks;
+  unsigned NumInstsBeforeThisBB = NumInsts;
+  for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
+       II != E; ++II) {
+    if (isa<PHINode>(II)) continue;           // PHI nodes don't count.
+
+    // Special handling for calls.
+    if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
+      if (const IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(II)) {
+        switch (IntrinsicI->getIntrinsicID()) {
+        default: break;
+        case Intrinsic::dbg_declare:
+        case Intrinsic::dbg_value:
+        case Intrinsic::invariant_start:
+        case Intrinsic::invariant_end:
+        case Intrinsic::lifetime_start:
+        case Intrinsic::lifetime_end:
+        case Intrinsic::objectsize:
+        case Intrinsic::ptr_annotation:
+        case Intrinsic::var_annotation:
+          // These intrinsics don't count as size.
+          continue;
+        }
+      }
+
+      ImmutableCallSite CS(cast<Instruction>(II));
+
+      if (const Function *F = CS.getCalledFunction()) {
+        // If a function is both internal and has a single use, then it is
+        // extremely likely to get inlined in the future (it was probably
+        // exposed by an interleaved devirtualization pass).
+        if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse())
+          ++NumInlineCandidates;
+
+        // If this call is to function itself, then the function is recursive.
+        // Inlining it into other functions is a bad idea, because this is
+        // basically just a form of loop peeling, and our metrics aren't useful
+        // for that case.
+        if (F == BB->getParent())
+          isRecursive = true;
+      }
+
+      if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
+        // Each argument to a call takes on average one instruction to set up.
+        NumInsts += CS.arg_size();
+
+        // We don't want inline asm to count as a call - that would prevent loop
+        // unrolling. The argument setup cost is still real, though.
+        if (!isa<InlineAsm>(CS.getCalledValue()))
+          ++NumCalls;
+      }
+    }
+
+    if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+      if (!AI->isStaticAlloca())
+        this->usesDynamicAlloca = true;
+    }
+
+    if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
+      ++NumVectorInsts;
+
+    if (const CastInst *CI = dyn_cast<CastInst>(II)) {
+      // Noop casts, including ptr <-> int,  don't count.
+      if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
+          isa<PtrToIntInst>(CI))
+        continue;
+      // trunc to a native type is free (assuming the target has compare and
+      // shift-right of the same width).
+      if (isa<TruncInst>(CI) && TD &&
+          TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
+        continue;
+      // Result of a cmp instruction is often extended (to be used by other
+      // cmp instructions, logical or return instructions). These are usually
+      // nop on most sane targets.
+      if (isa<CmpInst>(CI->getOperand(0)))
+        continue;
+    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
+      // If a GEP has all constant indices, it will probably be folded with
+      // a load/store.
+      if (GEPI->hasAllConstantIndices())
+        continue;
+    }
+
+    ++NumInsts;
+  }
+
+  if (isa<ReturnInst>(BB->getTerminator()))
+    ++NumRets;
+
+  // We never want to inline functions that contain an indirectbr.  This is
+  // incorrect because all the blockaddress's (in static global initializers
+  // for example) would be referring to the original function, and this indirect
+  // jump would jump from the inlined copy of the function into the original
+  // function which is extremely undefined behavior.
+  // FIXME: This logic isn't really right; we can safely inline functions
+  // with indirectbr's as long as no other function or global references the
+  // blockaddress of a block within the current function.  And as a QOI issue,
+  // if someone is using a blockaddress without an indirectbr, and that
+  // reference somehow ends up in another function or global, we probably
+  // don't want to inline this function.
+  if (isa<IndirectBrInst>(BB->getTerminator()))
+    containsIndirectBr = true;
+
+  // Remember NumInsts for this BB.
+  NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
+}
+
+void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) {
+  // If this function contains a call that "returns twice" (e.g., setjmp or
+  // _setjmp) and it isn't marked with "returns twice" itself, never inline it.
+  // This is a hack because we depend on the user marking their local variables
+  // as volatile if they are live across a setjmp call, and they probably
+  // won't do this in callers.
+  exposesReturnsTwice = F->callsFunctionThatReturnsTwice() &&
+    !F->hasFnAttr(Attribute::ReturnsTwice);
+
+  // Look at the size of the callee.
+  for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+    analyzeBasicBlock(&*BB, TD);
+}
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 585a087..e30c0a9 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -68,7 +68,7 @@ uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
     return 0;
 
   if (Elt < DbgNode->getNumOperands())
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(DbgNode->getOperand(Elt)))
+    if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
       return CI->getZExtValue();
 
   return 0;
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index cad22f8..463584d 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -79,10 +79,39 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
   return false;
 }
 
+/// Return true if all loop headers that dominate this block are in simplified
+/// form.
+static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
+                                 const LoopInfo *LI,
+                                 SmallPtrSet<Loop*,16> &SimpleLoopNests) {
+  Loop *NearestLoop = 0;
+  for (DomTreeNode *Rung = DT->getNode(BB);
+       Rung; Rung = Rung->getIDom()) {
+    BasicBlock *DomBB = Rung->getBlock();
+    Loop *DomLoop = LI->getLoopFor(DomBB);
+    if (DomLoop && DomLoop->getHeader() == DomBB) {
+      // If the domtree walk reaches a loop with no preheader, return false.
+      if (!DomLoop->isLoopSimplifyForm())
+        return false;
+      // If we have already checked this loop nest, stop checking.
+      if (SimpleLoopNests.count(DomLoop))
+        break;
+      // If we have not already checked this loop nest, remember the loop
+      // header nearest to BB. The nearest loop may not contain BB.
+      if (!NearestLoop)
+        NearestLoop = DomLoop;
+    }
+  }
+  if (NearestLoop)
+    SimpleLoopNests.insert(NearestLoop);
+  return true;
+}
+
 /// AddUsersIfInteresting - Inspect the specified instruction.  If it is a
 /// reducible SCEV, recursively add its users to the IVUsesByStride set and
 /// return true.  Otherwise, return false.
-bool IVUsers::AddUsersIfInteresting(Instruction *I) {
+bool IVUsers::AddUsersIfInteresting(Instruction *I,
+                                    SmallPtrSet<Loop*,16> &SimpleLoopNests) {
   // Add this IV user to the Processed set before returning false to ensure that
   // all IV users are members of the set. See IVUsers::isIVUserOrOperand.
   if (!Processed.insert(I))
@@ -117,6 +146,18 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
     if (isa<PHINode>(User) && Processed.count(User))
       continue;
 
+    // Only consider IVUsers that are dominated by simplified loop
+    // headers. Otherwise, SCEVExpander will crash.
+    BasicBlock *UseBB = User->getParent();
+    // A phi's use is live out of its predecessor block.
+    if (PHINode *PHI = dyn_cast<PHINode>(User)) {
+      unsigned OperandNo = UI.getOperandNo();
+      unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
+      UseBB = PHI->getIncomingBlock(ValNo);
+    }
+    if (!isSimplifiedLoopNest(UseBB, DT, LI, SimpleLoopNests))
+      return false;
+
     // Descend recursively, but not into PHI nodes outside the current loop.
     // It's important to see the entire expression outside the loop to get
     // choices that depend on addressing mode use right, although we won't
@@ -126,12 +167,13 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
     bool AddUserToIVUsers = false;
     if (LI->getLoopFor(User->getParent()) != L) {
       if (isa<PHINode>(User) || Processed.count(User) ||
-          !AddUsersIfInteresting(User)) {
+          !AddUsersIfInteresting(User, SimpleLoopNests)) {
         DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
                      << "   OF SCEV: " << *ISE << '\n');
         AddUserToIVUsers = true;
       }
-    } else if (Processed.count(User) || !AddUsersIfInteresting(User)) {
+    } else if (Processed.count(User)
+               || !AddUsersIfInteresting(User, SimpleLoopNests)) {
       DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
                    << "   OF SCEV: " << *ISE << '\n');
       AddUserToIVUsers = true;
@@ -180,11 +222,16 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
   SE = &getAnalysis<ScalarEvolution>();
   TD = getAnalysisIfAvailable<TargetData>();
 
+  // SCEVExpander can only handle users that are dominated by simplified loop
+  // entries. Keep track of all loops that are only dominated by other simple
+  // loops so we don't traverse the domtree for each user.
+  SmallPtrSet<Loop*,16> SimpleLoopNests;
+
   // Find all uses of induction variables in this loop, and categorize
   // them by stride.  Start by finding all of the PHI nodes in the header for
   // this loop.  If they are induction variables, inspect their uses.
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
-    (void)AddUsersIfInteresting(I);
+    (void)AddUsersIfInteresting(I, SimpleLoopNests);
 
   return false;
 }
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index b326ba7..dedbfeb 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -20,165 +20,27 @@
 
 using namespace llvm;
 
-/// callIsSmall - If a call is likely to lower to a single target instruction,
-/// or is otherwise deemed small return true.
-/// TODO: Perhaps calls like memcpy, strcpy, etc?
-bool llvm::callIsSmall(const Function *F) {
-  if (!F) return false;
-
-  if (F->hasLocalLinkage()) return false;
-
-  if (!F->hasName()) return false;
-
-  StringRef Name = F->getName();
-
-  // These will all likely lower to a single selection DAG node.
-  if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
-      Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
-      Name == "sin" || Name == "sinf" || Name == "sinl" ||
-      Name == "cos" || Name == "cosf" || Name == "cosl" ||
-      Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
-    return true;
-
-  // These are all likely to be optimized into something smaller.
-  if (Name == "pow" || Name == "powf" || Name == "powl" ||
-      Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
-      Name == "floor" || Name == "floorf" || Name == "ceil" ||
-      Name == "round" || Name == "ffs" || Name == "ffsl" ||
-      Name == "abs" || Name == "labs" || Name == "llabs")
-    return true;
-
-  return false;
-}
-
-/// analyzeBasicBlock - Fill in the current structure with information gleaned
-/// from the specified block.
-void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
-                                    const TargetData *TD) {
-  ++NumBlocks;
-  unsigned NumInstsBeforeThisBB = NumInsts;
-  for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
-       II != E; ++II) {
-    if (isa<PHINode>(II)) continue;           // PHI nodes don't count.
-
-    // Special handling for calls.
-    if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
-      if (const IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(II)) {
-        switch (IntrinsicI->getIntrinsicID()) {
-        default: break;
-        case Intrinsic::dbg_declare:
-        case Intrinsic::dbg_value:
-        case Intrinsic::invariant_start:
-        case Intrinsic::invariant_end:
-        case Intrinsic::lifetime_start:
-        case Intrinsic::lifetime_end:
-        case Intrinsic::objectsize:
-        case Intrinsic::ptr_annotation:
-        case Intrinsic::var_annotation:
-          // These intrinsics don't count as size.
-          continue;
-        }
-      }
-
-      ImmutableCallSite CS(cast<Instruction>(II));
-
-      if (const Function *F = CS.getCalledFunction()) {
-        // If a function is both internal and has a single use, then it is
-        // extremely likely to get inlined in the future (it was probably
-        // exposed by an interleaved devirtualization pass).
-        if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse())
-          ++NumInlineCandidates;
-
-        // If this call is to function itself, then the function is recursive.
-        // Inlining it into other functions is a bad idea, because this is
-        // basically just a form of loop peeling, and our metrics aren't useful
-        // for that case.
-        if (F == BB->getParent())
-          isRecursive = true;
-      }
-
-      if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
-        // Each argument to a call takes on average one instruction to set up.
-        NumInsts += CS.arg_size();
-
-        // We don't want inline asm to count as a call - that would prevent loop
-        // unrolling. The argument setup cost is still real, though.
-        if (!isa<InlineAsm>(CS.getCalledValue()))
-          ++NumCalls;
-      }
-    }
-
-    if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
-      if (!AI->isStaticAlloca())
-        this->usesDynamicAlloca = true;
-    }
-
-    if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
-      ++NumVectorInsts;
-
-    if (const CastInst *CI = dyn_cast<CastInst>(II)) {
-      // Noop casts, including ptr <-> int,  don't count.
-      if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
-          isa<PtrToIntInst>(CI))
-        continue;
-      // trunc to a native type is free (assuming the target has compare and
-      // shift-right of the same width).
-      if (isa<TruncInst>(CI) && TD &&
-          TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
-        continue;
-      // Result of a cmp instruction is often extended (to be used by other
-      // cmp instructions, logical or return instructions). These are usually
-      // nop on most sane targets.
-      if (isa<CmpInst>(CI->getOperand(0)))
-        continue;
-    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
-      // If a GEP has all constant indices, it will probably be folded with
-      // a load/store.
-      if (GEPI->hasAllConstantIndices())
+unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForConstant(
+    const CodeMetrics &Metrics, Value *V) {
+  unsigned Reduction = 0;
+  SmallVector<Value *, 4> Worklist;
+  Worklist.push_back(V);
+  do {
+    Value *V = Worklist.pop_back_val();
+    for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+      User *U = *UI;
+      if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
+        // We will be able to eliminate all but one of the successors.
+        const TerminatorInst &TI = cast<TerminatorInst>(*U);
+        const unsigned NumSucc = TI.getNumSuccessors();
+        unsigned Instrs = 0;
+        for (unsigned I = 0; I != NumSucc; ++I)
+          Instrs += Metrics.NumBBInsts.lookup(TI.getSuccessor(I));
+        // We don't know which blocks will be eliminated, so use the average size.
+        Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
         continue;
-    }
-
-    ++NumInsts;
-  }
-
-  if (isa<ReturnInst>(BB->getTerminator()))
-    ++NumRets;
-
-  // We never want to inline functions that contain an indirectbr.  This is
-  // incorrect because all the blockaddress's (in static global initializers
-  // for example) would be referring to the original function, and this indirect
-  // jump would jump from the inlined copy of the function into the original
-  // function which is extremely undefined behavior.
-  // FIXME: This logic isn't really right; we can safely inline functions
-  // with indirectbr's as long as no other function or global references the
-  // blockaddress of a block within the current function.  And as a QOI issue,
-  // if someone is using a blockaddress without an indirectbr, and that
-  // reference somehow ends up in another function or global, we probably
-  // don't want to inline this function.
-  if (isa<IndirectBrInst>(BB->getTerminator()))
-    containsIndirectBr = true;
-
-  // Remember NumInsts for this BB.
-  NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
-}
+      }
 
-// CountCodeReductionForConstant - Figure out an approximation for how many
-// instructions will be constant folded if the specified value is constant.
-//
-unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) {
-  unsigned Reduction = 0;
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
-    User *U = *UI;
-    if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
-      // We will be able to eliminate all but one of the successors.
-      const TerminatorInst &TI = cast<TerminatorInst>(*U);
-      const unsigned NumSucc = TI.getNumSuccessors();
-      unsigned Instrs = 0;
-      for (unsigned I = 0; I != NumSucc; ++I)
-        Instrs += NumBBInsts[TI.getSuccessor(I)];
-      // We don't know which blocks will be eliminated, so use the average size.
-      Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
-    } else {
       // Figure out if this instruction will be removed due to simple constant
       // propagation.
       Instruction &Inst = cast<Instruction>(*U);
@@ -200,33 +62,186 @@ unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) {
           AllOperandsConstant = false;
           break;
         }
+      if (!AllOperandsConstant)
+        continue;
 
-      if (AllOperandsConstant) {
-        // We will get to remove this instruction...
-        Reduction += InlineConstants::InstrCost;
+      // We will get to remove this instruction...
+      Reduction += InlineConstants::InstrCost;
 
-        // And any other instructions that use it which become constants
-        // themselves.
-        Reduction += CountCodeReductionForConstant(&Inst);
+      // And any other instructions that use it which become constants
+      // themselves.
+      Worklist.push_back(&Inst);
+    }
+  } while (!Worklist.empty());
+  return Reduction;
+}
+
+static unsigned countCodeReductionForAllocaICmp(const CodeMetrics &Metrics,
+                                                ICmpInst *ICI) {
+  unsigned Reduction = 0;
+
+  // Bail if this is comparing against a non-constant; there is nothing we can
+  // do there.
+  if (!isa<Constant>(ICI->getOperand(1)))
+    return Reduction;
+
+  // An icmp pred (alloca, C) becomes true if the predicate is true when
+  // equal and false otherwise.
+  bool Result = ICI->isTrueWhenEqual();
+
+  SmallVector<Instruction *, 4> Worklist;
+  Worklist.push_back(ICI);
+  do {
+    Instruction *U = Worklist.pop_back_val();
+    Reduction += InlineConstants::InstrCost;
+    for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
+         UI != UE; ++UI) {
+      Instruction *I = dyn_cast<Instruction>(*UI);
+      if (!I || I->mayHaveSideEffects()) continue;
+      if (I->getNumOperands() == 1)
+        Worklist.push_back(I);
+      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+        // If BO produces the same value as U, then the other operand is
+        // irrelevant and we can put it into the Worklist to continue
+        // deleting dead instructions. If BO produces the same value as the
+        // other operand, we can delete BO but that's it.
+        if (Result == true) {
+          if (BO->getOpcode() == Instruction::Or)
+            Worklist.push_back(I);
+          if (BO->getOpcode() == Instruction::And)
+            Reduction += InlineConstants::InstrCost;
+        } else {
+          if (BO->getOpcode() == Instruction::Or ||
+              BO->getOpcode() == Instruction::Xor)
+            Reduction += InlineConstants::InstrCost;
+          if (BO->getOpcode() == Instruction::And)
+            Worklist.push_back(I);
+        }
+      }
+      if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+        BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1);
+        if (BB->getSinglePredecessor())
+          Reduction
+            += InlineConstants::InstrCost * Metrics.NumBBInsts.lookup(BB);
       }
     }
-  }
+  } while (!Worklist.empty());
+
   return Reduction;
 }
 
-// CountCodeReductionForAlloca - Figure out an approximation of how much smaller
-// the function will be if it is inlined into a context where an argument
-// becomes an alloca.
-//
-unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
+/// \brief Compute the reduction possible for a given instruction if we are able
+/// to SROA an alloca.
+///
+/// The reduction for this instruction is added to the SROAReduction output
+/// parameter. Returns false if this instruction is expected to defeat SROA in
+/// general.
+static bool countCodeReductionForSROAInst(Instruction *I,
+                                          SmallVectorImpl<Value *> &Worklist,
+                                          unsigned &SROAReduction) {
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    if (!LI->isSimple())
+      return false;
+    SROAReduction += InlineConstants::InstrCost;
+    return true;
+  }
+
+  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+    if (!SI->isSimple())
+      return false;
+    SROAReduction += InlineConstants::InstrCost;
+    return true;
+  }
+
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+    // If the GEP has variable indices, we won't be able to do much with it.
+    if (!GEP->hasAllConstantIndices())
+      return false;
+    // A non-zero GEP will likely become a mask operation after SROA.
+    if (GEP->hasAllZeroIndices())
+      SROAReduction += InlineConstants::InstrCost;
+    Worklist.push_back(GEP);
+    return true;
+  }
+
+  if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+    // Track pointer through bitcasts.
+    Worklist.push_back(BCI);
+    SROAReduction += InlineConstants::InstrCost;
+    return true;
+  }
+
+  // We just look for non-constant operands to ICmp instructions as those will
+  // defeat SROA. The actual reduction for these happens even without SROA.
+  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
+    return isa<Constant>(ICI->getOperand(1));
+
+  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+    // SROA can handle a select of alloca iff all uses of the alloca are
+    // loads, and dereferenceable. We assume it's dereferenceable since
+    // we're told the input is an alloca.
+    for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
+         UI != UE; ++UI) {
+      LoadInst *LI = dyn_cast<LoadInst>(*UI);
+      if (LI == 0 || !LI->isSimple())
+        return false;
+    }
+    // We don't know whether we'll be deleting the rest of the chain of
+    // instructions from the SelectInst on, because we don't know whether
+    // the other side of the select is also an alloca or not.
+    return true;
+  }
+
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    default:
+      return false;
+    case Intrinsic::memset:
+    case Intrinsic::memcpy:
+    case Intrinsic::memmove:
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+      // SROA can usually chew through these intrinsics.
+      SROAReduction += InlineConstants::InstrCost;
+      return true;
+    }
+  }
+
+  // If there is some other strange instruction, we're not going to be
+  // able to do much if we inline this.
+  return false;
+}
+
+unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForAlloca(
+    const CodeMetrics &Metrics, Value *V) {
   if (!V->getType()->isPointerTy()) return 0;  // Not a pointer
   unsigned Reduction = 0;
+  unsigned SROAReduction = 0;
+  bool CanSROAAlloca = true;
 
-  // Looking at ICmpInsts will never abort the analysis and return zero, and
-  // analyzing them is expensive, so save them for last so that we don't do
-  // extra work that we end up throwing out.
-  SmallVector<ICmpInst *, 4> ICmpInsts;
+  SmallVector<Value *, 4> Worklist;
+  Worklist.push_back(V);
+  do {
+    Value *V = Worklist.pop_back_val();
+    for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+         UI != E; ++UI){
+      Instruction *I = cast<Instruction>(*UI);
+
+      if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
+        Reduction += countCodeReductionForAllocaICmp(Metrics, ICI);
+
+      if (CanSROAAlloca)
+        CanSROAAlloca = countCodeReductionForSROAInst(I, Worklist,
+                                                      SROAReduction);
+    }
+  } while (!Worklist.empty());
+
+  return Reduction + (CanSROAAlloca ? SROAReduction : 0);
+}
 
+void InlineCostAnalyzer::FunctionInfo::countCodeReductionForPointerPair(
+    const CodeMetrics &Metrics, DenseMap<Value *, unsigned> &PointerArgs,
+    Value *V, unsigned ArgIdx) {
   SmallVector<Value *, 4> Worklist;
   Worklist.push_back(V);
   do {
@@ -234,126 +249,57 @@ unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
     for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
          UI != E; ++UI){
       Instruction *I = cast<Instruction>(*UI);
-      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-        if (!LI->isSimple())
-          return 0;
-        Reduction += InlineConstants::InstrCost;
-      } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-        if (!SI->isSimple())
-          return 0;
-        Reduction += InlineConstants::InstrCost;
-      } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+
+      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
         // If the GEP has variable indices, we won't be able to do much with it.
         if (!GEP->hasAllConstantIndices())
-          return 0;
-        // A non-zero GEP will likely become a mask operation after SROA.
-        if (GEP->hasAllZeroIndices())
-          Reduction += InlineConstants::InstrCost;
+          continue;
+        // Unless the GEP is in-bounds, some comparisons will be non-constant.
+        // Fortunately, the real-world cases where this occurs uses in-bounds
+        // GEPs, and so we restrict the optimization to them here.
+        if (!GEP->isInBounds())
+          continue;
+
+        // Constant indices just change the constant offset. Add the resulting
+        // value both to our worklist for this argument, and to the set of
+        // viable paired values with future arguments.
+        PointerArgs[GEP] = ArgIdx;
         Worklist.push_back(GEP);
-      } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
-        // Track pointer through bitcasts.
-        Worklist.push_back(BCI);
-        Reduction += InlineConstants::InstrCost;
-      } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
-        // SROA can handle a select of alloca iff all uses of the alloca are
-        // loads, and dereferenceable. We assume it's dereferenceable since
-        // we're told the input is an alloca.
-        for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
-             UI != UE; ++UI) {
-          LoadInst *LI = dyn_cast<LoadInst>(*UI);
-          if (LI == 0 || !LI->isSimple()) return 0;
-        }
-        // We don't know whether we'll be deleting the rest of the chain of
-        // instructions from the SelectInst on, because we don't know whether
-        // the other side of the select is also an alloca or not.
         continue;
-      } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-        switch (II->getIntrinsicID()) {
-        default:
-          return 0;
-        case Intrinsic::memset:
-        case Intrinsic::memcpy:
-        case Intrinsic::memmove:
-        case Intrinsic::lifetime_start:
-        case Intrinsic::lifetime_end:
-          // SROA can usually chew through these intrinsics.
-          Reduction += InlineConstants::InstrCost;
-          break;
-        }
-      } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
-        if (!isa<Constant>(ICI->getOperand(1)))
-          return 0;
-        ICmpInsts.push_back(ICI);
-      } else {
-        // If there is some other strange instruction, we're not going to be
-        // able to do much if we inline this.
-        return 0;
       }
-    }
-  } while (!Worklist.empty());
-
-  while (!ICmpInsts.empty()) {
-    ICmpInst *ICI = ICmpInsts.pop_back_val();
 
-    // An icmp pred (alloca, C) becomes true if the predicate is true when
-    // equal and false otherwise.
-    bool Result = ICI->isTrueWhenEqual();
-
-    SmallVector<Instruction *, 4> Worklist;
-    Worklist.push_back(ICI);
-    do {
-      Instruction *U = Worklist.pop_back_val();
-      Reduction += InlineConstants::InstrCost;
-      for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
-           UI != UE; ++UI) {
-        Instruction *I = dyn_cast<Instruction>(*UI);
-        if (!I || I->mayHaveSideEffects()) continue;
-        if (I->getNumOperands() == 1)
-          Worklist.push_back(I);
-        if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
-          // If BO produces the same value as U, then the other operand is
-          // irrelevant and we can put it into the Worklist to continue
-          // deleting dead instructions. If BO produces the same value as the
-          // other operand, we can delete BO but that's it.
-          if (Result == true) {
-            if (BO->getOpcode() == Instruction::Or)
-              Worklist.push_back(I);
-            if (BO->getOpcode() == Instruction::And)
-              Reduction += InlineConstants::InstrCost;
-          } else {
-            if (BO->getOpcode() == Instruction::Or ||
-                BO->getOpcode() == Instruction::Xor)
-              Reduction += InlineConstants::InstrCost;
-            if (BO->getOpcode() == Instruction::And)
-              Worklist.push_back(I);
-          }
-        }
-        if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
-          BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1);
-          if (BB->getSinglePredecessor())
-            Reduction += InlineConstants::InstrCost * NumBBInsts[BB];
-        }
+      // Track pointer through casts. Even when the result is not a pointer, it
+      // remains a constant relative to constants derived from other constant
+      // pointers.
+      if (CastInst *CI = dyn_cast<CastInst>(I)) {
+        PointerArgs[CI] = ArgIdx;
+        Worklist.push_back(CI);
+        continue;
       }
-    } while (!Worklist.empty());
-  }
 
-  return Reduction;
-}
+      // There are two instructions which produce a strict constant value when
+      // applied to two related pointer values. Ignore everything else.
+      if (!isa<ICmpInst>(I) && I->getOpcode() != Instruction::Sub)
+        continue;
+      assert(I->getNumOperands() == 2);
+
+      // Ensure that the two operands are in our set of potentially paired
+      // pointers (or are derived from them).
+      Value *OtherArg = I->getOperand(0);
+      if (OtherArg == V)
+        OtherArg = I->getOperand(1);
+      DenseMap<Value *, unsigned>::const_iterator ArgIt
+        = PointerArgs.find(OtherArg);
+      if (ArgIt == PointerArgs.end())
+        continue;
+      std::pair<unsigned, unsigned> ArgPair(ArgIt->second, ArgIdx);
+      if (ArgPair.first > ArgPair.second)
+        std::swap(ArgPair.first, ArgPair.second);
 
-/// analyzeFunction - Fill in the current structure with information gleaned
-/// from the specified function.
-void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) {
-  // If this function contains a call that "returns twice" (e.g., setjmp or
-  // _setjmp) and it isn't marked with "returns twice" itself, never inline it.
-  // This is a hack because we depend on the user marking their local variables
-  // as volatile if they are live across a setjmp call, and they probably
-  // won't do this in callers.
-  exposesReturnsTwice = F->callsFunctionThatReturnsTwice() &&
-    !F->hasFnAttr(Attribute::ReturnsTwice);
-
-  // Look at the size of the callee.
-  for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
-    analyzeBasicBlock(&*BB, TD);
+      PointerArgPairWeights[ArgPair]
+        += countCodeReductionForConstant(Metrics, I);
+    }
+  } while (!Worklist.empty());
 }
 
 /// analyzeFunction - Fill in the current structure with information gleaned
@@ -368,12 +314,25 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F,
   if (Metrics.NumRets==1)
     --Metrics.NumInsts;
 
-  // Check out all of the arguments to the function, figuring out how much
-  // code can be eliminated if one of the arguments is a constant.
   ArgumentWeights.reserve(F->arg_size());
-  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
-    ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I),
-                                      Metrics.CountCodeReductionForAlloca(I)));
+  DenseMap<Value *, unsigned> PointerArgs;
+  unsigned ArgIdx = 0;
+  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+       ++I, ++ArgIdx) {
+    // Count how much code can be eliminated if one of the arguments is
+    // a constant or an alloca.
+    ArgumentWeights.push_back(ArgInfo(countCodeReductionForConstant(Metrics, I),
+                                      countCodeReductionForAlloca(Metrics, I)));
+
+    // If the argument is a pointer, also check for pairs of pointers where
+    // knowing a fixed offset between them allows simplification. This pattern
+    // arises mostly due to STL algorithm patterns where pointers are used as
+    // random access iterators.
+    if (!I->getType()->isPointerTy())
+      continue;
+    PointerArgs[I] = ArgIdx;
+    countCodeReductionForPointerPair(Metrics, PointerArgs, I, ArgIdx);
+  }
 }
 
 /// NeverInline - returns true if the function should never be inlined into
@@ -382,43 +341,6 @@ bool InlineCostAnalyzer::FunctionInfo::NeverInline() {
   return (Metrics.exposesReturnsTwice || Metrics.isRecursive ||
           Metrics.containsIndirectBr);
 }
-// getSpecializationBonus - The heuristic used to determine the per-call
-// performance boost for using a specialization of Callee with argument
-// specializedArgNo replaced by a constant.
-int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
-         SmallVectorImpl<unsigned> &SpecializedArgNos)
-{
-  if (Callee->mayBeOverridden())
-    return 0;
-
-  int Bonus = 0;
-  // If this function uses the coldcc calling convention, prefer not to
-  // specialize it.
-  if (Callee->getCallingConv() == CallingConv::Cold)
-    Bonus -= InlineConstants::ColdccPenalty;
-
-  // Get information about the callee.
-  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
-  // If we haven't calculated this information yet, do so now.
-  if (CalleeFI->Metrics.NumBlocks == 0)
-    CalleeFI->analyzeFunction(Callee, TD);
-
-  unsigned ArgNo = 0;
-  unsigned i = 0;
-  for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end();
-       I != E; ++I, ++ArgNo)
-    if (ArgNo == SpecializedArgNos[i]) {
-      ++i;
-      Bonus += CountBonusForConstant(I);
-    }
-
-  // Calls usually take a long time, so they make the specialization gain
-  // smaller.
-  Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
-
-  return Bonus;
-}
 
 // ConstantFunctionBonus - Figure out how much of a bonus we can get for
 // possibly devirtualizing a function. We'll subtract the size of the function
@@ -522,6 +444,15 @@ int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
       InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
   }
 
+  const DenseMap<std::pair<unsigned, unsigned>, unsigned> &ArgPairWeights
+    = CalleeFI->PointerArgPairWeights;
+  for (DenseMap<std::pair<unsigned, unsigned>, unsigned>::const_iterator I
+         = ArgPairWeights.begin(), E = ArgPairWeights.end();
+       I != E; ++I)
+    if (CS.getArgument(I->first.first)->stripInBoundsConstantOffsets() ==
+        CS.getArgument(I->first.second)->stripInBoundsConstantOffsets())
+      InlineCost -= I->second;
+
   // Each argument passed in has a cost at both the caller and the callee
   // sides.  Measurements show that each argument costs about the same as an
   // instruction.
@@ -589,22 +520,18 @@ int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
 // getInlineCost - The heuristic used to determine if we should inline the
 // function call or not.
 //
-InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
-                               SmallPtrSet<const Function*, 16> &NeverInline) {
-  return getInlineCost(CS, CS.getCalledFunction(), NeverInline);
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS) {
+  return getInlineCost(CS, CS.getCalledFunction());
 }
 
-InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
-                               Function *Callee,
-                               SmallPtrSet<const Function*, 16> &NeverInline) {
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee) {
   Instruction *TheCall = CS.getInstruction();
   Function *Caller = TheCall->getParent()->getParent();
 
   // Don't inline functions which can be redefined at link-time to mean
   // something else.  Don't inline functions marked noinline or call sites
   // marked noinline.
-  if (Callee->mayBeOverridden() ||
-      Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
+  if (Callee->mayBeOverridden() || Callee->hasFnAttr(Attribute::NoInline) ||
       CS.isNoInline())
     return llvm::InlineCost::getNever();
 
@@ -655,38 +582,6 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
   return llvm::InlineCost::get(InlineCost);
 }
 
-// getSpecializationCost - The heuristic used to determine the code-size
-// impact of creating a specialized version of Callee with argument
-// SpecializedArgNo replaced by a constant.
-InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
-                               SmallVectorImpl<unsigned> &SpecializedArgNos)
-{
-  // Don't specialize functions which can be redefined at link-time to mean
-  // something else.
-  if (Callee->mayBeOverridden())
-    return llvm::InlineCost::getNever();
-
-  // Get information about the callee.
-  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
-  // If we haven't calculated this information yet, do so now.
-  if (CalleeFI->Metrics.NumBlocks == 0)
-    CalleeFI->analyzeFunction(Callee, TD);
-
-  int Cost = 0;
-
-  // Look at the original size of the callee.  Each instruction counts as 5.
-  Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
-
-  // Offset that with the amount of code that can be constant-folded
-  // away with the given arguments replaced by constants.
-  for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(),
-       ae = SpecializedArgNos.end(); an != ae; ++an)
-    Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight;
-
-  return llvm::InlineCost::get(Cost);
-}
-
 // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
 // higher threshold to determine if the function call should be inlined.
 float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 370ab96..72e33d1 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -18,6 +18,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "instsimplify"
+#include "llvm/GlobalAlias.h"
 #include "llvm/Operator.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -26,6 +27,7 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Target/TargetData.h"
@@ -38,21 +40,23 @@ STATISTIC(NumExpand,  "Number of expansions");
 STATISTIC(NumFactor , "Number of factorizations");
 STATISTIC(NumReassoc, "Number of reassociations");
 
-static Value *SimplifyAndInst(Value *, Value *, const TargetData *,
-                              const TargetLibraryInfo *, const DominatorTree *,
-                              unsigned);
-static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *,
-                            const TargetLibraryInfo *, const DominatorTree *,
+struct Query {
+  const TargetData *TD;
+  const TargetLibraryInfo *TLI;
+  const DominatorTree *DT;
+
+  Query(const TargetData *td, const TargetLibraryInfo *tli,
+        const DominatorTree *dt) : TD(td), TLI(tli), DT(dt) {};
+};
+
+static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &,
                             unsigned);
-static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *,
-                              const TargetLibraryInfo *, const DominatorTree *,
-                              unsigned);
-static Value *SimplifyOrInst(Value *, Value *, const TargetData *,
-                             const TargetLibraryInfo *, const DominatorTree *,
-                             unsigned);
-static Value *SimplifyXorInst(Value *, Value *, const TargetData *,
-                              const TargetLibraryInfo *, const DominatorTree *,
+static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &,
                               unsigned);
+static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned);
 
 /// getFalse - For a boolean type, or a vector of boolean type, return false, or
 /// a vector with every element false, as appropriate for the type.
@@ -91,10 +95,20 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
     // Arguments and constants dominate all instructions.
     return true;
 
+  // If we are processing instructions (and/or basic blocks) that have not been
+  // fully added to a function, the parent nodes may still be null. Simply
+  // return the conservative answer in these cases.
+  if (!I->getParent() || !P->getParent() || !I->getParent()->getParent())
+    return false;
+
   // If we have a DominatorTree then do a precise test.
-  if (DT)
-    return !DT->isReachableFromEntry(P->getParent()) ||
-      !DT->isReachableFromEntry(I->getParent()) || DT->dominates(I, P);
+  if (DT) {
+    if (!DT->isReachableFromEntry(P->getParent()))
+      return true;
+    if (!DT->isReachableFromEntry(I->getParent()))
+      return false;
+    return DT->dominates(I, P);
+  }
 
   // Otherwise, if the instruction is in the entry block, and is not an invoke,
   // then it obviously dominates all phi nodes.
@@ -111,8 +125,7 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
 /// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
 /// Returns the simplified value, or null if no simplification was performed.
 static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
-                          unsigned OpcToExpand, const TargetData *TD,
-                          const TargetLibraryInfo *TLI, const DominatorTree *DT,
+                          unsigned OpcToExpand, const Query &Q,
                           unsigned MaxRecurse) {
   Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
   // Recursion is always used, so bail out at once if we already hit the limit.
@@ -125,8 +138,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
       // It does!  Try turning it into "(A op C) op' (B op C)".
       Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
       // Do "A op C" and "B op C" both simplify?
-      if (Value *L = SimplifyBinOp(Opcode, A, C, TD, TLI, DT, MaxRecurse))
-        if (Value *R = SimplifyBinOp(Opcode, B, C, TD, TLI, DT, MaxRecurse)) {
+      if (Value *L = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse))
+        if (Value *R = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
           // They do! Return "L op' R" if it simplifies or is already available.
           // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
           if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand)
@@ -135,8 +148,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
             return LHS;
           }
           // Otherwise return "L op' R" if it simplifies.
-          if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, TLI, DT,
-                                       MaxRecurse)) {
+          if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) {
             ++NumExpand;
             return V;
           }
@@ -149,8 +161,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
       // It does!  Try turning it into "(A op B) op' (A op C)".
       Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
       // Do "A op B" and "A op C" both simplify?
-      if (Value *L = SimplifyBinOp(Opcode, A, B, TD, TLI, DT, MaxRecurse))
-        if (Value *R = SimplifyBinOp(Opcode, A, C, TD, TLI, DT, MaxRecurse)) {
+      if (Value *L = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse))
+        if (Value *R = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) {
           // They do! Return "L op' R" if it simplifies or is already available.
           // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
           if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand)
@@ -159,8 +171,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
             return RHS;
           }
           // Otherwise return "L op' R" if it simplifies.
-          if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, TLI, DT,
-                                       MaxRecurse)) {
+          if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) {
             ++NumExpand;
             return V;
           }
@@ -175,9 +186,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
 /// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)".
 /// Returns the simplified value, or null if no simplification was performed.
 static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
-                             unsigned OpcToExtract, const TargetData *TD, 
-                             const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT,
+                             unsigned OpcToExtract, const Query &Q,
                              unsigned MaxRecurse) {
   Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
   // Recursion is always used, so bail out at once if we already hit the limit.
@@ -202,7 +211,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
     Value *DD = A == C ? D : C;
     // Form "A op' (B op DD)" if it simplifies completely.
     // Does "B op DD" simplify?
-    if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, TLI, DT, MaxRecurse)) {
+    if (Value *V = SimplifyBinOp(Opcode, B, DD, Q, MaxRecurse)) {
       // It does!  Return "A op' V" if it simplifies or is already available.
       // If V equals B then "A op' V" is just the LHS.  If V equals DD then
       // "A op' V" is just the RHS.
@@ -211,8 +220,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
         return V == B ? LHS : RHS;
       }
       // Otherwise return "A op' V" if it simplifies.
-      if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, TLI, DT,
-                                   MaxRecurse)) {
+      if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, Q, MaxRecurse)) {
         ++NumFactor;
         return W;
       }
@@ -226,7 +234,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
     Value *CC = B == D ? C : D;
     // Form "(A op CC) op' B" if it simplifies completely..
     // Does "A op CC" simplify?
-    if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, TLI, DT, MaxRecurse)) {
+    if (Value *V = SimplifyBinOp(Opcode, A, CC, Q, MaxRecurse)) {
       // It does!  Return "V op' B" if it simplifies or is already available.
       // If V equals A then "V op' B" is just the LHS.  If V equals CC then
       // "V op' B" is just the RHS.
@@ -235,8 +243,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
         return V == A ? LHS : RHS;
       }
       // Otherwise return "V op' B" if it simplifies.
-      if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, TLI, DT,
-                                   MaxRecurse)) {
+      if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, Q, MaxRecurse)) {
         ++NumFactor;
         return W;
       }
@@ -249,10 +256,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
 /// SimplifyAssociativeBinOp - Generic simplifications for associative binary
 /// operations.  Returns the simpler value, or null if none was found.
 static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
-                                       const TargetData *TD,
-                                       const TargetLibraryInfo *TLI,
-                                       const DominatorTree *DT,
-                                       unsigned MaxRecurse) {
+                                       const Query &Q, unsigned MaxRecurse) {
   Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
   assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
 
@@ -270,12 +274,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
     Value *C = RHS;
 
     // Does "B op C" simplify?
-    if (Value *V = SimplifyBinOp(Opcode, B, C, TD, TLI, DT, MaxRecurse)) {
+    if (Value *V = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
       // It does!  Return "A op V" if it simplifies or is already available.
       // If V equals B then "A op V" is just the LHS.
       if (V == B) return LHS;
       // Otherwise return "A op V" if it simplifies.
-      if (Value *W = SimplifyBinOp(Opcode, A, V, TD, TLI, DT, MaxRecurse)) {
+      if (Value *W = SimplifyBinOp(Opcode, A, V, Q, MaxRecurse)) {
         ++NumReassoc;
         return W;
       }
@@ -289,12 +293,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
     Value *C = Op1->getOperand(1);
 
     // Does "A op B" simplify?
-    if (Value *V = SimplifyBinOp(Opcode, A, B, TD, TLI, DT, MaxRecurse)) {
+    if (Value *V = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) {
       // It does!  Return "V op C" if it simplifies or is already available.
       // If V equals B then "V op C" is just the RHS.
       if (V == B) return RHS;
       // Otherwise return "V op C" if it simplifies.
-      if (Value *W = SimplifyBinOp(Opcode, V, C, TD, TLI, DT, MaxRecurse)) {
+      if (Value *W = SimplifyBinOp(Opcode, V, C, Q, MaxRecurse)) {
         ++NumReassoc;
         return W;
       }
@@ -312,12 +316,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
     Value *C = RHS;
 
     // Does "C op A" simplify?
-    if (Value *V = SimplifyBinOp(Opcode, C, A, TD, TLI, DT, MaxRecurse)) {
+    if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
       // It does!  Return "V op B" if it simplifies or is already available.
       // If V equals A then "V op B" is just the LHS.
       if (V == A) return LHS;
       // Otherwise return "V op B" if it simplifies.
-      if (Value *W = SimplifyBinOp(Opcode, V, B, TD, TLI, DT, MaxRecurse)) {
+      if (Value *W = SimplifyBinOp(Opcode, V, B, Q, MaxRecurse)) {
         ++NumReassoc;
         return W;
       }
@@ -331,12 +335,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
     Value *C = Op1->getOperand(1);
 
     // Does "C op A" simplify?
-    if (Value *V = SimplifyBinOp(Opcode, C, A, TD, TLI, DT, MaxRecurse)) {
+    if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
       // It does!  Return "B op V" if it simplifies or is already available.
       // If V equals C then "B op V" is just the RHS.
       if (V == C) return RHS;
       // Otherwise return "B op V" if it simplifies.
-      if (Value *W = SimplifyBinOp(Opcode, B, V, TD, TLI, DT, MaxRecurse)) {
+      if (Value *W = SimplifyBinOp(Opcode, B, V, Q, MaxRecurse)) {
         ++NumReassoc;
         return W;
       }
@@ -351,10 +355,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
 /// evaluating it on both branches of the select results in the same value.
 /// Returns the common value if so, otherwise returns null.
 static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
-                                    const TargetData *TD,
-                                    const TargetLibraryInfo *TLI,
-                                    const DominatorTree *DT,
-                                    unsigned MaxRecurse) {
+                                    const Query &Q, unsigned MaxRecurse) {
   // Recursion is always used, so bail out at once if we already hit the limit.
   if (!MaxRecurse--)
     return 0;
@@ -371,11 +372,11 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
   Value *TV;
   Value *FV;
   if (SI == LHS) {
-    TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, TLI, DT, MaxRecurse);
-    FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, TLI, DT, MaxRecurse);
+    TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse);
+    FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse);
   } else {
-    TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, TLI, DT, MaxRecurse);
-    FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, TLI, DT, MaxRecurse);
+    TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse);
+    FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse);
   }
 
   // If they simplified to the same value, then return the common value.
@@ -426,9 +427,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
 /// result in the same value.  Returns the common value if so, otherwise returns
 /// null.
 static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
-                                  Value *RHS, const TargetData *TD,
-                                  const TargetLibraryInfo *TLI,
-                                  const DominatorTree *DT,
+                                  Value *RHS, const Query &Q,
                                   unsigned MaxRecurse) {
   // Recursion is always used, so bail out at once if we already hit the limit.
   if (!MaxRecurse--)
@@ -447,7 +446,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
 
   // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
   // Does "cmp TV, RHS" simplify?
-  Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, TD, TLI, DT, MaxRecurse);
+  Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, Q, MaxRecurse);
   if (TCmp == Cond) {
     // It not only simplified, it simplified to the select condition.  Replace
     // it with 'true'.
@@ -461,7 +460,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
   }
 
   // Does "cmp FV, RHS" simplify?
-  Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, TD, TLI, DT, MaxRecurse);
+  Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, Q, MaxRecurse);
   if (FCmp == Cond) {
     // It not only simplified, it simplified to the select condition.  Replace
     // it with 'false'.
@@ -487,19 +486,19 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
   // is equal to "Cond && TCmp".  This also catches the case when the false
   // value simplified to false and the true value to true, returning "Cond".
   if (match(FCmp, m_Zero()))
-    if (Value *V = SimplifyAndInst(Cond, TCmp, TD, TLI, DT, MaxRecurse))
+    if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse))
       return V;
   // If the true value simplified to true, then the result of the compare
   // is equal to "Cond || FCmp".
   if (match(TCmp, m_One()))
-    if (Value *V = SimplifyOrInst(Cond, FCmp, TD, TLI, DT, MaxRecurse))
+    if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse))
       return V;
   // Finally, if the false value simplified to true and the true value to
   // false, then the result of the compare is equal to "!Cond".
   if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
     if (Value *V =
         SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
-                        TD, TLI, DT, MaxRecurse))
+                        Q, MaxRecurse))
       return V;
 
   return 0;
@@ -510,10 +509,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
 /// it on the incoming phi values yields the same result for every value.  If so
 /// returns the common value, otherwise returns null.
 static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
-                                 const TargetData *TD,
-                                 const TargetLibraryInfo *TLI, 
-                                 const DominatorTree *DT,
-                                 unsigned MaxRecurse) {
+                                 const Query &Q, unsigned MaxRecurse) {
   // Recursion is always used, so bail out at once if we already hit the limit.
   if (!MaxRecurse--)
     return 0;
@@ -522,13 +518,13 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
   if (isa<PHINode>(LHS)) {
     PI = cast<PHINode>(LHS);
     // Bail out if RHS and the phi may be mutually interdependent due to a loop.
-    if (!ValueDominatesPHI(RHS, PI, DT))
+    if (!ValueDominatesPHI(RHS, PI, Q.DT))
       return 0;
   } else {
     assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
     PI = cast<PHINode>(RHS);
     // Bail out if LHS and the phi may be mutually interdependent due to a loop.
-    if (!ValueDominatesPHI(LHS, PI, DT))
+    if (!ValueDominatesPHI(LHS, PI, Q.DT))
       return 0;
   }
 
@@ -539,8 +535,8 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
     // If the incoming value is the phi node itself, it can safely be skipped.
     if (Incoming == PI) continue;
     Value *V = PI == LHS ?
-      SimplifyBinOp(Opcode, Incoming, RHS, TD, TLI, DT, MaxRecurse) :
-      SimplifyBinOp(Opcode, LHS, Incoming, TD, TLI, DT, MaxRecurse);
+      SimplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) :
+      SimplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse);
     // If the operation failed to simplify, or simplified to a different value
     // to previously, then give up.
     if (!V || (CommonValue && V != CommonValue))
@@ -556,10 +552,7 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
 /// incoming phi values yields the same result every time.  If so returns the
 /// common result, otherwise returns null.
 static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
-                               const TargetData *TD,
-                               const TargetLibraryInfo *TLI,
-                               const DominatorTree *DT,
-                               unsigned MaxRecurse) {
+                               const Query &Q, unsigned MaxRecurse) {
   // Recursion is always used, so bail out at once if we already hit the limit.
   if (!MaxRecurse--)
     return 0;
@@ -573,7 +566,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
   PHINode *PI = cast<PHINode>(LHS);
 
   // Bail out if RHS and the phi may be mutually interdependent due to a loop.
-  if (!ValueDominatesPHI(RHS, PI, DT))
+  if (!ValueDominatesPHI(RHS, PI, Q.DT))
     return 0;
 
   // Evaluate the BinOp on the incoming phi values.
@@ -582,7 +575,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
     Value *Incoming = PI->getIncomingValue(i);
     // If the incoming value is the phi node itself, it can safely be skipped.
     if (Incoming == PI) continue;
-    Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, TLI, DT, MaxRecurse);
+    Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse);
     // If the operation failed to simplify, or simplified to a different value
     // to previously, then give up.
     if (!V || (CommonValue && V != CommonValue))
@@ -596,15 +589,12 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
 /// SimplifyAddInst - Given operands for an Add, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                              const TargetData *TD,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT,
-                              unsigned MaxRecurse) {
+                              const Query &Q, unsigned MaxRecurse) {
   if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
-      return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(),
-                                      Ops, TD, TLI);
+      return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), Ops,
+                                      Q.TD, Q.TLI);
     }
 
     // Canonicalize the constant to the RHS.
@@ -634,17 +624,17 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
 
   /// i1 add -> xor.
   if (MaxRecurse && Op0->getType()->isIntegerTy(1))
-    if (Value *V = SimplifyXorInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1))
+    if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
       return V;
 
   // Try some generic simplifications for associative operations.
-  if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, TLI, DT,
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q,
                                           MaxRecurse))
     return V;
 
   // Mul distributes over Add.  Try some generic simplifications based on this.
   if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul,
-                                TD, TLI, DT, MaxRecurse))
+                                Q, MaxRecurse))
     return V;
 
   // Threading Add over selects and phi nodes is pointless, so don't bother.
@@ -662,21 +652,114 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
 Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                              const TargetData *TD, const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit);
+  return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+                           RecursionLimit);
+}
+
+/// \brief Accumulate the constant integer offset a GEP represents.
+///
+/// Given a getelementptr instruction/constantexpr, accumulate the constant
+/// offset from the base pointer into the provided APInt 'Offset'. Returns true
+/// if the GEP has all-constant indices. Returns false if any non-constant
+/// index is encountered leaving the 'Offset' in an undefined state. The
+/// 'Offset' APInt must be the bitwidth of the target's pointer size.
+static bool accumulateGEPOffset(const TargetData &TD, GEPOperator *GEP,
+                                APInt &Offset) {
+  unsigned IntPtrWidth = TD.getPointerSizeInBits();
+  assert(IntPtrWidth == Offset.getBitWidth());
+
+  gep_type_iterator GTI = gep_type_begin(GEP);
+  for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E;
+       ++I, ++GTI) {
+    ConstantInt *OpC = dyn_cast<ConstantInt>(*I);
+    if (!OpC) return false;
+    if (OpC->isZero()) continue;
+
+    // Handle a struct index, which adds its field offset to the pointer.
+    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+      unsigned ElementIdx = OpC->getZExtValue();
+      const StructLayout *SL = TD.getStructLayout(STy);
+      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
+      continue;
+    }
+
+    APInt TypeSize(IntPtrWidth, TD.getTypeAllocSize(GTI.getIndexedType()));
+    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
+  }
+  return true;
+}
+
+/// \brief Compute the base pointer and cumulative constant offsets for V.
+///
+/// This strips all constant offsets off of V, leaving it the base pointer, and
+/// accumulates the total constant offset applied in the returned constant. It
+/// returns 0 if V is not a pointer, and returns the constant '0' if there are
+/// no constant offsets applied.
+static Constant *stripAndComputeConstantOffsets(const TargetData &TD,
+                                                Value *&V) {
+  if (!V->getType()->isPointerTy())
+    return 0;
+
+  unsigned IntPtrWidth = TD.getPointerSizeInBits();
+  APInt Offset = APInt::getNullValue(IntPtrWidth);
+
+  // Even though we don't look through PHI nodes, we could be called on an
+  // instruction in an unreachable block, which may be on a cycle.
+  SmallPtrSet<Value *, 4> Visited;
+  Visited.insert(V);
+  do {
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+      if (!accumulateGEPOffset(TD, GEP, Offset))
+        break;
+      V = GEP->getPointerOperand();
+    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+      V = cast<Operator>(V)->getOperand(0);
+    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+      if (GA->mayBeOverridden())
+        break;
+      V = GA->getAliasee();
+    } else {
+      break;
+    }
+    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+  } while (Visited.insert(V));
+
+  Type *IntPtrTy = TD.getIntPtrType(V->getContext());
+  return ConstantInt::get(IntPtrTy, Offset);
+}
+
+/// \brief Compute the constant difference between two pointer values.
+/// If the difference is not a constant, returns zero.
+static Constant *computePointerDifference(const TargetData &TD,
+                                          Value *LHS, Value *RHS) {
+  Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS);
+  if (!LHSOffset)
+    return 0;
+  Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS);
+  if (!RHSOffset)
+    return 0;
+
+  // If LHS and RHS are not related via constant offsets to the same base
+  // value, there is nothing we can do here.
+  if (LHS != RHS)
+    return 0;
+
+  // Otherwise, the difference of LHS - RHS can be computed as:
+  //    LHS - RHS
+  //  = (LHSOffset + Base) - (RHSOffset + Base)
+  //  = LHSOffset - RHSOffset
+  return ConstantExpr::getSub(LHSOffset, RHSOffset);
 }
 
 /// SimplifySubInst - Given operands for a Sub, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                              const TargetData *TD,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT,
-                              unsigned MaxRecurse) {
+                              const Query &Q, unsigned MaxRecurse) {
   if (Constant *CLHS = dyn_cast<Constant>(Op0))
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
-                                      Ops, TD, TLI);
+                                      Ops, Q.TD, Q.TLI);
     }
 
   // X - undef -> undef
@@ -704,19 +787,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
   Value *Y = 0, *Z = Op1;
   if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
     // See if "V === Y - Z" simplifies.
-    if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, TLI, DT, MaxRecurse-1))
+    if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1))
       // It does!  Now see if "X + V" simplifies.
-      if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, TLI, DT,
-                                   MaxRecurse-1)) {
+      if (Value *W = SimplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse-1)) {
         // It does, we successfully reassociated!
         ++NumReassoc;
         return W;
       }
     // See if "V === X - Z" simplifies.
-    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, TLI, DT, MaxRecurse-1))
+    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
       // It does!  Now see if "Y + V" simplifies.
-      if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, TLI, DT,
-                                   MaxRecurse-1)) {
+      if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse-1)) {
         // It does, we successfully reassociated!
         ++NumReassoc;
         return W;
@@ -728,19 +809,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
   X = Op0;
   if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
     // See if "V === X - Y" simplifies.
-    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, TLI, DT, MaxRecurse-1))
+    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
       // It does!  Now see if "V - Z" simplifies.
-      if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, TLI, DT,
-                                   MaxRecurse-1)) {
+      if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse-1)) {
         // It does, we successfully reassociated!
         ++NumReassoc;
         return W;
       }
     // See if "V === X - Z" simplifies.
-    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, TLI, DT, MaxRecurse-1))
+    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
       // It does!  Now see if "V - Y" simplifies.
-      if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, TLI, DT,
-                                   MaxRecurse-1)) {
+      if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse-1)) {
         // It does, we successfully reassociated!
         ++NumReassoc;
         return W;
@@ -752,23 +831,39 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
   Z = Op0;
   if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
     // See if "V === Z - X" simplifies.
-    if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, TLI, DT, MaxRecurse-1))
+    if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse-1))
       // It does!  Now see if "V + Y" simplifies.
-      if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, TLI, DT,
-                                   MaxRecurse-1)) {
+      if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse-1)) {
         // It does, we successfully reassociated!
         ++NumReassoc;
         return W;
       }
 
+  // trunc(X) - trunc(Y) -> trunc(X - Y) if everything simplifies.
+  if (MaxRecurse && match(Op0, m_Trunc(m_Value(X))) &&
+      match(Op1, m_Trunc(m_Value(Y))))
+    if (X->getType() == Y->getType())
+      // See if "V === X - Y" simplifies.
+      if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
+        // It does!  Now see if "trunc V" simplifies.
+        if (Value *W = SimplifyTruncInst(V, Op0->getType(), Q, MaxRecurse-1))
+          // It does, return the simplified "trunc V".
+          return W;
+
+  // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...).
+  if (Q.TD && match(Op0, m_PtrToInt(m_Value(X))) &&
+      match(Op1, m_PtrToInt(m_Value(Y))))
+    if (Constant *Result = computePointerDifference(*Q.TD, X, Y))
+      return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
+
   // Mul distributes over Sub.  Try some generic simplifications based on this.
   if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul,
-                                TD, TLI, DT, MaxRecurse))
+                                Q, MaxRecurse))
     return V;
 
   // i1 sub -> xor.
   if (MaxRecurse && Op0->getType()->isIntegerTy(1))
-    if (Value *V = SimplifyXorInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1))
+    if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
       return V;
 
   // Threading Sub over selects and phi nodes is pointless, so don't bother.
@@ -784,22 +879,21 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
 }
 
 Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                             const TargetData *TD,
-                             const TargetLibraryInfo *TLI,
+                             const TargetData *TD, const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit);
+  return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+                           RecursionLimit);
 }
 
 /// SimplifyMulInst - Given operands for a Mul, see if we can
 /// fold the result.  If not, this returns null.
-static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
+                              unsigned MaxRecurse) {
   if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
-                                      Ops, TD, TLI);
+                                      Ops, Q.TD, Q.TLI);
     }
 
     // Canonicalize the constant to the RHS.
@@ -826,30 +920,30 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
 
   // i1 mul -> and.
   if (MaxRecurse && Op0->getType()->isIntegerTy(1))
-    if (Value *V = SimplifyAndInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1))
+    if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1))
       return V;
 
   // Try some generic simplifications for associative operations.
-  if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, TLI, DT,
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q,
                                           MaxRecurse))
     return V;
 
   // Mul distributes over Add.  Try some generic simplifications based on this.
   if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
-                             TD, TLI, DT, MaxRecurse))
+                             Q, MaxRecurse))
     return V;
 
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
-    if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, TLI, DT,
+    if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q,
                                          MaxRecurse))
       return V;
 
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, TLI, DT,
+    if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q,
                                       MaxRecurse))
       return V;
 
@@ -859,18 +953,17 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
 Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
                              const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyMulInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+  return ::SimplifyMulInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
 }
 
 /// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
-                          const TargetData *TD, const TargetLibraryInfo *TLI,
-                          const DominatorTree *DT, unsigned MaxRecurse) {
+                          const Query &Q, unsigned MaxRecurse) {
   if (Constant *C0 = dyn_cast<Constant>(Op0)) {
     if (Constant *C1 = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { C0, C1 };
-      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI);
+      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
     }
   }
 
@@ -923,15 +1016,13 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
-    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT,
-                                         MaxRecurse))
+    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
       return V;
 
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT,
-                                      MaxRecurse))
+    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
       return V;
 
   return 0;
@@ -939,11 +1030,9 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
 
 /// SimplifySDivInst - Given operands for an SDiv, see if we can
 /// fold the result.  If not, this returns null.
-static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
-                               const TargetLibraryInfo *TLI,
-                               const DominatorTree *DT, unsigned MaxRecurse) {
-  if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, TLI, DT,
-                             MaxRecurse))
+static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
+                               unsigned MaxRecurse) {
+  if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse))
     return V;
 
   return 0;
@@ -952,16 +1041,14 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
 Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifySDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+  return ::SimplifySDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
 }
 
 /// SimplifyUDivInst - Given operands for a UDiv, see if we can
 /// fold the result.  If not, this returns null.
-static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
-                               const TargetLibraryInfo *TLI,
-                               const DominatorTree *DT, unsigned MaxRecurse) {
-  if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, TLI, DT,
-                             MaxRecurse))
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
+                               unsigned MaxRecurse) {
+  if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse))
     return V;
 
   return 0;
@@ -970,12 +1057,11 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
 Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyUDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+  return ::SimplifyUDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
 }
 
-static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
-                               const TargetLibraryInfo *,
-                               const DominatorTree *, unsigned) {
+static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
+                               unsigned) {
   // undef / X -> undef    (the undef could be a snan).
   if (match(Op0, m_Undef()))
     return Op0;
@@ -990,18 +1076,17 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
 Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyFDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+  return ::SimplifyFDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
 }
 
 /// SimplifyRem - Given operands for an SRem or URem, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
-                          const TargetData *TD, const TargetLibraryInfo *TLI,
-                          const DominatorTree *DT, unsigned MaxRecurse) {
+                          const Query &Q, unsigned MaxRecurse) {
   if (Constant *C0 = dyn_cast<Constant>(Op0)) {
     if (Constant *C1 = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { C0, C1 };
-      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI);
+      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
     }
   }
 
@@ -1036,13 +1121,13 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
-    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
+    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
       return V;
 
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
+    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
       return V;
 
   return 0;
@@ -1050,11 +1135,9 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
 
 /// SimplifySRemInst - Given operands for an SRem, see if we can
 /// fold the result.  If not, this returns null.
-static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
-                               const TargetLibraryInfo *TLI,
-                               const DominatorTree *DT,
-                               unsigned MaxRecurse) {                               
-  if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, TLI, DT, MaxRecurse))
+static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
+                               unsigned MaxRecurse) {
+  if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse))
     return V;
 
   return 0;
@@ -1063,16 +1146,14 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
 Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifySRemInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+  return ::SimplifySRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
 }
 
 /// SimplifyURemInst - Given operands for a URem, see if we can
 /// fold the result.  If not, this returns null.
-static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
-                               const TargetLibraryInfo *TLI,
-                               const DominatorTree *DT,
+static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
                                unsigned MaxRecurse) {
-  if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, TLI, DT, MaxRecurse))
+  if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse))
     return V;
 
   return 0;
@@ -1081,12 +1162,10 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
 Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyURemInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+  return ::SimplifyURemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
 }
 
-static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
-                               const TargetLibraryInfo *,
-                               const DominatorTree *,
+static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
                                unsigned) {
   // undef % X -> undef    (the undef could be a snan).
   if (match(Op0, m_Undef()))
@@ -1102,18 +1181,17 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
 Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyFRemInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+  return ::SimplifyFRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
 }
 
 /// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
-                            const TargetData *TD, const TargetLibraryInfo *TLI,
-                            const DominatorTree *DT, unsigned MaxRecurse) {
+                            const Query &Q, unsigned MaxRecurse) {
   if (Constant *C0 = dyn_cast<Constant>(Op0)) {
     if (Constant *C1 = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { C0, C1 };
-      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI);
+      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
     }
   }
 
@@ -1138,13 +1216,13 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
-    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
+    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
       return V;
 
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
+    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
       return V;
 
   return 0;
@@ -1153,10 +1231,8 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
 /// SimplifyShlInst - Given operands for an Shl, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                              const TargetData *TD,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, unsigned MaxRecurse) {
-  if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, TLI, DT, MaxRecurse))
+                              const Query &Q, unsigned MaxRecurse) {
+  if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse))
     return V;
 
   // undef << X -> 0
@@ -1173,17 +1249,15 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
 Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                              const TargetData *TD, const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit);
+  return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+                           RecursionLimit);
 }
 
 /// SimplifyLShrInst - Given operands for an LShr, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
-                               const TargetData *TD,
-                               const TargetLibraryInfo *TLI,
-                               const DominatorTree *DT,
-                               unsigned MaxRecurse) {
-  if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, TLI, DT, MaxRecurse))
+                               const Query &Q, unsigned MaxRecurse) {
+  if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, Q, MaxRecurse))
     return V;
 
   // undef >>l X -> 0
@@ -1203,17 +1277,15 @@ Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
                               const TargetData *TD,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyLShrInst(Op0, Op1, isExact, TD, TLI, DT, RecursionLimit);
+  return ::SimplifyLShrInst(Op0, Op1, isExact, Query (TD, TLI, DT),
+                            RecursionLimit);
 }
 
 /// SimplifyAShrInst - Given operands for an AShr, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
-                               const TargetData *TD,
-                               const TargetLibraryInfo *TLI,
-                               const DominatorTree *DT,
-                               unsigned MaxRecurse) {
-  if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, TLI, DT, MaxRecurse))
+                               const Query &Q, unsigned MaxRecurse) {
+  if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, Q, MaxRecurse))
     return V;
 
   // all ones >>a X -> all ones
@@ -1237,20 +1309,19 @@ Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
                               const TargetData *TD,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyAShrInst(Op0, Op1, isExact, TD, TLI, DT, RecursionLimit);
+  return ::SimplifyAShrInst(Op0, Op1, isExact, Query (TD, TLI, DT),
+                            RecursionLimit);
 }
 
 /// SimplifyAndInst - Given operands for an And, see if we can
 /// fold the result.  If not, this returns null.
-static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, 
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT,
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
                               unsigned MaxRecurse) {
   if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
-                                      Ops, TD, TLI);
+                                      Ops, Q.TD, Q.TLI);
     }
 
     // Canonicalize the constant to the RHS.
@@ -1292,43 +1363,43 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
   // A & (-A) = A if A is a power of two or zero.
   if (match(Op0, m_Neg(m_Specific(Op1))) ||
       match(Op1, m_Neg(m_Specific(Op0)))) {
-    if (isPowerOfTwo(Op0, TD, /*OrZero*/true))
+    if (isPowerOfTwo(Op0, Q.TD, /*OrZero*/true))
       return Op0;
-    if (isPowerOfTwo(Op1, TD, /*OrZero*/true))
+    if (isPowerOfTwo(Op1, Q.TD, /*OrZero*/true))
       return Op1;
   }
 
   // Try some generic simplifications for associative operations.
-  if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, TLI,
-                                          DT, MaxRecurse))
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q,
+                                          MaxRecurse))
     return V;
 
   // And distributes over Or.  Try some generic simplifications based on this.
   if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
-                             TD, TLI, DT, MaxRecurse))
+                             Q, MaxRecurse))
     return V;
 
   // And distributes over Xor.  Try some generic simplifications based on this.
   if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
-                             TD, TLI, DT, MaxRecurse))
+                             Q, MaxRecurse))
     return V;
 
   // Or distributes over And.  Try some generic simplifications based on this.
   if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or,
-                                TD, TLI, DT, MaxRecurse))
+                                Q, MaxRecurse))
     return V;
 
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
-    if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, TLI,
-                                         DT, MaxRecurse))
+    if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q,
+                                         MaxRecurse))
       return V;
 
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, TLI, DT,
+    if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, Q,
                                       MaxRecurse))
       return V;
 
@@ -1338,19 +1409,18 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
 Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
                              const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyAndInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+  return ::SimplifyAndInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
 }
 
 /// SimplifyOrInst - Given operands for an Or, see if we can
 /// fold the result.  If not, this returns null.
-static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, 
-                             const TargetLibraryInfo *TLI,
-                             const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
+                             unsigned MaxRecurse) {
   if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
-                                      Ops, TD, TLI);
+                                      Ops, Q.TD, Q.TLI);
     }
 
     // Canonicalize the constant to the RHS.
@@ -1400,32 +1470,31 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
     return Constant::getAllOnesValue(Op0->getType());
 
   // Try some generic simplifications for associative operations.
-  if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, TLI,
-                                          DT, MaxRecurse))
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
+                                          MaxRecurse))
     return V;
 
   // Or distributes over And.  Try some generic simplifications based on this.
-  if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, TD,
-                             TLI, DT, MaxRecurse))
+  if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, Q,
+                             MaxRecurse))
     return V;
 
   // And distributes over Or.  Try some generic simplifications based on this.
   if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And,
-                                TD, TLI, DT, MaxRecurse))
+                                Q, MaxRecurse))
     return V;
 
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
-    if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, TLI, DT,
+    if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q,
                                          MaxRecurse))
       return V;
 
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
-    if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, TLI, DT,
-                                      MaxRecurse))
+    if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse))
       return V;
 
   return 0;
@@ -1434,19 +1503,18 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
 Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
                             const TargetLibraryInfo *TLI,
                             const DominatorTree *DT) {
-  return ::SimplifyOrInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+  return ::SimplifyOrInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
 }
 
 /// SimplifyXorInst - Given operands for a Xor, see if we can
 /// fold the result.  If not, this returns null.
-static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
+                              unsigned MaxRecurse) {
   if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(),
-                                      Ops, TD, TLI);
+                                      Ops, Q.TD, Q.TLI);
     }
 
     // Canonicalize the constant to the RHS.
@@ -1471,13 +1539,13 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
     return Constant::getAllOnesValue(Op0->getType());
 
   // Try some generic simplifications for associative operations.
-  if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, TLI,
-                                          DT, MaxRecurse))
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q,
+                                          MaxRecurse))
     return V;
 
   // And distributes over Xor.  Try some generic simplifications based on this.
   if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And,
-                                TD, TLI, DT, MaxRecurse))
+                                Q, MaxRecurse))
     return V;
 
   // Threading Xor over selects and phi nodes is pointless, so don't bother.
@@ -1495,7 +1563,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
 Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
                              const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyXorInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
+  return ::SimplifyXorInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
 }
 
 static Type *GetCompareTy(Value *Op) {
@@ -1522,42 +1590,17 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
   return 0;
 }
 
-/// stripPointerAdjustments - This is like Value::stripPointerCasts, but also
-/// removes inbounds gep operations, regardless of their indices.
-static Value *stripPointerAdjustmentsImpl(Value *V,
-                                    SmallPtrSet<GEPOperator*, 8> &VisitedGEPs) {
-  GEPOperator *GEP = dyn_cast<GEPOperator>(V);
-  if (GEP == 0 || !GEP->isInBounds())
-    return V;
-
-  // If we've already seen this GEP, we will end up infinitely looping.  This
-  // can happen in unreachable code.
-  if (!VisitedGEPs.insert(GEP))
-    return V;
-  
-  return stripPointerAdjustmentsImpl(GEP->getOperand(0)->stripPointerCasts(),
-                                     VisitedGEPs);
-}
-
-static Value *stripPointerAdjustments(Value *V) {
-  SmallPtrSet<GEPOperator*, 8> VisitedGEPs;
-  return stripPointerAdjustmentsImpl(V, VisitedGEPs);
-}
-
 
 /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                               const TargetData *TD,
-                               const TargetLibraryInfo *TLI,
-                               const DominatorTree *DT,
-                               unsigned MaxRecurse) {
+                               const Query &Q, unsigned MaxRecurse) {
   CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
   assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
 
   if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
     if (Constant *CRHS = dyn_cast<Constant>(RHS))
-      return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD, TLI);
+      return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI);
 
     // If we have a constant, make sure it is on the RHS.
     std::swap(LHS, RHS);
@@ -1625,29 +1668,39 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   // Be more aggressive about stripping pointer adjustments when checking a
   // comparison of an alloca address to another object.  We can rip off all
   // inbounds GEP operations, even if they are variable.
-  LHSPtr = stripPointerAdjustments(LHSPtr);
+  LHSPtr = LHSPtr->stripInBoundsOffsets();
   if (llvm::isIdentifiedObject(LHSPtr)) {
-    RHSPtr = stripPointerAdjustments(RHSPtr);
+    RHSPtr = RHSPtr->stripInBoundsOffsets();
     if (llvm::isKnownNonNull(LHSPtr) || llvm::isKnownNonNull(RHSPtr)) {
       // If both sides are different identified objects, they aren't equal
       // unless they're null.
-      if (LHSPtr != RHSPtr && llvm::isIdentifiedObject(RHSPtr))
-        return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+      if (LHSPtr != RHSPtr && llvm::isIdentifiedObject(RHSPtr) &&
+          Pred == CmpInst::ICMP_EQ)
+        return ConstantInt::get(ITy, false);
 
       // A local identified object (alloca or noalias call) can't equal any
       // incoming argument, unless they're both null.
-      if (isa<Instruction>(LHSPtr) && isa<Argument>(RHSPtr))
-        return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+      if (isa<Instruction>(LHSPtr) && isa<Argument>(RHSPtr) &&
+          Pred == CmpInst::ICMP_EQ)
+        return ConstantInt::get(ITy, false);
     }
 
     // Assume that the constant null is on the right.
-    if (llvm::isKnownNonNull(LHSPtr) && isa<ConstantPointerNull>(RHSPtr))
-      return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+    if (llvm::isKnownNonNull(LHSPtr) && isa<ConstantPointerNull>(RHSPtr)) {
+      if (Pred == CmpInst::ICMP_EQ)
+        return ConstantInt::get(ITy, false);
+      else if (Pred == CmpInst::ICMP_NE)
+        return ConstantInt::get(ITy, true);
+    }
   } else if (isa<Argument>(LHSPtr)) {
-    RHSPtr = stripPointerAdjustments(RHSPtr);
+    RHSPtr = RHSPtr->stripInBoundsOffsets();
     // An alloca can't be equal to an argument.
-    if (isa<AllocaInst>(RHSPtr))
-      return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+    if (isa<AllocaInst>(RHSPtr)) {
+      if (Pred == CmpInst::ICMP_EQ)
+        return ConstantInt::get(ITy, false);
+      else if (Pred == CmpInst::ICMP_NE)
+        return ConstantInt::get(ITy, true);
+    }
   }
 
   // If we are comparing with zero then try hard since this is a common case.
@@ -1661,40 +1714,40 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       return getTrue(ITy);
     case ICmpInst::ICMP_EQ:
     case ICmpInst::ICMP_ULE:
-      if (isKnownNonZero(LHS, TD))
+      if (isKnownNonZero(LHS, Q.TD))
         return getFalse(ITy);
       break;
     case ICmpInst::ICMP_NE:
     case ICmpInst::ICMP_UGT:
-      if (isKnownNonZero(LHS, TD))
+      if (isKnownNonZero(LHS, Q.TD))
         return getTrue(ITy);
       break;
     case ICmpInst::ICMP_SLT:
-      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
       if (LHSKnownNegative)
         return getTrue(ITy);
       if (LHSKnownNonNegative)
         return getFalse(ITy);
       break;
     case ICmpInst::ICMP_SLE:
-      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
       if (LHSKnownNegative)
         return getTrue(ITy);
-      if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+      if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD))
         return getFalse(ITy);
       break;
     case ICmpInst::ICMP_SGE:
-      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
       if (LHSKnownNegative)
         return getFalse(ITy);
       if (LHSKnownNonNegative)
         return getTrue(ITy);
       break;
     case ICmpInst::ICMP_SGT:
-      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
       if (LHSKnownNegative)
         return getFalse(ITy);
-      if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+      if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD))
         return getTrue(ITy);
       break;
     }
@@ -1777,19 +1830,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
 
     // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
     // if the integer type is the same size as the pointer type.
-    if (MaxRecurse && TD && isa<PtrToIntInst>(LI) &&
-        TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
+    if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) &&
+        Q.TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
       if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
         // Transfer the cast to the constant.
         if (Value *V = SimplifyICmpInst(Pred, SrcOp,
                                         ConstantExpr::getIntToPtr(RHSC, SrcTy),
-                                        TD, TLI, DT, MaxRecurse-1))
+                                        Q, MaxRecurse-1))
           return V;
       } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
         if (RI->getOperand(0)->getType() == SrcTy)
           // Compare without the cast.
           if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
-                                          TD, TLI, DT, MaxRecurse-1))
+                                          Q, MaxRecurse-1))
             return V;
       }
     }
@@ -1801,7 +1854,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
         if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
           // Compare X and Y.  Note that signed predicates become unsigned.
           if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
-                                          SrcOp, RI->getOperand(0), TD, TLI, DT,
+                                          SrcOp, RI->getOperand(0), Q,
                                           MaxRecurse-1))
             return V;
       }
@@ -1817,7 +1870,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
         // also a case of comparing two zero-extended values.
         if (RExt == CI && MaxRecurse)
           if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
-                                        SrcOp, Trunc, TD, TLI, DT, MaxRecurse-1))
+                                        SrcOp, Trunc, Q, MaxRecurse-1))
             return V;
 
         // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
@@ -1861,7 +1914,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
         if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
           // Compare X and Y.  Note that the predicate does not change.
           if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
-                                          TD, TLI, DT, MaxRecurse-1))
+                                          Q, MaxRecurse-1))
             return V;
       }
       // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
@@ -1875,8 +1928,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
         // If the re-extended constant didn't change then this is effectively
         // also a case of comparing two sign-extended values.
         if (RExt == CI && MaxRecurse)
-          if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, TLI, DT,
-                                          MaxRecurse-1))
+          if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1))
             return V;
 
         // Otherwise the upper bits of LHS are all equal, while RHS has varying
@@ -1910,7 +1962,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
             if (MaxRecurse)
               if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
                                               Constant::getNullValue(SrcTy),
-                                              TD, TLI, DT, MaxRecurse-1))
+                                              Q, MaxRecurse-1))
                 return V;
             break;
           case ICmpInst::ICMP_ULT:
@@ -1919,7 +1971,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
             if (MaxRecurse)
               if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
                                               Constant::getNullValue(SrcTy),
-                                              TD, TLI, DT, MaxRecurse-1))
+                                              Q, MaxRecurse-1))
                 return V;
             break;
           }
@@ -1953,14 +2005,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     if ((A == RHS || B == RHS) && NoLHSWrapProblem)
       if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
                                       Constant::getNullValue(RHS->getType()),
-                                      TD, TLI, DT, MaxRecurse-1))
+                                      Q, MaxRecurse-1))
         return V;
 
     // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
     if ((C == LHS || D == LHS) && NoRHSWrapProblem)
       if (Value *V = SimplifyICmpInst(Pred,
                                       Constant::getNullValue(LHS->getType()),
-                                      C == LHS ? D : C, TD, TLI, DT, MaxRecurse-1))
+                                      C == LHS ? D : C, Q, MaxRecurse-1))
         return V;
 
     // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
@@ -1969,7 +2021,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       // Determine Y and Z in the form icmp (X+Y), (X+Z).
       Value *Y = (A == C || A == D) ? B : A;
       Value *Z = (C == A || C == B) ? D : C;
-      if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, TLI, DT, MaxRecurse-1))
+      if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1))
         return V;
     }
   }
@@ -1981,7 +2033,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       break;
     case ICmpInst::ICMP_SGT:
     case ICmpInst::ICMP_SGE:
-      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD);
       if (!KnownNonNegative)
         break;
       // fall-through
@@ -1991,7 +2043,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       return getFalse(ITy);
     case ICmpInst::ICMP_SLT:
     case ICmpInst::ICMP_SLE:
-      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD);
       if (!KnownNonNegative)
         break;
       // fall-through
@@ -2008,7 +2060,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       break;
     case ICmpInst::ICMP_SGT:
     case ICmpInst::ICMP_SGE:
-      ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+      ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD);
       if (!KnownNonNegative)
         break;
       // fall-through
@@ -2018,7 +2070,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       return getTrue(ITy);
     case ICmpInst::ICMP_SLT:
     case ICmpInst::ICMP_SLE:
-      ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+      ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD);
       if (!KnownNonNegative)
         break;
       // fall-through
@@ -2052,7 +2104,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       if (!LBO->isExact() || !RBO->isExact())
         break;
       if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
-                                      RBO->getOperand(0), TD, TLI, DT, MaxRecurse-1))
+                                      RBO->getOperand(0), Q, MaxRecurse-1))
         return V;
       break;
     case Instruction::Shl: {
@@ -2063,7 +2115,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       if (!NSW && ICmpInst::isSigned(Pred))
         break;
       if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
-                                      RBO->getOperand(0), TD, TLI, DT, MaxRecurse-1))
+                                      RBO->getOperand(0), Q, MaxRecurse-1))
         return V;
       break;
     }
@@ -2117,7 +2169,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
         return V;
       // Otherwise, see if "A EqP B" simplifies.
       if (MaxRecurse)
-        if (Value *V = SimplifyICmpInst(EqP, A, B, TD, TLI, DT, MaxRecurse-1))
+        if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
           return V;
       break;
     case CmpInst::ICMP_NE:
@@ -2131,7 +2183,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
         return V;
       // Otherwise, see if "A InvEqP B" simplifies.
       if (MaxRecurse)
-        if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, TLI, DT, MaxRecurse-1))
+        if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
           return V;
       break;
     }
@@ -2187,7 +2239,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
         return V;
       // Otherwise, see if "A EqP B" simplifies.
       if (MaxRecurse)
-        if (Value *V = SimplifyICmpInst(EqP, A, B, TD, TLI, DT, MaxRecurse-1))
+        if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
           return V;
       break;
     case CmpInst::ICMP_NE:
@@ -2201,7 +2253,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
         return V;
       // Otherwise, see if "A InvEqP B" simplifies.
       if (MaxRecurse)
-        if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, TLI, DT, MaxRecurse-1))
+        if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
           return V;
       break;
     }
@@ -2283,13 +2335,13 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   // If the comparison is with the result of a select instruction, check whether
   // comparing with either branch of the select always yields the same value.
   if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
-    if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse))
+    if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
       return V;
 
   // If the comparison is with the result of a phi instruction, check whether
   // doing the compare with each incoming phi value yields a common result.
   if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
-    if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse))
+    if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
       return V;
 
   return 0;
@@ -2299,22 +2351,20 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                               const TargetData *TD,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit);
+  return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT),
+                            RecursionLimit);
 }
 
 /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                               const TargetData *TD,
-                               const TargetLibraryInfo *TLI,
-                               const DominatorTree *DT,
-                               unsigned MaxRecurse) {
+                               const Query &Q, unsigned MaxRecurse) {
   CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
   assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
 
   if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
     if (Constant *CRHS = dyn_cast<Constant>(RHS))
-      return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD, TLI);
+      return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI);
 
     // If we have a constant, make sure it is on the RHS.
     std::swap(LHS, RHS);
@@ -2382,13 +2432,13 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   // If the comparison is with the result of a select instruction, check whether
   // comparing with either branch of the select always yields the same value.
   if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
-    if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse))
+    if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
       return V;
 
   // If the comparison is with the result of a phi instruction, check whether
   // doing the compare with each incoming phi value yields a common result.
   if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
-    if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse))
+    if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
       return V;
 
   return 0;
@@ -2398,13 +2448,15 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                               const TargetData *TD,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit);
+  return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT),
+                            RecursionLimit);
 }
 
 /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
 /// the result.  If not, this returns null.
-Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
-                                const TargetData *TD, const DominatorTree *) {
+static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
+                                 Value *FalseVal, const Query &Q,
+                                 unsigned MaxRecurse) {
   // select true, X, Y  -> X
   // select false, X, Y -> Y
   if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal))
@@ -2427,10 +2479,17 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
   return 0;
 }
 
+Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
+                                const TargetData *TD,
+                                const TargetLibraryInfo *TLI,
+                                const DominatorTree *DT) {
+  return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (TD, TLI, DT),
+                              RecursionLimit);
+}
+
 /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
 /// fold the result.  If not, this returns null.
-Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD,
-                             const DominatorTree *) {
+static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
   // The type of the GEP pointer operand.
   PointerType *PtrTy = dyn_cast<PointerType>(Ops[0]->getType());
   // The GEP pointer operand is not a pointer, it's a vector of pointers.
@@ -2454,9 +2513,9 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD,
       if (C->isZero())
         return Ops[0];
     // getelementptr P, N -> P if P points to a type of zero size.
-    if (TD) {
+    if (Q.TD) {
       Type *Ty = PtrTy->getElementType();
-      if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0)
+      if (Ty->isSized() && Q.TD->getTypeAllocSize(Ty) == 0)
         return Ops[0];
     }
   }
@@ -2469,12 +2528,17 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD,
   return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1));
 }
 
+Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD,
+                             const TargetLibraryInfo *TLI,
+                             const DominatorTree *DT) {
+  return ::SimplifyGEPInst(Ops, Query (TD, TLI, DT), RecursionLimit);
+}
+
 /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
 /// can fold the result.  If not, this returns null.
-Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
-                                     ArrayRef<unsigned> Idxs,
-                                     const TargetData *,
-                                     const DominatorTree *) {
+static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
+                                      ArrayRef<unsigned> Idxs, const Query &Q,
+                                      unsigned) {
   if (Constant *CAgg = dyn_cast<Constant>(Agg))
     if (Constant *CVal = dyn_cast<Constant>(Val))
       return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs);
@@ -2499,8 +2563,17 @@ Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
   return 0;
 }
 
+Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
+                                     ArrayRef<unsigned> Idxs,
+                                     const TargetData *TD,
+                                     const TargetLibraryInfo *TLI,
+                                     const DominatorTree *DT) {
+  return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (TD, TLI, DT),
+                                   RecursionLimit);
+}
+
 /// SimplifyPHINode - See if we can fold the given phi.  If not, returns null.
-static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) {
+static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
   // If all of the PHI's incoming values are the same then replace the PHI node
   // with the common value.
   Value *CommonValue = 0;
@@ -2528,81 +2601,77 @@ static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) {
   // instruction, we cannot return X as the result of the PHI node unless it
   // dominates the PHI block.
   if (HasUndefInput)
-    return ValueDominatesPHI(CommonValue, PN, DT) ? CommonValue : 0;
+    return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : 0;
 
   return CommonValue;
 }
 
+static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
+  if (Constant *C = dyn_cast<Constant>(Op))
+    return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.TD, Q.TLI);
+
+  return 0;
+}
+
+Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const TargetData *TD,
+                               const TargetLibraryInfo *TLI,
+                               const DominatorTree *DT) {
+  return ::SimplifyTruncInst(Op, Ty, Query (TD, TLI, DT), RecursionLimit);
+}
+
 //=== Helper functions for higher up the class hierarchy.
 
 /// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
-                            const TargetData *TD, 
-                            const TargetLibraryInfo *TLI,
-                            const DominatorTree *DT,
-                            unsigned MaxRecurse) {
+                            const Query &Q, unsigned MaxRecurse) {
   switch (Opcode) {
   case Instruction::Add:
     return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
-                           TD, TLI, DT, MaxRecurse);
+                           Q, MaxRecurse);
   case Instruction::Sub:
     return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
-                           TD, TLI, DT, MaxRecurse);
-  case Instruction::Mul:  return SimplifyMulInst (LHS, RHS, TD, TLI, DT,
-                                                  MaxRecurse);
-  case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, TLI, DT,
-                                                  MaxRecurse);
-  case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, TLI, DT,
-                                                  MaxRecurse);
-  case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, TLI, DT,
-                                                  MaxRecurse);
-  case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, TLI, DT,
-                                                  MaxRecurse);
-  case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, TLI, DT,
-                                                  MaxRecurse);
-  case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, TLI, DT,
-                                                  MaxRecurse);
+                           Q, MaxRecurse);
+  case Instruction::Mul:  return SimplifyMulInst (LHS, RHS, Q, MaxRecurse);
+  case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
+  case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
+  case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse);
+  case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
+  case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
+  case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, Q, MaxRecurse);
   case Instruction::Shl:
     return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
-                           TD, TLI, DT, MaxRecurse);
+                           Q, MaxRecurse);
   case Instruction::LShr:
-    return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, TLI, DT,
-                            MaxRecurse);
+    return SimplifyLShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse);
   case Instruction::AShr:
-    return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, TLI, DT,
-                            MaxRecurse);
-  case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, TLI, DT,
-                                                MaxRecurse);
-  case Instruction::Or:  return SimplifyOrInst (LHS, RHS, TD, TLI, DT,
-                                                MaxRecurse);
-  case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, TLI, DT,
-                                                MaxRecurse);
+    return SimplifyAShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse);
+  case Instruction::And: return SimplifyAndInst(LHS, RHS, Q, MaxRecurse);
+  case Instruction::Or:  return SimplifyOrInst (LHS, RHS, Q, MaxRecurse);
+  case Instruction::Xor: return SimplifyXorInst(LHS, RHS, Q, MaxRecurse);
   default:
     if (Constant *CLHS = dyn_cast<Constant>(LHS))
       if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
         Constant *COps[] = {CLHS, CRHS};
-        return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, TD, TLI);
+        return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, Q.TD,
+                                        Q.TLI);
       }
 
     // If the operation is associative, try some generic simplifications.
     if (Instruction::isAssociative(Opcode))
-      if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, TLI, DT,
-                                              MaxRecurse))
+      if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, Q, MaxRecurse))
         return V;
 
-    // If the operation is with the result of a select instruction, check whether
+    // If the operation is with the result of a select instruction check whether
     // operating on either branch of the select always yields the same value.
     if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
-      if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, TLI, DT,
-                                           MaxRecurse))
+      if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, Q, MaxRecurse))
         return V;
 
     // If the operation is with the result of a phi instruction, check whether
     // operating on all incoming values of the phi always yields the same value.
     if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
-      if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, TLI, DT,
-                                        MaxRecurse))
+      if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse))
         return V;
 
     return 0;
@@ -2612,28 +2681,26 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
 Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                            const TargetData *TD, const TargetLibraryInfo *TLI,
                            const DominatorTree *DT) {
-  return ::SimplifyBinOp(Opcode, LHS, RHS, TD, TLI, DT, RecursionLimit);
+  return ::SimplifyBinOp(Opcode, LHS, RHS, Query (TD, TLI, DT), RecursionLimit);
 }
 
 /// SimplifyCmpInst - Given operands for a CmpInst, see if we can
 /// fold the result.
 static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                              const TargetData *TD,
-                              const TargetLibraryInfo *TLI,
-                              const DominatorTree *DT,
-                              unsigned MaxRecurse) {
+                              const Query &Q, unsigned MaxRecurse) {
   if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
-    return SimplifyICmpInst(Predicate, LHS, RHS, TD, TLI, DT, MaxRecurse);
-  return SimplifyFCmpInst(Predicate, LHS, RHS, TD, TLI, DT, MaxRecurse);
+    return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
+  return SimplifyFCmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
 }
 
 Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
                              const TargetData *TD, const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit);
+  return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT),
+                           RecursionLimit);
 }
 
-static Value *SimplifyCallInst(CallInst *CI) {
+static Value *SimplifyCallInst(CallInst *CI, const Query &) {
   // call undef -> undef
   if (isa<UndefValue>(CI->getCalledValue()))
     return UndefValue::get(CI->getType());
@@ -2720,25 +2787,28 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
     break;
   case Instruction::Select:
     Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
-                                I->getOperand(2), TD, DT);
+                                I->getOperand(2), TD, TLI, DT);
     break;
   case Instruction::GetElementPtr: {
     SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
-    Result = SimplifyGEPInst(Ops, TD, DT);
+    Result = SimplifyGEPInst(Ops, TD, TLI, DT);
     break;
   }
   case Instruction::InsertValue: {
     InsertValueInst *IV = cast<InsertValueInst>(I);
     Result = SimplifyInsertValueInst(IV->getAggregateOperand(),
                                      IV->getInsertedValueOperand(),
-                                     IV->getIndices(), TD, DT);
+                                     IV->getIndices(), TD, TLI, DT);
     break;
   }
   case Instruction::PHI:
-    Result = SimplifyPHINode(cast<PHINode>(I), DT);
+    Result = SimplifyPHINode(cast<PHINode>(I), Query (TD, TLI, DT));
     break;
   case Instruction::Call:
-    Result = SimplifyCallInst(cast<CallInst>(I));
+    Result = SimplifyCallInst(cast<CallInst>(I), Query (TD, TLI, DT));
+    break;
+  case Instruction::Trunc:
+    Result = SimplifyTruncInst(I->getOperand(0), I->getType(), TD, TLI, DT);
     break;
   }
 
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 279d6a9..5ca2746 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -866,10 +866,11 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
       // BBFrom to BBTo.
       unsigned NumEdges = 0;
       ConstantInt *EdgeVal = 0;
-      for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) {
-        if (SI->getCaseSuccessor(i) != BBTo) continue;
+      for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+           i != e; ++i) {
+        if (i.getCaseSuccessor() != BBTo) continue;
         if (NumEdges++) break;
-        EdgeVal = SI->getCaseValue(i);
+        EdgeVal = i.getCaseValue();
       }
       assert(EdgeVal && "Missing successor?");
       if (NumEdges == 1) {
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 0e6bcbf..873a275 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -17,6 +17,7 @@
 #include "llvm/GlobalAlias.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Operator.h"
 using namespace llvm;
 
@@ -160,10 +161,15 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
 /// MaxInstsToScan specifies the maximum instructions to scan in the block.  If
 /// it is set to 0, it will scan the whole block. You can also optionally
 /// specify an alias analysis implementation, which makes this more precise.
+///
+/// If TBAATag is non-null and a load or store is found, the TBAA tag from the
+/// load or store is recorded there.  If there is no TBAA tag or if no access
+/// is found, it is left unmodified.
 Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
                                       BasicBlock::iterator &ScanFrom,
                                       unsigned MaxInstsToScan,
-                                      AliasAnalysis *AA) {
+                                      AliasAnalysis *AA,
+                                      MDNode **TBAATag) {
   if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
 
   // If we're using alias analysis to disambiguate get the size of *Ptr.
@@ -191,15 +197,19 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
     // (This is true even if the load is volatile or atomic, although
     // those cases are unlikely.)
     if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
-      if (AreEquivalentAddressValues(LI->getOperand(0), Ptr))
+      if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) {
+        if (TBAATag) *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
         return LI;
+      }
     
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       // If this is a store through Ptr, the value is available!
       // (This is true even if the store is volatile or atomic, although
       // those cases are unlikely.)
-      if (AreEquivalentAddressValues(SI->getOperand(1), Ptr))
+      if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) {
+        if (TBAATag) *TBAATag = SI->getMetadata(LLVMContext::MD_tbaa);
         return SI->getOperand(0);
+      }
       
       // If Ptr is an alloca and this is a store to a different alloca, ignore
       // the store.  This is a trivial form of alias analysis that is important
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index ca06300..38cb1c9 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -227,7 +227,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
       return GEP;
 
     // Simplify the GEP to handle 'gep x, 0' -> x etc.
-    if (Value *V = SimplifyGEPInst(GEPOps, TD, DT)) {
+    if (Value *V = SimplifyGEPInst(GEPOps, TD, TLI, DT)) {
       for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
         RemoveInstInputs(GEPOps[i], InstInputs);
 
diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp
index 0c7d05f..c819666 100644
--- a/lib/Analysis/SparsePropagation.cpp
+++ b/lib/Analysis/SparsePropagation.cpp
@@ -194,9 +194,8 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
     Succs.assign(TI.getNumSuccessors(), true);
     return;
   }
-  
-  unsigned CCase = SI.findCaseValue(cast<ConstantInt>(C));
-  Succs[SI.resolveSuccessorIndex(CCase)] = true;
+  SwitchInst::CaseIt Case = SI.findCaseValue(cast<ConstantInt>(C));
+  Succs[Case.getSuccessorIndex()] = true;
 }
 
 
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index b5811f2..01e00ca 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -41,6 +41,160 @@ static unsigned getBitWidth(Type *Ty, const TargetData *TD) {
   return TD ? TD->getPointerSizeInBits() : 0;
 }
 
+static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
+                                    const APInt &Mask,
+                                    APInt &KnownZero, APInt &KnownOne,
+                                    APInt &KnownZero2, APInt &KnownOne2,
+                                    const TargetData *TD, unsigned Depth) {
+  if (!Add) {
+    if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
+      // We know that the top bits of C-X are clear if X contains less bits
+      // than C (i.e. no wrap-around can happen).  For example, 20-X is
+      // positive if we can prove that X is >= 0 and < 16.
+      if (!CLHS->getValue().isNegative()) {
+        unsigned BitWidth = Mask.getBitWidth();
+        unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
+        // NLZ can't be BitWidth with no sign bit
+        APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+        llvm::ComputeMaskedBits(Op1, MaskV, KnownZero2, KnownOne2, TD, Depth+1);
+    
+        // If all of the MaskV bits are known to be zero, then we know the
+        // output top bits are zero, because we now know that the output is
+        // from [0-C].
+        if ((KnownZero2 & MaskV) == MaskV) {
+          unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
+          // Top bits known zero.
+          KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+        }
+      }
+    }
+  }
+
+  unsigned BitWidth = Mask.getBitWidth();
+
+  // If one of the operands has trailing zeros, then the bits that the
+  // other operand has in those bit positions will be preserved in the
+  // result. For an add, this works with either operand. For a subtract,
+  // this only works if the known zeros are in the right operand.
+  APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+  APInt Mask2 = APInt::getLowBitsSet(BitWidth,
+                                     BitWidth - Mask.countLeadingZeros());
+  llvm::ComputeMaskedBits(Op0, Mask2, LHSKnownZero, LHSKnownOne, TD, Depth+1);
+  assert((LHSKnownZero & LHSKnownOne) == 0 &&
+         "Bits known to be one AND zero?");
+  unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
+
+  llvm::ComputeMaskedBits(Op1, Mask2, KnownZero2, KnownOne2, TD, Depth+1);
+  assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
+  unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
+
+  // Determine which operand has more trailing zeros, and use that
+  // many bits from the other operand.
+  if (LHSKnownZeroOut > RHSKnownZeroOut) {
+    if (Add) {
+      APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
+      KnownZero |= KnownZero2 & Mask;
+      KnownOne  |= KnownOne2 & Mask;
+    } else {
+      // If the known zeros are in the left operand for a subtract,
+      // fall back to the minimum known zeros in both operands.
+      KnownZero |= APInt::getLowBitsSet(BitWidth,
+                                        std::min(LHSKnownZeroOut,
+                                                 RHSKnownZeroOut));
+    }
+  } else if (RHSKnownZeroOut >= LHSKnownZeroOut) {
+    APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut);
+    KnownZero |= LHSKnownZero & Mask;
+    KnownOne  |= LHSKnownOne & Mask;
+  }
+
+  // Are we still trying to solve for the sign bit?
+  if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()) {
+    if (NSW) {
+      if (Add) {
+        // Adding two positive numbers can't wrap into negative
+        if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
+          KnownZero |= APInt::getSignBit(BitWidth);
+        // and adding two negative numbers can't wrap into positive.
+        else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
+          KnownOne |= APInt::getSignBit(BitWidth);
+      } else {
+        // Subtracting a negative number from a positive one can't wrap
+        if (LHSKnownZero.isNegative() && KnownOne2.isNegative())
+          KnownZero |= APInt::getSignBit(BitWidth);
+        // neither can subtracting a positive number from a negative one.
+        else if (LHSKnownOne.isNegative() && KnownZero2.isNegative())
+          KnownOne |= APInt::getSignBit(BitWidth);
+      }
+    }
+  }
+}
+
+static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW,
+                                 const APInt &Mask,
+                                 APInt &KnownZero, APInt &KnownOne,
+                                 APInt &KnownZero2, APInt &KnownOne2,
+                                 const TargetData *TD, unsigned Depth) {
+  unsigned BitWidth = Mask.getBitWidth();
+  APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+  ComputeMaskedBits(Op1, Mask2, KnownZero, KnownOne, TD, Depth+1);
+  ComputeMaskedBits(Op0, Mask2, KnownZero2, KnownOne2, TD, Depth+1);
+  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+  assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+  bool isKnownNegative = false;
+  bool isKnownNonNegative = false;
+  // If the multiplication is known not to overflow, compute the sign bit.
+  if (Mask.isNegative() && NSW) {
+    if (Op0 == Op1) {
+      // The product of a number with itself is non-negative.
+      isKnownNonNegative = true;
+    } else {
+      bool isKnownNonNegativeOp1 = KnownZero.isNegative();
+      bool isKnownNonNegativeOp0 = KnownZero2.isNegative();
+      bool isKnownNegativeOp1 = KnownOne.isNegative();
+      bool isKnownNegativeOp0 = KnownOne2.isNegative();
+      // The product of two numbers with the same sign is non-negative.
+      isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
+        (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
+      // The product of a negative number and a non-negative number is either
+      // negative or zero.
+      if (!isKnownNonNegative)
+        isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
+                           isKnownNonZero(Op0, TD, Depth)) ||
+                          (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
+                           isKnownNonZero(Op1, TD, Depth));
+    }
+  }
+
+  // If low bits are zero in either operand, output low known-0 bits.
+  // Also compute a conserative estimate for high known-0 bits.
+  // More trickiness is possible, but this is sufficient for the
+  // interesting case of alignment computation.
+  KnownOne.clearAllBits();
+  unsigned TrailZ = KnownZero.countTrailingOnes() +
+                    KnownZero2.countTrailingOnes();
+  unsigned LeadZ =  std::max(KnownZero.countLeadingOnes() +
+                             KnownZero2.countLeadingOnes(),
+                             BitWidth) - BitWidth;
+
+  TrailZ = std::min(TrailZ, BitWidth);
+  LeadZ = std::min(LeadZ, BitWidth);
+  KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+              APInt::getHighBitsSet(BitWidth, LeadZ);
+  KnownZero &= Mask;
+
+  // Only make use of no-wrap flags if we failed to compute the sign bit
+  // directly.  This matters if the multiplication always overflows, in
+  // which case we prefer to follow the result of the direct computation,
+  // though as the program is invoking undefined behaviour we can choose
+  // whatever we like here.
+  if (isKnownNonNegative && !KnownOne.isNegative())
+    KnownZero.setBit(BitWidth - 1);
+  else if (isKnownNegative && !KnownZero.isNegative())
+    KnownOne.setBit(BitWidth - 1);
+}
+
 /// ComputeMaskedBits - Determine which of the bits specified in Mask are
 /// known to be either zero or one and return them in the KnownZero/KnownOne
 /// bit sets.  This code only analyzes bits in Mask, in order to short-circuit
@@ -106,16 +260,18 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
   // The address of an aligned GlobalValue has trailing zeros.
   if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
     unsigned Align = GV->getAlignment();
-    if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) {
+    if (Align == 0 && TD) {
       if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
         Type *ObjectType = GVar->getType()->getElementType();
-        // If the object is defined in the current Module, we'll be giving
-        // it the preferred alignment. Otherwise, we have to assume that it
-        // may only have the minimum ABI alignment.
-        if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
-          Align = TD->getPreferredAlignment(GVar);
-        else
-          Align = TD->getABITypeAlignment(ObjectType);
+        if (ObjectType->isSized()) {
+          // If the object is defined in the current Module, we'll be giving
+          // it the preferred alignment. Otherwise, we have to assume that it
+          // may only have the minimum ABI alignment.
+          if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
+            Align = TD->getPreferredAlignment(GVar);
+          else
+            Align = TD->getABITypeAlignment(ObjectType);
+        }
       }
     }
     if (Align > 0)
@@ -203,68 +359,11 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     return;
   }
   case Instruction::Mul: {
-    APInt Mask2 = APInt::getAllOnesValue(BitWidth);
-    ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1);
-    ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
-                      Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
-    bool isKnownNegative = false;
-    bool isKnownNonNegative = false;
-    // If the multiplication is known not to overflow, compute the sign bit.
-    if (Mask.isNegative() &&
-        cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap()) {
-      Value *Op1 = I->getOperand(1), *Op2 = I->getOperand(0);
-      if (Op1 == Op2) {
-        // The product of a number with itself is non-negative.
-        isKnownNonNegative = true;
-      } else {
-        bool isKnownNonNegative1 = KnownZero.isNegative();
-        bool isKnownNonNegative2 = KnownZero2.isNegative();
-        bool isKnownNegative1 = KnownOne.isNegative();
-        bool isKnownNegative2 = KnownOne2.isNegative();
-        // The product of two numbers with the same sign is non-negative.
-        isKnownNonNegative = (isKnownNegative1 && isKnownNegative2) ||
-          (isKnownNonNegative1 && isKnownNonNegative2);
-        // The product of a negative number and a non-negative number is either
-        // negative or zero.
-        if (!isKnownNonNegative)
-          isKnownNegative = (isKnownNegative1 && isKnownNonNegative2 &&
-                             isKnownNonZero(Op2, TD, Depth)) ||
-                            (isKnownNegative2 && isKnownNonNegative1 &&
-                             isKnownNonZero(Op1, TD, Depth));
-      }
-    }
-
-    // If low bits are zero in either operand, output low known-0 bits.
-    // Also compute a conserative estimate for high known-0 bits.
-    // More trickiness is possible, but this is sufficient for the
-    // interesting case of alignment computation.
-    KnownOne.clearAllBits();
-    unsigned TrailZ = KnownZero.countTrailingOnes() +
-                      KnownZero2.countTrailingOnes();
-    unsigned LeadZ =  std::max(KnownZero.countLeadingOnes() +
-                               KnownZero2.countLeadingOnes(),
-                               BitWidth) - BitWidth;
-
-    TrailZ = std::min(TrailZ, BitWidth);
-    LeadZ = std::min(LeadZ, BitWidth);
-    KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
-                APInt::getHighBitsSet(BitWidth, LeadZ);
-    KnownZero &= Mask;
-
-    // Only make use of no-wrap flags if we failed to compute the sign bit
-    // directly.  This matters if the multiplication always overflows, in
-    // which case we prefer to follow the result of the direct computation,
-    // though as the program is invoking undefined behaviour we can choose
-    // whatever we like here.
-    if (isKnownNonNegative && !KnownOne.isNegative())
-      KnownZero.setBit(BitWidth - 1);
-    else if (isKnownNegative && !KnownZero.isNegative())
-      KnownOne.setBit(BitWidth - 1);
-
-    return;
+    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+    ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW,
+                         Mask, KnownZero, KnownOne, KnownZero2, KnownOne2,
+                         TD, Depth);
+    break;
   }
   case Instruction::UDiv: {
     // For the purposes of computing leading zeros we can conservatively
@@ -422,91 +521,18 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     }
     break;
   case Instruction::Sub: {
-    if (ConstantInt *CLHS = dyn_cast<ConstantInt>(I->getOperand(0))) {
-      // We know that the top bits of C-X are clear if X contains less bits
-      // than C (i.e. no wrap-around can happen).  For example, 20-X is
-      // positive if we can prove that X is >= 0 and < 16.
-      if (!CLHS->getValue().isNegative()) {
-        unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
-        // NLZ can't be BitWidth with no sign bit
-        APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
-        ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2,
-                          TD, Depth+1);
-    
-        // If all of the MaskV bits are known to be zero, then we know the
-        // output top bits are zero, because we now know that the output is
-        // from [0-C].
-        if ((KnownZero2 & MaskV) == MaskV) {
-          unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
-          // Top bits known zero.
-          KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
-        }
-      }        
-    }
+    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+    ComputeMaskedBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
+                            Mask, KnownZero, KnownOne, KnownZero2, KnownOne2,
+                            TD, Depth);
+    break;
   }
-  // fall through
   case Instruction::Add: {
-    // If one of the operands has trailing zeros, then the bits that the
-    // other operand has in those bit positions will be preserved in the
-    // result. For an add, this works with either operand. For a subtract,
-    // this only works if the known zeros are in the right operand.
-    APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
-    APInt Mask2 = APInt::getLowBitsSet(BitWidth,
-                                       BitWidth - Mask.countLeadingZeros());
-    ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD,
-                      Depth+1);
-    assert((LHSKnownZero & LHSKnownOne) == 0 &&
-           "Bits known to be one AND zero?");
-    unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
-
-    ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, TD, 
-                      Depth+1);
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 
-    unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
-
-    // Determine which operand has more trailing zeros, and use that
-    // many bits from the other operand.
-    if (LHSKnownZeroOut > RHSKnownZeroOut) {
-      if (I->getOpcode() == Instruction::Add) {
-        APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
-        KnownZero |= KnownZero2 & Mask;
-        KnownOne  |= KnownOne2 & Mask;
-      } else {
-        // If the known zeros are in the left operand for a subtract,
-        // fall back to the minimum known zeros in both operands.
-        KnownZero |= APInt::getLowBitsSet(BitWidth,
-                                          std::min(LHSKnownZeroOut,
-                                                   RHSKnownZeroOut));
-      }
-    } else if (RHSKnownZeroOut >= LHSKnownZeroOut) {
-      APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut);
-      KnownZero |= LHSKnownZero & Mask;
-      KnownOne  |= LHSKnownOne & Mask;
-    }
-
-    // Are we still trying to solve for the sign bit?
-    if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()){
-      OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(I);
-      if (OBO->hasNoSignedWrap()) {
-        if (I->getOpcode() == Instruction::Add) {
-          // Adding two positive numbers can't wrap into negative
-          if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
-            KnownZero |= APInt::getSignBit(BitWidth);
-          // and adding two negative numbers can't wrap into positive.
-          else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
-            KnownOne |= APInt::getSignBit(BitWidth);
-        } else {
-          // Subtracting a negative number from a positive one can't wrap
-          if (LHSKnownZero.isNegative() && KnownOne2.isNegative())
-            KnownZero |= APInt::getSignBit(BitWidth);
-          // neither can subtracting a positive number from a negative one.
-          else if (LHSKnownOne.isNegative() && KnownZero2.isNegative())
-            KnownOne |= APInt::getSignBit(BitWidth);
-        }
-      }
-    }
-
-    return;
+    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+    ComputeMaskedBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
+                            Mask, KnownZero, KnownOne, KnownZero2, KnownOne2,
+                            TD, Depth);
+    break;
   }
   case Instruction::SRem:
     if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
@@ -691,8 +717,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
       if (P->hasConstantValue() == P)
         break;
 
-      KnownZero = APInt::getAllOnesValue(BitWidth);
-      KnownOne = APInt::getAllOnesValue(BitWidth);
+      KnownZero = Mask;
+      KnownOne = Mask;
       for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) {
         // Skip direct self references.
         if (P->getIncomingValue(i) == P) continue;
@@ -723,21 +749,51 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
         // If this call is undefined for 0, the result will be less than 2^n.
         if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
           LowBits -= 1;
-        KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+        KnownZero = Mask & APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
         break;
       }
       case Intrinsic::ctpop: {
         unsigned LowBits = Log2_32(BitWidth)+1;
-        KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+        KnownZero = Mask & APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
         break;
       }
       case Intrinsic::x86_sse42_crc32_64_8:
       case Intrinsic::x86_sse42_crc32_64_64:
-        KnownZero = APInt::getHighBitsSet(64, 32);
+        KnownZero = Mask & APInt::getHighBitsSet(64, 32);
         break;
       }
     }
     break;
+  case Instruction::ExtractValue:
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
+      ExtractValueInst *EVI = cast<ExtractValueInst>(I);
+      if (EVI->getNumIndices() != 1) break;
+      if (EVI->getIndices()[0] == 0) {
+        switch (II->getIntrinsicID()) {
+        default: break;
+        case Intrinsic::uadd_with_overflow:
+        case Intrinsic::sadd_with_overflow:
+          ComputeMaskedBitsAddSub(true, II->getArgOperand(0),
+                                  II->getArgOperand(1), false, Mask,
+                                  KnownZero, KnownOne, KnownZero2, KnownOne2,
+                                  TD, Depth);
+          break;
+        case Intrinsic::usub_with_overflow:
+        case Intrinsic::ssub_with_overflow:
+          ComputeMaskedBitsAddSub(false, II->getArgOperand(0),
+                                  II->getArgOperand(1), false, Mask,
+                                  KnownZero, KnownOne, KnownZero2, KnownOne2,
+                                  TD, Depth);
+          break;
+        case Intrinsic::umul_with_overflow:
+        case Intrinsic::smul_with_overflow:
+          ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1),
+                               false, Mask, KnownZero, KnownOne,
+                               KnownZero2, KnownOne2, TD, Depth);
+          break;
+        }
+      }
+    }
   }
 }
 
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 9376990..b25d2e9 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1142,9 +1142,10 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
       Vals.push_back(VE.getTypeID(SI.getCondition()->getType()));
       Vals.push_back(VE.getValueID(SI.getCondition()));
       Vals.push_back(VE.getValueID(SI.getDefaultDest()));
-      for (unsigned i = 0, e = SI.getNumCases(); i != e; ++i) {
-        Vals.push_back(VE.getValueID(SI.getCaseValue(i)));
-        Vals.push_back(VE.getValueID(SI.getCaseSuccessor(i)));
+      for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
+           i != e; ++i) {
+        Vals.push_back(VE.getValueID(i.getCaseValue()));
+        Vals.push_back(VE.getValueID(i.getCaseSuccessor()));
       }
     }
     break;
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d5926f9..dd3fb3b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1864,13 +1864,12 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI,
 
 static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
                                    AsmPrinter &AP) {
-  if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) {
-    uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+  const TargetData *TD = AP.TM.getTargetData();
+  uint64_t Size = TD->getTypeAllocSize(CV->getType());
+  if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
     return AP.OutStreamer.EmitZeros(Size, AddrSpace);
-  }
 
   if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
-    unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
     switch (Size) {
     case 1:
     case 2:
@@ -1891,7 +1890,6 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
     return EmitGlobalConstantFP(CFP, AddrSpace, AP);
 
   if (isa<ConstantPointerNull>(CV)) {
-    unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
     AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
     return;
   }
@@ -1905,20 +1903,28 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
   if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
     return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
 
-  // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
-  // vectors).
-  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV))
+  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+    // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
+    // vectors).
     if (CE->getOpcode() == Instruction::BitCast)
       return EmitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP);
+
+    if (Size > 8) {
+      // If the constant expression's size is greater than 64-bits, then we have
+      // to emit the value in chunks. Try to constant fold the value and emit it
+      // that way.
+      Constant *New = ConstantFoldConstantExpression(CE, TD);
+      if (New && New != CE)
+        return EmitGlobalConstantImpl(New, AddrSpace, AP);
+    }
+  }
   
   if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
     return EmitGlobalConstantVector(V, AddrSpace, AP);
     
   // Otherwise, it must be a ConstantExpr.  Lower it to an MCExpr, then emit it
   // thread the streamer with EmitValue.
-  AP.OutStreamer.EmitValue(LowerConstant(CV, AP),
-                         AP.TM.getTargetData()->getTypeAllocSize(CV->getType()),
-                           AddrSpace);
+  AP.OutStreamer.EmitValue(LowerConstant(CV, AP), Size, AddrSpace);
 }
 
 /// EmitGlobalConstant - Print a general LLVM constant to the .s file.
@@ -2102,27 +2108,22 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
       OutStreamer.EmitLabel(Syms[i]);
   }
 
+  // Print some verbose block comments.
+  if (isVerbose()) {
+    if (const BasicBlock *BB = MBB->getBasicBlock())
+      if (BB->hasName())
+        OutStreamer.AddComment("%" + BB->getName());
+    EmitBasicBlockLoopComments(*MBB, LI, *this);
+  }
+
   // Print the main label for the block.
   if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
     if (isVerbose() && OutStreamer.hasRawTextSupport()) {
-      if (const BasicBlock *BB = MBB->getBasicBlock())
-        if (BB->hasName())
-          OutStreamer.AddComment("%" + BB->getName());
-
-      EmitBasicBlockLoopComments(*MBB, LI, *this);
-
       // NOTE: Want this comment at start of line, don't emit with AddComment.
       OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
                               Twine(MBB->getNumber()) + ":");
     }
   } else {
-    if (isVerbose()) {
-      if (const BasicBlock *BB = MBB->getBasicBlock())
-        if (BB->hasName())
-          OutStreamer.AddComment("%" + BB->getName());
-      EmitBasicBlockLoopComments(*MBB, LI, *this);
-    }
-
     OutStreamer.EmitLabel(MBB->getSymbol());
   }
 }
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 51c635e..3b383f6 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -156,13 +156,12 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
   // Verify subprogram.
   if (!SP.Verify())
     return;
-  // If the line number is 0, don't add it.
-  if (SP.getLineNumber() == 0)
-    return;
 
+  // If the line number is 0, don't add it.
   unsigned Line = SP.getLineNumber();
-  if (!SP.getContext().Verify())
+  if (Line == 0)
     return;
+
   unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(),
                                             SP.getDirectory());
   assert(FileID && "Invalid file id");
@@ -178,7 +177,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
     return;
 
   unsigned Line = Ty.getLineNumber();
-  if (Line == 0 || !Ty.getContext().Verify())
+  if (Line == 0)
     return;
   unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(),
                                             Ty.getDirectory());
@@ -870,11 +869,6 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
     if (CTy.isAppleBlockExtension())
       addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
 
-    unsigned RLang = CTy.getRunTimeLang();
-    if (RLang)
-      addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
-              dwarf::DW_FORM_data1, RLang);
-
     DICompositeType ContainingType = CTy.getContainingType();
     if (DIDescriptor(ContainingType).isCompositeType())
       addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
@@ -922,6 +916,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
     // Add source line info if available.
     if (!CTy.isForwardDecl())
       addSourceLine(&Buffer, CTy);
+
+    // No harm in adding the runtime language to the declaration.
+    unsigned RLang = CTy.getRunTimeLang();
+    if (RLang)
+      addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+              dwarf::DW_FORM_data1, RLang);
   }
 }
 
@@ -1006,6 +1006,9 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
   // Add function template parameters.
   addTemplateParams(*SPDie, SP.getTemplateParams());
 
+  // Unfortunately this code needs to stay here to work around
+  // a bug in older gdbs that requires the linkage name to resolve
+  // multiple template functions.
   StringRef LinkageName = SP.getLinkageName();
   if (!LinkageName.empty())
     addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index fa62169..388cef4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -523,20 +523,19 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
     DirName = "";
 
   unsigned SrcId = SourceIdMap.size()+1;
-  std::pair<std::string, std::string> SourceName =
-      std::make_pair(FileName, DirName);
-  std::pair<std::pair<std::string, std::string>, unsigned> Entry =
-      make_pair(SourceName, SrcId);
 
-  std::map<std::pair<std::string, std::string>, unsigned>::iterator I;
-  bool NewlyInserted;
-  llvm::tie(I, NewlyInserted) = SourceIdMap.insert(Entry);
-  if (!NewlyInserted)
-    return I->second;
+  // We look up the file/dir pair by concatenating them with a zero byte.
+  SmallString<128> NamePair;
+  NamePair += DirName;
+  NamePair += '\0'; // Zero bytes are not allowed in paths.
+  NamePair += FileName;
+
+  StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId);
+  if (Ent.getValue() != SrcId)
+    return Ent.getValue();
 
   // Print out a .file directive to specify files for .loc directives.
-  Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.first.second,
-                                          Entry.first.first);
+  Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName);
 
   return SrcId;
 }
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 8b802d2..83f30f5 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -26,7 +26,6 @@
 #include "llvm/ADT/UniqueVector.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/DebugLoc.h"
-#include <map>
 
 namespace llvm {
 
@@ -209,9 +208,9 @@ class DwarfDebug {
   ///
   std::vector<DIEAbbrev *> Abbreviations;
 
-  /// SourceIdMap - Source id map, i.e. pair of source filename and directory
-  /// mapped to a unique id.
-  std::map<std::pair<std::string, std::string>, unsigned> SourceIdMap;
+  /// SourceIdMap - Source id map, i.e. pair of source filename and directory,
+  /// separated by a zero byte, mapped to a unique id.
+  StringMap<unsigned> SourceIdMap;
 
   /// StringPool - A String->Symbol mapping of strings used by indirect
   /// references.
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 59c92b3..f57f4a8 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -1019,12 +1019,27 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
   return MBB2I->isCall() && !MBB1I->isCall();
 }
 
+/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch
+/// instructions on the block. Always use the DebugLoc of the first
+/// branching instruction found unless its absent, in which case use the
+/// DebugLoc of the second if present.
+static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin())
+    return DebugLoc();
+  --I;
+  while (I->isDebugValue() && I != MBB.begin())
+    --I;
+  if (I->isBranch())
+    return I->getDebugLoc();
+  return DebugLoc();
+}
+
 /// OptimizeBlock - Analyze and optimize control flow related to the specified
 /// block.  This is never called on the entry block.
 bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
   bool MadeChange = false;
   MachineFunction &MF = *MBB->getParent();
-  DebugLoc dl;  // FIXME: this is nowhere
 ReoptimizeBlock:
 
   MachineFunction::iterator FallThrough = MBB;
@@ -1073,6 +1088,7 @@ ReoptimizeBlock:
     // destination, remove the branch, replacing it with an unconditional one or
     // a fall-through.
     if (PriorTBB && PriorTBB == PriorFBB) {
+      DebugLoc dl = getBranchDebugLoc(PrevBB);
       TII->RemoveBranch(PrevBB);
       PriorCond.clear();
       if (PriorTBB != MBB)
@@ -1130,6 +1146,7 @@ ReoptimizeBlock:
     // If the prior block branches somewhere else on the condition and here if
     // the condition is false, remove the uncond second branch.
     if (PriorFBB == MBB) {
+      DebugLoc dl = getBranchDebugLoc(PrevBB);
       TII->RemoveBranch(PrevBB);
       TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
       MadeChange = true;
@@ -1143,6 +1160,7 @@ ReoptimizeBlock:
     if (PriorTBB == MBB) {
       SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
       if (!TII->ReverseBranchCondition(NewPriorCond)) {
+        DebugLoc dl = getBranchDebugLoc(PrevBB);
         TII->RemoveBranch(PrevBB);
         TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl);
         MadeChange = true;
@@ -1180,6 +1198,7 @@ ReoptimizeBlock:
           DEBUG(dbgs() << "\nMoving MBB: " << *MBB
                        << "To make fallthrough to: " << *PriorTBB << "\n");
 
+          DebugLoc dl = getBranchDebugLoc(PrevBB);
           TII->RemoveBranch(PrevBB);
           TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl);
 
@@ -1209,6 +1228,7 @@ ReoptimizeBlock:
     if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
       SmallVector<MachineOperand, 4> NewCond(CurCond);
       if (!TII->ReverseBranchCondition(NewCond)) {
+        DebugLoc dl = getBranchDebugLoc(*MBB);
         TII->RemoveBranch(*MBB);
         TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
         MadeChange = true;
@@ -1222,6 +1242,7 @@ ReoptimizeBlock:
     if (CurTBB && CurCond.empty() && CurFBB == 0 &&
         IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
         !MBB->hasAddressTaken()) {
+      DebugLoc dl = getBranchDebugLoc(*MBB);
       // This block may contain just an unconditional branch.  Because there can
       // be 'non-branch terminators' in the block, try removing the branch and
       // then seeing if the block is empty.
@@ -1264,8 +1285,9 @@ ReoptimizeBlock:
               assert(PriorFBB == 0 && "Machine CFG out of date!");
               PriorFBB = MBB;
             }
+            DebugLoc pdl = getBranchDebugLoc(PrevBB);
             TII->RemoveBranch(PrevBB);
-            TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl);
+            TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);
           }
 
           // Iterate through all the predecessors, revectoring each in-turn.
@@ -1289,9 +1311,10 @@ ReoptimizeBlock:
               bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
                       NewCurFBB, NewCurCond, true);
               if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+                DebugLoc pdl = getBranchDebugLoc(*PMBB);
                 TII->RemoveBranch(*PMBB);
                 NewCurCond.clear();
-                TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl);
+                TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl);
                 MadeChange = true;
                 ++NumBranchOpts;
                 PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
@@ -1351,7 +1374,7 @@ ReoptimizeBlock:
           if (CurFallsThru) {
             MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
             CurCond.clear();
-            TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl);
+            TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc());
           }
           MBB->moveAfter(PredBB);
           MadeChange = true;
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 0362365..21729cd 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -80,7 +80,6 @@ add_llvm_library(LLVMCodeGen
   RegisterScavenging.cpp
   RenderMachineFunction.cpp
   ScheduleDAG.cpp
-  ScheduleDAGEmit.cpp
   ScheduleDAGInstrs.cpp
   ScheduleDAGPrinter.cpp
   ScoreboardHazardRecognizer.cpp
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index c684cdc..bad5010 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -35,7 +35,8 @@ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
   RegClassInfo(RCI),
   Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
   KillIndices(TRI->getNumRegs(), 0),
-  DefIndices(TRI->getNumRegs(), 0) {}
+  DefIndices(TRI->getNumRegs(), 0),
+  KeepRegs(TRI->getNumRegs(), false) {}
 
 CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
 }
@@ -52,9 +53,9 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
   }
 
   // Clear "do not change" set.
-  KeepRegs.clear();
+  KeepRegs.reset();
 
-  bool IsReturnBlock = (!BB->empty() && BB->back().isReturn());
+  bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn());
 
   // Determine the live-out physregs for this block.
   if (IsReturnBlock) {
@@ -63,14 +64,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
          E = MRI.liveout_end(); I != E; ++I) {
       unsigned Reg = *I;
       Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
-      KillIndices[Reg] = BB->size();
+      KillIndices[Reg] = BBSize;
       DefIndices[Reg] = ~0u;
 
       // Repeat, for all aliases.
       for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
         unsigned AliasReg = *Alias;
         Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
-        KillIndices[AliasReg] = BB->size();
+        KillIndices[AliasReg] = BBSize;
         DefIndices[AliasReg] = ~0u;
       }
     }
@@ -85,14 +86,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
            E = (*SI)->livein_end(); I != E; ++I) {
       unsigned Reg = *I;
       Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
-      KillIndices[Reg] = BB->size();
+      KillIndices[Reg] = BBSize;
       DefIndices[Reg] = ~0u;
 
       // Repeat, for all aliases.
       for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
         unsigned AliasReg = *Alias;
         Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
-        KillIndices[AliasReg] = BB->size();
+        KillIndices[AliasReg] = BBSize;
         DefIndices[AliasReg] = ~0u;
       }
     }
@@ -106,14 +107,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
     unsigned Reg = *I;
     if (!IsReturnBlock && !Pristine.test(Reg)) continue;
     Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
-    KillIndices[Reg] = BB->size();
+    KillIndices[Reg] = BBSize;
     DefIndices[Reg] = ~0u;
 
     // Repeat, for all aliases.
     for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
       unsigned AliasReg = *Alias;
       Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
-      KillIndices[AliasReg] = BB->size();
+      KillIndices[AliasReg] = BBSize;
       DefIndices[AliasReg] = ~0u;
     }
   }
@@ -121,7 +122,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
 
 void CriticalAntiDepBreaker::FinishBlock() {
   RegRefs.clear();
-  KeepRegs.clear();
+  KeepRegs.reset();
 }
 
 void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
@@ -233,10 +234,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
       RegRefs.insert(std::make_pair(Reg, &MO));
 
     if (MO.isUse() && Special) {
-      if (KeepRegs.insert(Reg)) {
+      if (!KeepRegs.test(Reg)) {
+        KeepRegs.set(Reg);
         for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
              *Subreg; ++Subreg)
-          KeepRegs.insert(*Subreg);
+          KeepRegs.set(*Subreg);
       }
     }
   }
@@ -259,7 +261,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
           if (MO.clobbersPhysReg(i)) {
             DefIndices[i] = Count;
             KillIndices[i] = ~0u;
-            KeepRegs.erase(i);
+            KeepRegs.reset(i);
             Classes[i] = 0;
             RegRefs.erase(i);
           }
@@ -276,7 +278,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
       assert(((KillIndices[Reg] == ~0u) !=
               (DefIndices[Reg] == ~0u)) &&
              "Kill and Def maps aren't consistent for Reg!");
-      KeepRegs.erase(Reg);
+      KeepRegs.reset(Reg);
       Classes[Reg] = 0;
       RegRefs.erase(Reg);
       // Repeat, for all subregs.
@@ -285,7 +287,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
         unsigned SubregReg = *Subreg;
         DefIndices[SubregReg] = Count;
         KillIndices[SubregReg] = ~0u;
-        KeepRegs.erase(SubregReg);
+        KeepRegs.reset(SubregReg);
         Classes[SubregReg] = 0;
         RegRefs.erase(SubregReg);
       }
@@ -551,7 +553,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
           if (!RegClassInfo.isAllocatable(AntiDepReg))
             // Don't break anti-dependencies on non-allocatable registers.
             AntiDepReg = 0;
-          else if (KeepRegs.count(AntiDepReg))
+          else if (KeepRegs.test(AntiDepReg))
             // Don't break anti-dependencies if an use down below requires
             // this exact register.
             AntiDepReg = 0;
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 0710780..7746259 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -24,7 +24,6 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallSet.h"
 #include <map>
 
 namespace llvm {
@@ -66,7 +65,7 @@ class TargetRegisterInfo;
 
     /// KeepRegs - A set of registers which are live and cannot be changed to
     /// break anti-dependencies.
-    SmallSet<unsigned, 4> KeepRegs;
+    BitVector KeepRegs;
 
   public:
     CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index f0cf290..5ff641c 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -23,10 +23,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/DFAPacketizer.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/MC/MCInstrItineraries.h"
 using namespace llvm;
@@ -103,15 +103,12 @@ void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) {
 namespace {
 // DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides
 // Schedule method to build the dependence graph.
-//
-// ScheduleDAGInstrs has LLVM_LIBRARY_VISIBILITY so we have to reference it as
-// an opaque pointer in VLIWPacketizerList.
 class DefaultVLIWScheduler : public ScheduleDAGInstrs {
 public:
   DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
                        MachineDominatorTree &MDT, bool IsPostRA);
   // Schedule - Actual scheduling work.
-  void Schedule();
+  void schedule();
 };
 } // end anonymous namespace
 
@@ -121,9 +118,9 @@ DefaultVLIWScheduler::DefaultVLIWScheduler(
   ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) {
 }
 
-void DefaultVLIWScheduler::Schedule() {
+void DefaultVLIWScheduler::schedule() {
   // Build the scheduling graph.
-  BuildSchedGraph(0);
+  buildSchedGraph(0);
 }
 
 // VLIWPacketizerList Ctor
@@ -137,7 +134,7 @@ VLIWPacketizerList::VLIWPacketizerList(
 
 // VLIWPacketizerList Dtor
 VLIWPacketizerList::~VLIWPacketizerList() {
-  delete (DefaultVLIWScheduler *)SchedulerImpl;
+  delete SchedulerImpl;
   delete ResourceTracker;
 }
 
@@ -184,18 +181,15 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
 void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator BeginItr,
                                       MachineBasicBlock::iterator EndItr) {
-  DefaultVLIWScheduler *Scheduler = (DefaultVLIWScheduler *)SchedulerImpl;
-  Scheduler->Run(MBB, BeginItr, EndItr, MBB->size());
+  assert(MBB->end() == EndItr && "Bad EndIndex");
 
-  // Remember scheduling units.
-  SUnits = Scheduler->SUnits;
+  SchedulerImpl->enterRegion(MBB, BeginItr, EndItr, MBB->size());
 
-  // Generate MI -> SU map.
-  std::map <MachineInstr*, SUnit*> MIToSUnit;
-  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
-    SUnit *SU = &SUnits[i];
-    MIToSUnit[SU->getInstr()] = SU;
-  }
+  // Build the DAG without reordering instructions.
+  SchedulerImpl->schedule();
+
+  // Remember scheduling units.
+  SUnits = SchedulerImpl->SUnits;
 
   // The main packetizer loop.
   for (; BeginItr != EndItr; ++BeginItr) {
@@ -211,7 +205,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
       continue;
     }
 
-    SUnit *SUI = MIToSUnit[MI];
+    SUnit *SUI = SchedulerImpl->getSUnit(MI);
     assert(SUI && "Missing SUnit Info!");
 
     // Ask DFA if machine resource is available for MI.
@@ -221,7 +215,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
       for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
            VE = CurrentPacketMIs.end(); VI != VE; ++VI) {
         MachineInstr *MJ = *VI;
-        SUnit *SUJ = MIToSUnit[MJ];
+        SUnit *SUJ = SchedulerImpl->getSUnit(MJ);
         assert(SUJ && "Missing SUnit Info!");
 
         // Is it legal to packetize SUI and SUJ together.
@@ -245,4 +239,6 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
 
   // End any packet left behind.
   endPacket(MBB, EndItr);
+
+  SchedulerImpl->exitRegion();
 }
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 17633e2..97e6547 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -90,7 +90,7 @@ static void addPassesToHandleExceptions(TargetMachine *TM,
     // removed from the parent invoke(s). This could happen when a landing
     // pad is shared by multiple invokes and is also a target of a normal
     // edge from elsewhere.
-    PM.add(createSjLjEHPass(TM->getTargetLowering()));
+    PM.add(createSjLjEHPreparePass(TM->getTargetLowering()));
     // FALLTHROUGH
   case ExceptionHandling::DwarfCFI:
   case ExceptionHandling::ARM:
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 0578229..deab05a 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -84,11 +84,11 @@ void LatencyPriorityQueue::push(SUnit *SU) {
 }
 
 
-// ScheduledNode - As nodes are scheduled, we look to see if there are any
+// scheduledNode - As nodes are scheduled, we look to see if there are any
 // successor nodes that have a single unscheduled predecessor.  If so, that
 // single predecessor has a higher priority, since scheduling it will make
 // the node available.
-void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
+void LatencyPriorityQueue::scheduledNode(SUnit *SU) {
   for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
        I != E; ++I) {
     AdjustPriorityOfUnscheduledPreds(I->getSUnit());
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index c35302a..2187833 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -226,7 +226,7 @@ public:
                  LiveInterval *LI, const VNInfo *VNI,
                  SmallVectorImpl<SlotIndex> *Kills,
                  LiveIntervals &LIS, MachineDominatorTree &MDT,
-		 UserValueScopes &UVS);
+                 UserValueScopes &UVS);
 
   /// addDefsFromCopies - The value in LI/LocNo may be copies to other
   /// registers. Determine if any of the copies are available at the kill
@@ -486,7 +486,7 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
                           LiveInterval *LI, const VNInfo *VNI,
                           SmallVectorImpl<SlotIndex> *Kills,
                           LiveIntervals &LIS, MachineDominatorTree &MDT,
-			  UserValueScopes &UVS) {
+                          UserValueScopes &UVS) {
   SmallVector<SlotIndex, 16> Todo;
   Todo.push_back(Idx);
   do {
@@ -620,7 +620,7 @@ void
 UserValue::computeIntervals(MachineRegisterInfo &MRI,
                             LiveIntervals &LIS,
                             MachineDominatorTree &MDT,
-			    UserValueScopes &UVS) {
+                            UserValueScopes &UVS) {
   SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
 
   // Collect all defs to be extended (Skipping undefs).
@@ -841,7 +841,7 @@ bool
 UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
   bool DidChange = false;
   // Split locations referring to OldReg. Iterate backwards so splitLocation can
-  // safely erase unuused locations.
+  // safely erase unused locations.
   for (unsigned i = locations.size(); i ; --i) {
     unsigned LocNo = i-1;
     const MachineOperand *Loc = &locations[LocNo];
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 70ed1c3..3ade660 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -1049,9 +1049,19 @@ public:
     bool hasRegMaskOp = false;
     collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
 
-    moveAllEnteringFrom(OldIdx, Entering);
-    moveAllInternalFrom(OldIdx, Internal);
-    moveAllExitingFrom(OldIdx, Exiting);
+    // To keep the LiveRanges valid within an interval, move the ranges closest
+    // to the destination first. This prevents ranges from overlapping, to that
+    // APIs like removeRange still work.
+    if (NewIdx < OldIdx) {
+      moveAllEnteringFrom(OldIdx, Entering);
+      moveAllInternalFrom(OldIdx, Internal);
+      moveAllExitingFrom(OldIdx, Exiting);
+    }
+    else {
+      moveAllExitingFrom(OldIdx, Exiting);
+      moveAllInternalFrom(OldIdx, Internal);
+      moveAllEnteringFrom(OldIdx, Entering);
+    }
 
     if (hasRegMaskOp)
       updateRegMaskSlots(OldIdx);
@@ -1319,8 +1329,14 @@ private:
   void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) {
     LiveInterval* LI = P.first;
     LiveRange* LR = P.second;
+    // Extend the LiveRange if NewIdx is past the end.
     if (NewIdx > LR->end) {
-      moveKillFlags(LI->reg, LR->end, NewIdx);
+      // Move kill flags if OldIdx was not originally the end
+      // (otherwise LR->end points to an invalid slot).
+      if (LR->end.getRegSlot() != OldIdx.getRegSlot()) {
+        assert(LR->end > OldIdx && "LiveRange does not cover original slot");
+        moveKillFlags(LI->reg, LR->end, NewIdx);
+      }
       LR->end = NewIdx.getRegSlot();
     }
   }
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 9c3d255..48e1e4c 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -109,6 +109,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
   // Mark the variable known alive in this bb
   VRInfo.AliveBlocks.set(BBNum);
 
+  assert(MBB != &MF->front() && "Can't find reaching def for virtreg");
   WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend());
 }
 
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 611b045..ca8a8e8 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -238,6 +238,18 @@ StringRef MachineBasicBlock::getName() const {
     return "(null)";
 }
 
+/// Return a hopefully unique identifier for this block.
+std::string MachineBasicBlock::getFullName() const {
+  std::string Name;
+  if (getParent())
+    Name = (getParent()->getFunction()->getName() + ":").str();
+  if (getBasicBlock())
+    Name += getBasicBlock()->getName();
+  else
+    Name += (Twine("BB") + Twine(getNumber())).str();
+  return Name;
+}
+
 void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
   const MachineFunction *MF = getParent();
   if (!MF) {
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index e9f9475..43af1ad 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -40,6 +40,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -481,7 +482,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
 /// MCID NULL and no operands.
 MachineInstr::MachineInstr()
   : MCID(0), Flags(0), AsmPrinterFlags(0),
-    MemRefs(0), MemRefsEnd(0),
+    NumMemRefs(0), MemRefs(0),
     Parent(0) {
   // Make sure that we get added to a machine basicblock
   LeakDetector::addGarbageObject(this);
@@ -489,10 +490,10 @@ MachineInstr::MachineInstr()
 
 void MachineInstr::addImplicitDefUseOperands() {
   if (MCID->ImplicitDefs)
-    for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+    for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
       addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
   if (MCID->ImplicitUses)
-    for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses)
+    for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses)
       addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
 }
 
@@ -501,7 +502,7 @@ void MachineInstr::addImplicitDefUseOperands() {
 /// the MCInstrDesc.
 MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
   : MCID(&tid), Flags(0), AsmPrinterFlags(0),
-    MemRefs(0), MemRefsEnd(0), Parent(0) {
+    NumMemRefs(0), MemRefs(0), Parent(0) {
   unsigned NumImplicitOps = 0;
   if (!NoImp)
     NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -516,7 +517,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
 MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
                            bool NoImp)
   : MCID(&tid), Flags(0), AsmPrinterFlags(0),
-    MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+    NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) {
   unsigned NumImplicitOps = 0;
   if (!NoImp)
     NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -532,7 +533,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
 /// basic block.
 MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
   : MCID(&tid), Flags(0), AsmPrinterFlags(0),
-    MemRefs(0), MemRefsEnd(0), Parent(0) {
+    NumMemRefs(0), MemRefs(0), Parent(0) {
   assert(MBB && "Cannot use inserting ctor with null basic block!");
   unsigned NumImplicitOps =
     MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -548,7 +549,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
 MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
                            const MCInstrDesc &tid)
   : MCID(&tid), Flags(0), AsmPrinterFlags(0),
-    MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+    NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) {
   assert(MBB && "Cannot use inserting ctor with null basic block!");
   unsigned NumImplicitOps =
     MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -563,7 +564,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
 ///
 MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
   : MCID(&MI.getDesc()), Flags(0), AsmPrinterFlags(0),
-    MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
+    NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
     Parent(0), debugLoc(MI.getDebugLoc()) {
   Operands.reserve(MI.getNumOperands());
 
@@ -738,28 +739,23 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
 void MachineInstr::addMemOperand(MachineFunction &MF,
                                  MachineMemOperand *MO) {
   mmo_iterator OldMemRefs = MemRefs;
-  mmo_iterator OldMemRefsEnd = MemRefsEnd;
+  uint16_t OldNumMemRefs = NumMemRefs;
 
-  size_t NewNum = (MemRefsEnd - MemRefs) + 1;
+  uint16_t NewNum = NumMemRefs + 1;
   mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
-  mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum;
 
-  std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs);
+  std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs);
   NewMemRefs[NewNum - 1] = MO;
 
   MemRefs = NewMemRefs;
-  MemRefsEnd = NewMemRefsEnd;
+  NumMemRefs = NewNum;
 }
 
-bool
-MachineInstr::hasProperty(unsigned MCFlag, QueryType Type) const {
-  if (Type == IgnoreBundle || !isBundle())
-    return getDesc().getFlags() & (1 << MCFlag);
-
+bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
   const MachineBasicBlock *MBB = getParent();
   MachineBasicBlock::const_instr_iterator MII = *this; ++MII;
   while (MII != MBB->end() && MII->isInsideBundle()) {
-    if (MII->getDesc().getFlags() & (1 << MCFlag)) {
+    if (MII->getDesc().getFlags() & Mask) {
       if (Type == AnyInBundle)
         return true;
     } else {
@@ -1843,49 +1839,55 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
 
 unsigned
 MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
-  unsigned Hash = MI->getOpcode() * 37;
+  // Build up a buffer of hash code components.
+  //
+  // FIXME: This is a total hack. We should have a hash_value overload for
+  // MachineOperand, but currently that doesn't work because there are many
+  // different ideas of "equality" and thus different sets of information that
+  // contribute to the hash code. This one happens to want to take a specific
+  // subset. And it's still not clear that this routine uses the *correct*
+  // subset of information when computing the hash code. The goal is to use the
+  // same inputs for the hash code here that MachineInstr::isIdenticalTo uses to
+  // test for equality when passed the 'IgnoreVRegDefs' filter flag. It would
+  // be very useful to factor the selection of relevant inputs out of the two
+  // functions and into a common routine, but it's not clear how that can be
+  // done.
+  SmallVector<size_t, 8> HashComponents;
+  HashComponents.reserve(MI->getNumOperands() + 1);
+  HashComponents.push_back(MI->getOpcode());
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
-    uint64_t Key = (uint64_t)MO.getType() << 32;
     switch (MO.getType()) {
     default: break;
     case MachineOperand::MO_Register:
       if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
         continue;  // Skip virtual register defs.
-      Key |= MO.getReg();
+      HashComponents.push_back(hash_combine(MO.getType(), MO.getReg()));
       break;
     case MachineOperand::MO_Immediate:
-      Key |= MO.getImm();
+      HashComponents.push_back(hash_combine(MO.getType(), MO.getImm()));
       break;
     case MachineOperand::MO_FrameIndex:
     case MachineOperand::MO_ConstantPoolIndex:
     case MachineOperand::MO_JumpTableIndex:
-      Key |= MO.getIndex();
+      HashComponents.push_back(hash_combine(MO.getType(), MO.getIndex()));
       break;
     case MachineOperand::MO_MachineBasicBlock:
-      Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB());
+      HashComponents.push_back(hash_combine(MO.getType(), MO.getMBB()));
       break;
     case MachineOperand::MO_GlobalAddress:
-      Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal());
+      HashComponents.push_back(hash_combine(MO.getType(), MO.getGlobal()));
       break;
     case MachineOperand::MO_BlockAddress:
-      Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress());
+      HashComponents.push_back(hash_combine(MO.getType(),
+                                            MO.getBlockAddress()));
       break;
     case MachineOperand::MO_MCSymbol:
-      Key |= DenseMapInfo<void*>::getHashValue(MO.getMCSymbol());
+      HashComponents.push_back(hash_combine(MO.getType(), MO.getMCSymbol()));
       break;
     }
-    Key += ~(Key << 32);
-    Key ^= (Key >> 22);
-    Key += ~(Key << 13);
-    Key ^= (Key >> 8);
-    Key += (Key << 3);
-    Key ^= (Key >> 15);
-    Key += ~(Key << 27);
-    Key ^= (Key >> 31);
-    Hash = (unsigned)Key + Hash * 37;
-  }
-  return Hash;
+  }
+  return hash_combine_range(HashComponents.begin(), HashComponents.end());
 }
 
 void MachineInstr::emitError(StringRef Msg) const {
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index d1f2df9..73489a7 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -229,6 +229,8 @@ bool llvm::finalizeBundles(MachineFunction &MF) {
            "First instr cannot be inside bundle before finalization!");
 
     MachineBasicBlock::instr_iterator MIE = MBB.instr_end();
+    if (MII == MIE)
+      continue;
     for (++MII; MII != MIE; ) {
       if (!MII->isInsideBundle())
         ++MII;
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 7d40e66..f140dec 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -161,9 +161,8 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
 /// form, so there should only be one definition.
 MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
   // Since we are in SSA form, we can use the first definition.
-  if (!def_empty(Reg))
-    return &*def_begin(Reg);
-  return 0;
+  def_iterator I = def_begin(Reg);
+  return !I.atEnd() ? &*I : 0;
 }
 
 bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const {
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 8a485e0..364a244 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -14,10 +14,10 @@
 
 #define DEBUG_TYPE "misched"
 
-#include "ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachinePassRegistry.h"
+#include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/CommandLine.h"
@@ -25,25 +25,36 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
 
 #include <queue>
 
 using namespace llvm;
 
+static cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
+                                  cl::desc("Force top-down list scheduling"));
+static cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
+                                  cl::desc("Force bottom-up list scheduling"));
+
+#ifndef NDEBUG
+static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
+  cl::desc("Pop up a window to show MISched dags after they are processed"));
+
+static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
+  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
+#else
+static bool ViewMISchedDAGs = false;
+#endif // NDEBUG
+
 //===----------------------------------------------------------------------===//
 // Machine Instruction Scheduling Pass and Registry
 //===----------------------------------------------------------------------===//
 
 namespace {
 /// MachineScheduler runs after coalescing and before register allocation.
-class MachineScheduler : public MachineFunctionPass {
+class MachineScheduler : public MachineSchedContext,
+                         public MachineFunctionPass {
 public:
-  MachineFunction *MF;
-  const TargetInstrInfo *TII;
-  const MachineLoopInfo *MLI;
-  const MachineDominatorTree *MDT;
-  LiveIntervals *LIS;
-
   MachineScheduler();
 
   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -71,7 +82,7 @@ INITIALIZE_PASS_END(MachineScheduler, "misched",
                     "Machine Instruction Scheduler", false, false)
 
 MachineScheduler::MachineScheduler()
-: MachineFunctionPass(ID), MF(0), MLI(0), MDT(0) {
+: MachineFunctionPass(ID) {
   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
 }
 
@@ -80,7 +91,7 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequiredID(MachineDominatorsID);
   AU.addRequired<MachineLoopInfo>();
   AU.addRequired<AliasAnalysis>();
-  AU.addPreserved<AliasAnalysis>();
+  AU.addRequired<TargetPassConfig>();
   AU.addRequired<SlotIndexes>();
   AU.addPreserved<SlotIndexes>();
   AU.addRequired<LiveIntervals>();
@@ -88,91 +99,226 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-namespace {
-/// MachineSchedRegistry provides a selection of available machine instruction
-/// schedulers.
-class MachineSchedRegistry : public MachinePassRegistryNode {
-public:
-  typedef ScheduleDAGInstrs *(*ScheduleDAGCtor)(MachineScheduler *);
+MachinePassRegistry MachineSchedRegistry::Registry;
 
-  // RegisterPassParser requires a (misnamed) FunctionPassCtor type.
-  typedef ScheduleDAGCtor FunctionPassCtor;
+/// A dummy default scheduler factory indicates whether the scheduler
+/// is overridden on the command line.
+static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
+  return 0;
+}
 
-  static MachinePassRegistry Registry;
+/// MachineSchedOpt allows command line selection of the scheduler.
+static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
+               RegisterPassParser<MachineSchedRegistry> >
+MachineSchedOpt("misched",
+                cl::init(&useDefaultMachineSched), cl::Hidden,
+                cl::desc("Machine instruction scheduler to use"));
 
-  MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C)
-    : MachinePassRegistryNode(N, D, (MachinePassCtor)C) {
-    Registry.Add(this);
-  }
-  ~MachineSchedRegistry() { Registry.Remove(this); }
+static MachineSchedRegistry
+DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
+                     useDefaultMachineSched);
+
+/// Forward declare the standard machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
+
+/// Top-level MachineScheduler pass driver.
+///
+/// Visit blocks in function order. Divide each block into scheduling regions
+/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
+/// consistent with the DAG builder, which traverses the interior of the
+/// scheduling regions bottom-up.
+///
+/// This design avoids exposing scheduling boundaries to the DAG builder,
+/// simplifying the DAG builder's support for "special" target instructions.
+/// At the same time the design allows target schedulers to operate across
+/// scheduling boundaries, for example to bundle the boudary instructions
+/// without reordering them. This creates complexity, because the target
+/// scheduler must update the RegionBegin and RegionEnd positions cached by
+/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
+/// design would be to split blocks at scheduling boundaries, but LLVM has a
+/// general bias against block splitting purely for implementation simplicity.
+bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+  // Initialize the context of the pass.
+  MF = &mf;
+  MLI = &getAnalysis<MachineLoopInfo>();
+  MDT = &getAnalysis<MachineDominatorTree>();
+  PassConfig = &getAnalysis<TargetPassConfig>();
+  AA = &getAnalysis<AliasAnalysis>();
 
-  // Accessors.
-  //
-  MachineSchedRegistry *getNext() const {
-    return (MachineSchedRegistry *)MachinePassRegistryNode::getNext();
-  }
-  static MachineSchedRegistry *getList() {
-    return (MachineSchedRegistry *)Registry.getList();
-  }
-  static ScheduleDAGCtor getDefault() {
-    return (ScheduleDAGCtor)Registry.getDefault();
-  }
-  static void setDefault(ScheduleDAGCtor C) {
-    Registry.setDefault((MachinePassCtor)C);
+  LIS = &getAnalysis<LiveIntervals>();
+  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+  // Select the scheduler, or set the default.
+  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
+  if (Ctor == useDefaultMachineSched) {
+    // Get the default scheduler set by the target.
+    Ctor = MachineSchedRegistry::getDefault();
+    if (!Ctor) {
+      Ctor = createConvergingSched;
+      MachineSchedRegistry::setDefault(Ctor);
+    }
   }
-  static void setListener(MachinePassRegistryListener *L) {
-    Registry.setListener(L);
+  // Instantiate the selected scheduler.
+  OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
+
+  // Visit all machine basic blocks.
+  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
+       MBB != MBBEnd; ++MBB) {
+
+    Scheduler->startBlock(MBB);
+
+    // Break the block into scheduling regions [I, RegionEnd), and schedule each
+    // region as soon as it is discovered. RegionEnd points the the scheduling
+    // boundary at the bottom of the region. The DAG does not include RegionEnd,
+    // but the region does (i.e. the next RegionEnd is above the previous
+    // RegionBegin). If the current block has no terminator then RegionEnd ==
+    // MBB->end() for the bottom region.
+    //
+    // The Scheduler may insert instructions during either schedule() or
+    // exitRegion(), even for empty regions. So the local iterators 'I' and
+    // 'RegionEnd' are invalid across these calls.
+    unsigned RemainingCount = MBB->size();
+    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+        RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
+      // Avoid decrementing RegionEnd for blocks with no terminator.
+      if (RegionEnd != MBB->end()
+          || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
+        --RegionEnd;
+        // Count the boundary instruction.
+        --RemainingCount;
+      }
+
+      // The next region starts above the previous region. Look backward in the
+      // instruction stream until we find the nearest boundary.
+      MachineBasicBlock::iterator I = RegionEnd;
+      for(;I != MBB->begin(); --I, --RemainingCount) {
+        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
+          break;
+      }
+      // Notify the scheduler of the region, even if we may skip scheduling
+      // it. Perhaps it still needs to be bundled.
+      Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount);
+
+      // Skip empty scheduling regions (0 or 1 schedulable instructions).
+      if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
+        // Close the current region. Bundle the terminator if needed.
+        // This invalidates 'RegionEnd' and 'I'.
+        Scheduler->exitRegion();
+        continue;
+      }
+      DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName()
+            << ":BB#" << MBB->getNumber() << "\n  From: " << *I << "    To: ";
+            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
+            else dbgs() << "End";
+            dbgs() << " Remaining: " << RemainingCount << "\n");
+
+      // Schedule a region: possibly reorder instructions.
+      // This invalidates 'RegionEnd' and 'I'.
+      Scheduler->schedule();
+
+      // Close the current region.
+      Scheduler->exitRegion();
+
+      // Scheduling has invalidated the current iterator 'I'. Ask the
+      // scheduler for the top of it's scheduled region.
+      RegionEnd = Scheduler->begin();
+    }
+    assert(RemainingCount == 0 && "Instruction count mismatch!");
+    Scheduler->finishBlock();
   }
-};
-} // namespace
+  DEBUG(LIS->print(dbgs()));
+  return true;
+}
 
-MachinePassRegistry MachineSchedRegistry::Registry;
+void MachineScheduler::print(raw_ostream &O, const Module* m) const {
+  // unimplemented
+}
 
-static ScheduleDAGInstrs *createDefaultMachineSched(MachineScheduler *P);
+//===----------------------------------------------------------------------===//
+// MachineSchedStrategy - Interface to a machine scheduling algorithm.
+//===----------------------------------------------------------------------===//
 
-/// MachineSchedOpt allows command line selection of the scheduler.
-static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
-               RegisterPassParser<MachineSchedRegistry> >
-MachineSchedOpt("misched",
-                cl::init(&createDefaultMachineSched), cl::Hidden,
-                cl::desc("Machine instruction scheduler to use"));
+namespace {
+class ScheduleDAGMI;
+
+/// MachineSchedStrategy - Interface used by ScheduleDAGMI to drive the selected
+/// scheduling algorithm.
+///
+/// If this works well and targets wish to reuse ScheduleDAGMI, we may expose it
+/// in ScheduleDAGInstrs.h
+class MachineSchedStrategy {
+public:
+  virtual ~MachineSchedStrategy() {}
+
+  /// Initialize the strategy after building the DAG for a new region.
+  virtual void initialize(ScheduleDAGMI *DAG) = 0;
+
+  /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
+  /// schedule the node at the top of the unscheduled region. Otherwise it will
+  /// be scheduled at the bottom.
+  virtual SUnit *pickNode(bool &IsTopNode) = 0;
+
+  /// When all predecessor dependencies have been resolved, free this node for
+  /// top-down scheduling.
+  virtual void releaseTopNode(SUnit *SU) = 0;
+  /// When all successor dependencies have been resolved, free this node for
+  /// bottom-up scheduling.
+  virtual void releaseBottomNode(SUnit *SU) = 0;
+};
+} // namespace
 
 //===----------------------------------------------------------------------===//
-// Machine Instruction Scheduling Common Implementation
+// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
+// preservation.
 //===----------------------------------------------------------------------===//
 
 namespace {
-/// ScheduleTopDownLive is an implementation of ScheduleDAGInstrs that schedules
+/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules
 /// machine instructions while updating LiveIntervals.
-class ScheduleTopDownLive : public ScheduleDAGInstrs {
-protected:
-  MachineScheduler *Pass;
+class ScheduleDAGMI : public ScheduleDAGInstrs {
+  AliasAnalysis *AA;
+  MachineSchedStrategy *SchedImpl;
+
+  /// The top of the unscheduled zone.
+  MachineBasicBlock::iterator CurrentTop;
+
+  /// The bottom of the unscheduled zone.
+  MachineBasicBlock::iterator CurrentBottom;
+
+  /// The number of instructions scheduled so far. Used to cut off the
+  /// scheduler at the point determined by misched-cutoff.
+  unsigned NumInstrsScheduled;
 public:
-  ScheduleTopDownLive(MachineScheduler *P):
-    ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false, P->LIS),
-    Pass(P) {}
+  ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
+    ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
+    AA(C->AA), SchedImpl(S), CurrentTop(), CurrentBottom(),
+    NumInstrsScheduled(0) {}
 
-  /// ScheduleDAGInstrs callback.
-  void Schedule();
+  ~ScheduleDAGMI() {
+    delete SchedImpl;
+  }
 
-  /// Interface implemented by the selected top-down liveinterval scheduler.
-  ///
-  /// Pick the next node to schedule, or return NULL.
-  virtual SUnit *pickNode() = 0;
+  MachineBasicBlock::iterator top() const { return CurrentTop; }
+  MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
 
-  /// When all preceeding dependencies have been resolved, free this node for
-  /// scheduling.
-  virtual void releaseNode(SUnit *SU) = 0;
+  /// Implement ScheduleDAGInstrs interface.
+  void schedule();
 
 protected:
+  void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
+  bool checkSchedLimit();
+
   void releaseSucc(SUnit *SU, SDep *SuccEdge);
   void releaseSuccessors(SUnit *SU);
+  void releasePred(SUnit *SU, SDep *PredEdge);
+  void releasePredecessors(SUnit *SU);
 };
 } // namespace
 
 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
 /// NumPredsLeft reaches zero, release the successor node.
-void ScheduleTopDownLive::releaseSucc(SUnit *SU, SDep *SuccEdge) {
+void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
   SUnit *SuccSU = SuccEdge->getSUnit();
 
 #ifndef NDEBUG
@@ -185,164 +331,199 @@ void ScheduleTopDownLive::releaseSucc(SUnit *SU, SDep *SuccEdge) {
 #endif
   --SuccSU->NumPredsLeft;
   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
-    releaseNode(SuccSU);
+    SchedImpl->releaseTopNode(SuccSU);
 }
 
 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
-void ScheduleTopDownLive::releaseSuccessors(SUnit *SU) {
+void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
        I != E; ++I) {
     releaseSucc(SU, &*I);
   }
 }
 
-/// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
-/// time to do some work.
-void ScheduleTopDownLive::Schedule() {
-  BuildSchedGraph(&Pass->getAnalysis<AliasAnalysis>());
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
+/// NumSuccsLeft reaches zero, release the predecessor node.
+void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
+  SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+  if (PredSU->NumSuccsLeft == 0) {
+    dbgs() << "*** Scheduling failed! ***\n";
+    PredSU->dump(this);
+    dbgs() << " has been released too many times!\n";
+    llvm_unreachable(0);
+  }
+#endif
+  --PredSU->NumSuccsLeft;
+  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
+    SchedImpl->releaseBottomNode(PredSU);
+}
+
+/// releasePredecessors - Call releasePred on each of SU's predecessors.
+void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    releasePred(SU, &*I);
+  }
+}
+
+void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
+                                    MachineBasicBlock::iterator InsertPos) {
+  // Fix RegionBegin if the first instruction moves down.
+  if (&*RegionBegin == MI)
+    RegionBegin = llvm::next(RegionBegin);
+  BB->splice(InsertPos, BB, MI);
+  LIS->handleMove(MI);
+  // Fix RegionBegin if another instruction moves above the first instruction.
+  if (RegionBegin == InsertPos)
+    RegionBegin = MI;
+}
+
+bool ScheduleDAGMI::checkSchedLimit() {
+#ifndef NDEBUG
+  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
+    CurrentTop = CurrentBottom;
+    return false;
+  }
+  ++NumInstrsScheduled;
+#endif
+  return true;
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region.
+void ScheduleDAGMI::schedule() {
+  buildSchedGraph(AA);
 
   DEBUG(dbgs() << "********** MI Scheduling **********\n");
   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
           SUnits[su].dumpAll(this));
 
-  // Release any successors of the special Entry node. It is currently unused,
-  // but we keep up appearances.
+  if (ViewMISchedDAGs) viewGraph();
+
+  SchedImpl->initialize(this);
+
+  // Release edges from the special Entry node or to the special Exit node.
   releaseSuccessors(&EntrySU);
+  releasePredecessors(&ExitSU);
 
   // Release all DAG roots for scheduling.
   for (std::vector<SUnit>::iterator I = SUnits.begin(), E = SUnits.end();
        I != E; ++I) {
-    // A SUnit is ready to schedule if it has no predecessors.
+    // A SUnit is ready to top schedule if it has no predecessors.
     if (I->Preds.empty())
-      releaseNode(&(*I));
+      SchedImpl->releaseTopNode(&(*I));
+    // A SUnit is ready to bottom schedule if it has no successors.
+    if (I->Succs.empty())
+      SchedImpl->releaseBottomNode(&(*I));
   }
 
-  InsertPos = Begin;
-  while (SUnit *SU = pickNode()) {
-    DEBUG(dbgs() << "*** Scheduling Instruction:\n"; SU->dump(this));
+  CurrentTop = RegionBegin;
+  CurrentBottom = RegionEnd;
+  bool IsTopNode = false;
+  while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+    DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+          << " Scheduling Instruction:\n"; SU->dump(this));
+    if (!checkSchedLimit())
+      break;
 
     // Move the instruction to its new location in the instruction stream.
     MachineInstr *MI = SU->getInstr();
-    if (&*InsertPos == MI)
-      ++InsertPos;
+
+    if (IsTopNode) {
+      assert(SU->isTopReady() && "node still has unscheduled dependencies");
+      if (&*CurrentTop == MI)
+        ++CurrentTop;
+      else
+        moveInstruction(MI, CurrentTop);
+      // Release dependent instructions for scheduling.
+      releaseSuccessors(SU);
+    }
     else {
-      BB->splice(InsertPos, BB, MI);
-      Pass->LIS->handleMove(MI);
-      if (Begin == InsertPos)
-        Begin = MI;
+      assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+      if (&*llvm::prior(CurrentBottom) == MI)
+        --CurrentBottom;
+      else {
+        if (&*CurrentTop == MI)
+          CurrentTop = llvm::next(CurrentTop);
+        moveInstruction(MI, CurrentBottom);
+        CurrentBottom = MI;
+      }
+      // Release dependent instructions for scheduling.
+      releasePredecessors(SU);
     }
-
-    // Release dependent instructions for scheduling.
-    releaseSuccessors(SU);
+    SU->isScheduled = true;
   }
+  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
 }
 
-bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
-  // Initialize the context of the pass.
-  MF = &mf;
-  MLI = &getAnalysis<MachineLoopInfo>();
-  MDT = &getAnalysis<MachineDominatorTree>();
-  LIS = &getAnalysis<LiveIntervals>();
-  TII = MF->getTarget().getInstrInfo();
+//===----------------------------------------------------------------------===//
+// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
 
-  // Select the scheduler, or set the default.
-  MachineSchedRegistry::ScheduleDAGCtor Ctor =
-    MachineSchedRegistry::getDefault();
-  if (!Ctor) {
-    Ctor = MachineSchedOpt;
-    MachineSchedRegistry::setDefault(Ctor);
-  }
-  // Instantiate the selected scheduler.
-  OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
+namespace {
+/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
+/// the schedule.
+class ConvergingScheduler : public MachineSchedStrategy {
+  ScheduleDAGMI *DAG;
 
-  // Visit all machine basic blocks.
-  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
-       MBB != MBBEnd; ++MBB) {
+  unsigned NumTopReady;
+  unsigned NumBottomReady;
 
-    // Break the block into scheduling regions [I, RegionEnd), and schedule each
-    // region as soon as it is discovered.
-    unsigned RemainingCount = MBB->size();
-    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
-        RegionEnd != MBB->begin();) {
-      // The next region starts above the previous region. Look backward in the
-      // instruction stream until we find the nearest boundary.
-      MachineBasicBlock::iterator I = RegionEnd;
-      for(;I != MBB->begin(); --I, --RemainingCount) {
-        if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
-          break;
-      }
-      if (I == RegionEnd) {
-        // Skip empty scheduling regions.
-        RegionEnd = llvm::prior(RegionEnd);
-        --RemainingCount;
-        continue;
-      }
-      // Skip regions with one instruction.
-      if (I == llvm::prior(RegionEnd)) {
-        RegionEnd = llvm::prior(RegionEnd);
-        continue;
-      }
-      DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName()
-            << ":BB#" << MBB->getNumber() << "\n  From: " << *I << "    To: ";
-            if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
-            else dbgs() << "End";
-            dbgs() << " Remaining: " << RemainingCount << "\n");
+public:
+  virtual void initialize(ScheduleDAGMI *dag) {
+    DAG = dag;
 
-      // Inform ScheduleDAGInstrs of the region being scheduled. It calls back
-      // to our Schedule() method.
-      Scheduler->Run(MBB, I, RegionEnd, MBB->size());
-      RegionEnd = Scheduler->Begin;
-    }
-    assert(RemainingCount == 0 && "Instruction count mismatch!");
+    assert((!ForceTopDown || !ForceBottomUp) &&
+           "-misched-topdown incompatible with -misched-bottomup");
   }
-  return true;
-}
 
-void MachineScheduler::print(raw_ostream &O, const Module* m) const {
-  // unimplemented
-}
+  virtual SUnit *pickNode(bool &IsTopNode) {
+    if (DAG->top() == DAG->bottom())
+      return NULL;
 
-//===----------------------------------------------------------------------===//
-// Placeholder for extending the machine instruction scheduler.
-//===----------------------------------------------------------------------===//
-
-namespace {
-class DefaultMachineScheduler : public ScheduleDAGInstrs {
-  MachineScheduler *Pass;
-public:
-  DefaultMachineScheduler(MachineScheduler *P):
-    ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false, P->LIS),
-    Pass(P) {}
+    // As an initial placeholder heuristic, schedule in the direction that has
+    // the fewest choices.
+    SUnit *SU;
+    if (ForceTopDown || (!ForceBottomUp && NumTopReady <= NumBottomReady)) {
+      SU = DAG->getSUnit(DAG->top());
+      IsTopNode = true;
+    }
+    else {
+      SU = DAG->getSUnit(llvm::prior(DAG->bottom()));
+      IsTopNode = false;
+    }
+    if (SU->isTopReady()) {
+      assert(NumTopReady > 0 && "bad ready count");
+      --NumTopReady;
+    }
+    if (SU->isBottomReady()) {
+      assert(NumBottomReady > 0 && "bad ready count");
+      --NumBottomReady;
+    }
+    return SU;
+  }
 
-  /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
-  /// time to do some work.
-  void Schedule();
+  virtual void releaseTopNode(SUnit *SU) {
+    ++NumTopReady;
+  }
+  virtual void releaseBottomNode(SUnit *SU) {
+    ++NumBottomReady;
+  }
 };
 } // namespace
 
-static ScheduleDAGInstrs *createDefaultMachineSched(MachineScheduler *P) {
-  return new DefaultMachineScheduler(P);
+/// Create the standard converging machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
+  assert((!ForceTopDown || !ForceBottomUp) &&
+         "-misched-topdown incompatible with -misched-bottomup");
+  return new ScheduleDAGMI(C, new ConvergingScheduler());
 }
 static MachineSchedRegistry
-SchedDefaultRegistry("default", "Activate the scheduler pass, "
-                     "but don't reorder instructions",
-                     createDefaultMachineSched);
-
-
-/// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
-/// time to do some work.
-void DefaultMachineScheduler::Schedule() {
-  BuildSchedGraph(&Pass->getAnalysis<AliasAnalysis>());
-
-  DEBUG(dbgs() << "********** MI Scheduling **********\n");
-  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
-          SUnits[su].dumpAll(this));
-
-  // TODO: Put interesting things here.
-  //
-  // When this is fully implemented, it will become a subclass of
-  // ScheduleTopDownLive. So this driver will disappear.
-}
+ConvergingSchedRegistry("converge", "Standard converging scheduler.",
+                        createConvergingSched);
 
 //===----------------------------------------------------------------------===//
 // Machine Instruction Shuffler for Correctness Testing
@@ -350,43 +531,83 @@ void DefaultMachineScheduler::Schedule() {
 
 #ifndef NDEBUG
 namespace {
-// Nodes with a higher number have higher priority. This way we attempt to
-// schedule the latest instructions earliest.
-//
-// TODO: Relies on the property of the BuildSchedGraph that results in SUnits
-// being ordered in sequence top-down.
-struct ShuffleSUnitOrder {
+/// Apply a less-than relation on the node order, which corresponds to the
+/// instruction order prior to scheduling. IsReverse implements greater-than.
+template<bool IsReverse>
+struct SUnitOrder {
   bool operator()(SUnit *A, SUnit *B) const {
-    return A->NodeNum < B->NodeNum;
+    if (IsReverse)
+      return A->NodeNum > B->NodeNum;
+    else
+      return A->NodeNum < B->NodeNum;
   }
 };
 
 /// Reorder instructions as much as possible.
-class InstructionShuffler : public ScheduleTopDownLive {
-  std::priority_queue<SUnit*, std::vector<SUnit*>, ShuffleSUnitOrder> Queue;
+class InstructionShuffler : public MachineSchedStrategy {
+  bool IsAlternating;
+  bool IsTopDown;
+
+  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
+  // gives nodes with a higher number higher priority causing the latest
+  // instructions to be scheduled first.
+  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
+    TopQ;
+  // When scheduling bottom-up, use greater-than as the queue priority.
+  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
+    BottomQ;
 public:
-  InstructionShuffler(MachineScheduler *P):
-    ScheduleTopDownLive(P) {}
+  InstructionShuffler(bool alternate, bool topdown)
+    : IsAlternating(alternate), IsTopDown(topdown) {}
 
-  /// ScheduleTopDownLive Interface
+  virtual void initialize(ScheduleDAGMI *) {
+    TopQ.clear();
+    BottomQ.clear();
+  }
 
-  virtual SUnit *pickNode() {
-    if (Queue.empty()) return NULL;
-    SUnit *SU = Queue.top();
-    Queue.pop();
+  /// Implement MachineSchedStrategy interface.
+  /// -----------------------------------------
+
+  virtual SUnit *pickNode(bool &IsTopNode) {
+    SUnit *SU;
+    if (IsTopDown) {
+      do {
+        if (TopQ.empty()) return NULL;
+        SU = TopQ.top();
+        TopQ.pop();
+      } while (SU->isScheduled);
+      IsTopNode = true;
+    }
+    else {
+      do {
+        if (BottomQ.empty()) return NULL;
+        SU = BottomQ.top();
+        BottomQ.pop();
+      } while (SU->isScheduled);
+      IsTopNode = false;
+    }
+    if (IsAlternating)
+      IsTopDown = !IsTopDown;
     return SU;
   }
 
-  virtual void releaseNode(SUnit *SU) {
-    Queue.push(SU);
+  virtual void releaseTopNode(SUnit *SU) {
+    TopQ.push(SU);
+  }
+  virtual void releaseBottomNode(SUnit *SU) {
+    BottomQ.push(SU);
   }
 };
 } // namespace
 
-static ScheduleDAGInstrs *createInstructionShuffler(MachineScheduler *P) {
-  return new InstructionShuffler(P);
+static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
+  bool Alternate = !ForceTopDown && !ForceBottomUp;
+  bool TopDown = !ForceBottomUp;
+  assert((TopDown || !ForceTopDown) &&
+         "-misched-topdown incompatible with -misched-bottomup");
+  return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
 }
-static MachineSchedRegistry ShufflerRegistry("shuffle",
-                                             "Shuffle machine instructions",
-                                             createInstructionShuffler);
+static MachineSchedRegistry ShufflerRegistry(
+  "shuffle", "Shuffle machine instructions alternating directions",
+  createInstructionShuffler);
 #endif // !NDEBUG
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 394a960..830a876 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -900,7 +900,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
 void MachineVerifier::calcRegsPassed() {
   // First push live-out regs to successors' vregsPassed. Remember the MBBs that
   // have any vregsPassed.
-  DenseSet<const MachineBasicBlock*> todo;
+  SmallPtrSet<const MachineBasicBlock*, 8> todo;
   for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
        MFI != MFE; ++MFI) {
     const MachineBasicBlock &MBB(*MFI);
@@ -937,7 +937,7 @@ void MachineVerifier::calcRegsPassed() {
 // similar to calcRegsPassed, only backwards.
 void MachineVerifier::calcRegsRequired() {
   // First push live-in regs to predecessors' vregsRequired.
-  DenseSet<const MachineBasicBlock*> todo;
+  SmallPtrSet<const MachineBasicBlock*, 8> todo;
   for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
        MFI != MFE; ++MFI) {
     const MachineBasicBlock &MBB(*MFI);
@@ -970,9 +970,10 @@ void MachineVerifier::calcRegsRequired() {
 // Check PHI instructions at the beginning of MBB. It is assumed that
 // calcRegsPassed has been run so BBInfo::isLiveOut is valid.
 void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
+  SmallPtrSet<const MachineBasicBlock*, 8> seen;
   for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
        BBI != BBE && BBI->isPHI(); ++BBI) {
-    DenseSet<const MachineBasicBlock*> seen;
+    seen.clear();
 
     for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
       unsigned Reg = BBI->getOperand(i).getReg();
@@ -1013,8 +1014,17 @@ void MachineVerifier::visitMachineFunctionAfter() {
   }
 
   // Now check liveness info if available
-  if (LiveVars || LiveInts)
-    calcRegsRequired();
+  calcRegsRequired();
+
+  if (MRI->isSSA() && !MF->empty()) {
+    BBInfo &MInfo = MBBInfoMap[&MF->front()];
+    for (RegSet::iterator
+         I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
+         ++I)
+      report("Virtual register def doesn't dominate all uses.",
+             MRI->getVRegDef(*I));
+  }
+
   if (LiveVars)
     verifyLiveVariables();
   if (LiveInts)
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index ec1f2b4..6246c21 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -564,7 +564,8 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
   addPass(RegisterCoalescerID);
 
   // PreRA instruction scheduling.
-  addPass(MachineSchedulerID);
+  if (addPass(MachineSchedulerID) != &NoPassID)
+    printAndVerify("After Machine Scheduling");
 
   // Add the selected register allocation pass.
   PM.add(RegAllocPass);
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index e59aa9d..24d3e5a 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -23,7 +23,6 @@
 #include "AggressiveAntiDepBreaker.h"
 #include "CriticalAntiDepBreaker.h"
 #include "RegisterClassInfo.h"
-#include "ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/LatencyPriorityQueue.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
@@ -32,6 +31,7 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Target/TargetLowering.h"
@@ -127,6 +127,9 @@ namespace {
     /// LiveRegs - true if the register is live.
     BitVector LiveRegs;
 
+    /// The schedule. Null SUnit*'s represent noop instructions.
+    std::vector<SUnit*> Sequence;
+
   public:
     SchedulePostRATDList(
       MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
@@ -136,23 +139,34 @@ namespace {
 
     ~SchedulePostRATDList();
 
-    /// StartBlock - Initialize register live-range state for scheduling in
+    /// startBlock - Initialize register live-range state for scheduling in
     /// this block.
     ///
-    void StartBlock(MachineBasicBlock *BB);
+    void startBlock(MachineBasicBlock *BB);
+
+    /// Initialize the scheduler state for the next scheduling region.
+    virtual void enterRegion(MachineBasicBlock *bb,
+                             MachineBasicBlock::iterator begin,
+                             MachineBasicBlock::iterator end,
+                             unsigned endcount);
+
+    /// Notify that the scheduler has finished scheduling the current region.
+    virtual void exitRegion();
 
     /// Schedule - Schedule the instruction range using list scheduling.
     ///
-    void Schedule();
+    void schedule();
+
+    void EmitSchedule();
 
     /// Observe - Update liveness information to account for the current
     /// instruction, which will not be scheduled.
     ///
     void Observe(MachineInstr *MI, unsigned Count);
 
-    /// FinishBlock - Clean up register live-range state.
+    /// finishBlock - Clean up register live-range state.
     ///
-    void FinishBlock();
+    void finishBlock();
 
     /// FixupKills - Fix register kill flags that have been made
     /// invalid due to scheduling
@@ -170,6 +184,8 @@ namespace {
     // adjustments may be made to the instruction if necessary. Return
     // true if the operand has been deleted, false if not.
     bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
+
+    void dumpSchedule() const;
   };
 }
 
@@ -202,6 +218,35 @@ SchedulePostRATDList::~SchedulePostRATDList() {
   delete AntiDepBreak;
 }
 
+/// Initialize state associated with the next scheduling region.
+void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,
+                 MachineBasicBlock::iterator begin,
+                 MachineBasicBlock::iterator end,
+                 unsigned endcount) {
+  ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+  Sequence.clear();
+}
+
+/// Print the schedule before exiting the region.
+void SchedulePostRATDList::exitRegion() {
+  DEBUG({
+      dbgs() << "*** Final schedule ***\n";
+      dumpSchedule();
+      dbgs() << '\n';
+    });
+  ScheduleDAGInstrs::exitRegion();
+}
+
+/// dumpSchedule - dump the scheduled Sequence.
+void SchedulePostRATDList::dumpSchedule() const {
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    if (SUnit *SU = Sequence[i])
+      SU->dump(this);
+    else
+      dbgs() << "**** NOOP ****\n";
+  }
+}
+
 bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
   TII = Fn.getTarget().getInstrInfo();
   MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
@@ -256,7 +301,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
 #endif
 
     // Initialize register live-range state for scheduling in this block.
-    Scheduler.StartBlock(MBB);
+    Scheduler.startBlock(MBB);
 
     // Schedule each sequence of instructions not interrupted by a label
     // or anything else that effectively needs to shut down scheduling.
@@ -268,7 +313,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
       // post-ra we don't gain anything by scheduling across calls since we
       // don't need to worry about register pressure.
       if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) {
-        Scheduler.Run(MBB, I, Current, CurrentCount);
+        Scheduler.enterRegion(MBB, I, Current, CurrentCount);
+        Scheduler.schedule();
+        Scheduler.exitRegion();
         Scheduler.EmitSchedule();
         Current = MI;
         CurrentCount = Count - 1;
@@ -282,11 +329,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
     assert(Count == 0 && "Instruction count mismatch!");
     assert((MBB->begin() == Current || CurrentCount != 0) &&
            "Instruction count mismatch!");
-    Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
+    Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount);
+    Scheduler.schedule();
+    Scheduler.exitRegion();
     Scheduler.EmitSchedule();
 
     // Clean up register live-range state.
-    Scheduler.FinishBlock();
+    Scheduler.finishBlock();
 
     // Update register kills
     Scheduler.FixupKills(MBB);
@@ -298,9 +347,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
 /// StartBlock - Initialize register live-range state for scheduling in
 /// this block.
 ///
-void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
+void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) {
   // Call the superclass.
-  ScheduleDAGInstrs::StartBlock(BB);
+  ScheduleDAGInstrs::startBlock(BB);
 
   // Reset the hazard recognizer and anti-dep breaker.
   HazardRec->Reset();
@@ -310,14 +359,14 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
 
 /// Schedule - Schedule the instruction range using list scheduling.
 ///
-void SchedulePostRATDList::Schedule() {
+void SchedulePostRATDList::schedule() {
   // Build the scheduling graph.
-  BuildSchedGraph(AA);
+  buildSchedGraph(AA);
 
   if (AntiDepBreak != NULL) {
     unsigned Broken =
-      AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
-                                          InsertPosIndex, DbgValues);
+      AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
+                                          EndIndex, DbgValues);
 
     if (Broken != 0) {
       // We made changes. Update the dependency graph.
@@ -326,11 +375,8 @@ void SchedulePostRATDList::Schedule() {
       // the def's anti-dependence *and* output-dependence edges due to
       // that register, and add new anti-dependence and output-dependence
       // edges based on the next live range of the register.
-      SUnits.clear();
-      Sequence.clear();
-      EntrySU = SUnit();
-      ExitSU = SUnit();
-      BuildSchedGraph(AA);
+      ScheduleDAG::clearDAG();
+      buildSchedGraph(AA);
 
       NumFixedAnti += Broken;
     }
@@ -350,17 +396,17 @@ void SchedulePostRATDList::Schedule() {
 ///
 void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
   if (AntiDepBreak != NULL)
-    AntiDepBreak->Observe(MI, Count, InsertPosIndex);
+    AntiDepBreak->Observe(MI, Count, EndIndex);
 }
 
 /// FinishBlock - Clean up register live-range state.
 ///
-void SchedulePostRATDList::FinishBlock() {
+void SchedulePostRATDList::finishBlock() {
   if (AntiDepBreak != NULL)
     AntiDepBreak->FinishBlock();
 
   // Call the superclass.
-  ScheduleDAGInstrs::FinishBlock();
+  ScheduleDAGInstrs::finishBlock();
 }
 
 /// StartBlockForKills - Initialize register live-range state for updating kills
@@ -589,7 +635,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
 
   ReleaseSuccessors(SU);
   SU->isScheduled = true;
-  AvailableQueue.ScheduledNode(SU);
+  AvailableQueue.scheduledNode(SU);
 }
 
 /// ListScheduleTopDown - The main loop of list scheduling for top-down
@@ -703,6 +749,46 @@ void SchedulePostRATDList::ListScheduleTopDown() {
   }
 
 #ifndef NDEBUG
-  VerifySchedule(/*isBottomUp=*/false);
-#endif
+  unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false);
+  unsigned Noops = 0;
+  for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+    if (!Sequence[i])
+      ++Noops;
+  assert(Sequence.size() - Noops == ScheduledNodes &&
+         "The number of nodes scheduled doesn't match the expected number!");
+#endif // NDEBUG
+}
+
+// EmitSchedule - Emit the machine code in scheduled order.
+void SchedulePostRATDList::EmitSchedule() {
+  RegionBegin = RegionEnd;
+
+  // If first instruction was a DBG_VALUE then put it back.
+  if (FirstDbgValue)
+    BB->splice(RegionEnd, BB, FirstDbgValue);
+
+  // Then re-insert them according to the given schedule.
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    if (SUnit *SU = Sequence[i])
+      BB->splice(RegionEnd, BB, SU->getInstr());
+    else
+      // Null SUnit* is a noop.
+      TII->insertNoop(*BB, RegionEnd);
+
+    // Update the Begin iterator, as the first instruction in the block
+    // may have been scheduled later.
+    if (i == 0)
+      RegionBegin = prior(RegionEnd);
+  }
+
+  // Reinsert any remaining debug_values.
+  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+    MachineInstr *DbgValue = P.first;
+    MachineBasicBlock::iterator OrigPrivMI = P.second;
+    BB->splice(++OrigPrivMI, BB, DbgValue);
+  }
+  DbgValues.clear();
+  FirstDbgValue = NULL;
 }
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index c27a485..e09b7f8 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -1139,7 +1139,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
   // Add the clobber lists for all the instructions we skipped earlier.
   for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator
        I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
-    if (const unsigned *Defs = (*I)->getImplicitDefs())
+    if (const uint16_t *Defs = (*I)->getImplicitDefs())
       while (*Defs)
         MRI->setPhysRegUsed(*Defs++);
 
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index ef0c508..310b933 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -47,7 +47,7 @@ namespace llvm {
     /// CrossClass - True when both regs are virtual, and newRC is constrained.
     bool CrossClass;
 
-    /// Flipped - True when DstReg and SrcReg are reversed from the oriignal
+    /// Flipped - True when DstReg and SrcReg are reversed from the original
     /// copy instruction.
     bool Flipped;
 
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 94b28b6..8fd6426 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -46,42 +46,17 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf)
 
 ScheduleDAG::~ScheduleDAG() {}
 
-/// getInstrDesc helper to handle SDNodes.
-const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
-  if (!Node || !Node->isMachineOpcode()) return NULL;
-  return &TII->get(Node->getMachineOpcode());
-}
-
-/// dump - dump the schedule.
-void ScheduleDAG::dumpSchedule() const {
-  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
-    if (SUnit *SU = Sequence[i])
-      SU->dump(this);
-    else
-      dbgs() << "**** NOOP ****\n";
-  }
-}
-
-
-/// Run - perform scheduling.
-///
-void ScheduleDAG::Run(MachineBasicBlock *bb,
-                      MachineBasicBlock::iterator insertPos) {
-  BB = bb;
-  InsertPos = insertPos;
-
+/// Clear the DAG state (e.g. between scheduling regions).
+void ScheduleDAG::clearDAG() {
   SUnits.clear();
-  Sequence.clear();
   EntrySU = SUnit();
   ExitSU = SUnit();
+}
 
-  Schedule();
-
-  DEBUG({
-      dbgs() << "*** Final schedule ***\n";
-      dumpSchedule();
-      dbgs() << '\n';
-    });
+/// getInstrDesc helper to handle SDNodes.
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+  if (!Node || !Node->isMachineOpcode()) return NULL;
+  return &TII->get(Node->getMachineOpcode());
 }
 
 /// addPred - This adds the specified edge as a pred of the current node if
@@ -346,13 +321,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
 }
 
 #ifndef NDEBUG
-/// VerifySchedule - Verify that all SUnits were scheduled and that
-/// their state is consistent.
+/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that
+/// their state is consistent. Return the number of scheduled nodes.
 ///
-void ScheduleDAG::VerifySchedule(bool isBottomUp) {
+unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
   bool AnyNotSched = false;
   unsigned DeadNodes = 0;
-  unsigned Noops = 0;
   for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
     if (!SUnits[i].isScheduled) {
       if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
@@ -393,12 +367,8 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) {
       }
     }
   }
-  for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
-    if (!Sequence[i])
-      ++Noops;
   assert(!AnyNotSched);
-  assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
-         "The number of nodes scheduled doesn't match the expected number!");
+  return SUnits.size() - DeadNodes;
 }
 #endif
 
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
deleted file mode 100644
index f8b1bc7..0000000
--- a/lib/CodeGen/ScheduleDAGEmit.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This implements the Emit routines for the ScheduleDAG class, which creates
-// MachineInstrs according to the computed schedule.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pre-RA-sched"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-using namespace llvm;
-
-void ScheduleDAG::EmitNoop() {
-  TII->insertNoop(*BB, InsertPos);
-}
-
-void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
-                                  DenseMap<SUnit*, unsigned> &VRBaseMap) {
-  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
-       I != E; ++I) {
-    if (I->isCtrl()) continue;  // ignore chain preds
-    if (I->getSUnit()->CopyDstRC) {
-      // Copy to physical register.
-      DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
-      assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
-      // Find the destination physical register.
-      unsigned Reg = 0;
-      for (SUnit::const_succ_iterator II = SU->Succs.begin(),
-             EE = SU->Succs.end(); II != EE; ++II) {
-        if (II->isCtrl()) continue;  // ignore chain preds
-        if (II->getReg()) {
-          Reg = II->getReg();
-          break;
-        }
-      }
-      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
-        .addReg(VRI->second);
-    } else {
-      // Copy from physical register.
-      assert(I->getReg() && "Unknown physical register!");
-      unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
-      bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
-      (void)isNew; // Silence compiler warning.
-      assert(isNew && "Node emitted out of order - early");
-      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
-        .addReg(I->getReg());
-    }
-    break;
-  }
-}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index c0ccdb3..6be1ab7 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -13,7 +13,6 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "sched-instrs"
-#include "ScheduleDAGInstrs.h"
 #include "llvm/Operator.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -22,6 +21,7 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -38,30 +38,15 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                      bool IsPostRAFlag,
                                      LiveIntervals *lis)
   : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
-    InstrItins(mf.getTarget().getInstrItineraryData()), IsPostRA(IsPostRAFlag),
-    LIS(lis), UnitLatencies(false), LoopRegs(MLI, MDT), FirstDbgValue(0) {
+    InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
+    IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT),
+    FirstDbgValue(0) {
   assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
   DbgValues.clear();
   assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
          "Virtual registers must be removed prior to PostRA scheduling");
 }
 
-/// Run - perform scheduling.
-///
-void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
-                            MachineBasicBlock::iterator begin,
-                            MachineBasicBlock::iterator end,
-                            unsigned endcount) {
-  BB = bb;
-  Begin = begin;
-  InsertPosIndex = endcount;
-
-  // Check to see if the scheduler cares about latencies.
-  UnitLatencies = ForceUnitLatencies();
-
-  ScheduleDAG::Run(bb, end);
-}
-
 /// getUnderlyingObjectFromInt - This is the function that does the work of
 /// looking through basic ptrtoint+arithmetic+inttoptr sequences.
 static const Value *getUnderlyingObjectFromInt(const Value *V) {
@@ -141,28 +126,58 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
   return 0;
 }
 
-void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
+void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) {
   LoopRegs.Deps.clear();
   if (MachineLoop *ML = MLI.getLoopFor(BB))
     if (BB == ML->getLoopLatch())
       LoopRegs.VisitLoop(ML);
 }
 
+void ScheduleDAGInstrs::finishBlock() {
+  // Nothing to do.
+}
+
 /// Initialize the map with the number of registers.
-void ScheduleDAGInstrs::Reg2SUnitsMap::setRegLimit(unsigned Limit) {
+void Reg2SUnitsMap::setRegLimit(unsigned Limit) {
   PhysRegSet.setUniverse(Limit);
   SUnits.resize(Limit);
 }
 
 /// Clear the map without deallocating storage.
-void ScheduleDAGInstrs::Reg2SUnitsMap::clear() {
+void Reg2SUnitsMap::clear() {
   for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
     SUnits[*I].clear();
   }
   PhysRegSet.clear();
 }
 
-/// AddSchedBarrierDeps - Add dependencies from instructions in the current
+/// Initialize the DAG and common scheduler state for the current scheduling
+/// region. This does not actually create the DAG, only clears it. The
+/// scheduling driver may call BuildSchedGraph multiple times per scheduling
+/// region.
+void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
+                                    MachineBasicBlock::iterator begin,
+                                    MachineBasicBlock::iterator end,
+                                    unsigned endcount) {
+  BB = bb;
+  RegionBegin = begin;
+  RegionEnd = end;
+  EndIndex = endcount;
+  MISUnitMap.clear();
+
+  // Check to see if the scheduler cares about latencies.
+  UnitLatencies = forceUnitLatencies();
+
+  ScheduleDAG::clearDAG();
+}
+
+/// Close the current scheduling region. Don't clear any state in case the
+/// driver wants to refer to the previous scheduling region.
+void ScheduleDAGInstrs::exitRegion() {
+  // Nothing to do.
+}
+
+/// addSchedBarrierDeps - Add dependencies from instructions in the current
 /// list of instructions being scheduled to scheduling barrier by adding
 /// the exit SU to the register defs and use list. This is because we want to
 /// make sure instructions which define registers that are either used by
@@ -170,8 +185,8 @@ void ScheduleDAGInstrs::Reg2SUnitsMap::clear() {
 /// especially important when the definition latency of the return value(s)
 /// are too high to be hidden by the branch or when the liveout registers
 /// used by instructions in the fallthrough block.
-void ScheduleDAGInstrs::AddSchedBarrierDeps() {
-  MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
+void ScheduleDAGInstrs::addSchedBarrierDeps() {
+  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
   ExitSU.setInstr(ExitMI);
   bool AllDepKnown = ExitMI &&
     (ExitMI->isCall() || ExitMI->isBarrier());
@@ -186,19 +201,21 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() {
 
       if (TRI->isPhysicalRegister(Reg))
         Uses[Reg].push_back(&ExitSU);
-      else
+      else {
         assert(!IsPostRA && "Virtual register encountered after regalloc.");
+        addVRegUseDeps(&ExitSU, i);
+      }
     }
   } else {
     // For others, e.g. fallthrough, conditional branch, assume the exit
     // uses all the registers that are livein to the successor blocks.
-    SmallSet<unsigned, 8> Seen;
+    assert(Uses.empty() && "Uses in set before adding deps?");
     for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
            SE = BB->succ_end(); SI != SE; ++SI)
       for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
              E = (*SI)->livein_end(); I != E; ++I) {
         unsigned Reg = *I;
-        if (Seen.insert(Reg))
+        if (!Uses.contains(Reg))
           Uses[Reg].push_back(&ExitSU);
       }
   }
@@ -246,7 +263,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
       // perform its own adjustments.
       const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
       if (!UnitLatencies) {
-        ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
+        computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
         ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
       }
       UseSU->addPred(dep);
@@ -436,7 +453,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
       if (!UnitLatencies) {
         // Adjust the dependence latency using operand def/use information, then
         // allow the target to perform its own adjustments.
-        ComputeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+        computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
         const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
         ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
       }
@@ -455,20 +472,23 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
 ///
 /// Map each real instruction to its SUnit.
 ///
-/// After initSUnits, the SUnits vector is cannot be resized and the scheduler
-/// may hang onto SUnit pointers. We may relax this in the future by using SUnit
-/// IDs instead of pointers.
+/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
+/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
+/// instead of pointers.
+///
+/// MachineScheduler relies on initSUnits numbering the nodes by their order in
+/// the original instruction list.
 void ScheduleDAGInstrs::initSUnits() {
   // We'll be allocating one SUnit for each real instruction in the region,
   // which is contained within a basic block.
   SUnits.reserve(BB->size());
 
-  for (MachineBasicBlock::iterator I = Begin; I != InsertPos; ++I) {
+  for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
     MachineInstr *MI = I;
     if (MI->isDebugValue())
       continue;
 
-    SUnit *SU = NewSUnit(MI);
+    SUnit *SU = newSUnit(MI);
     MISUnitMap[MI] = SU;
 
     SU->isCall = MI->isCall();
@@ -478,11 +498,11 @@ void ScheduleDAGInstrs::initSUnits() {
     if (UnitLatencies)
       SU->Latency = 1;
     else
-      ComputeLatency(SU);
+      computeLatency(SU);
   }
 }
 
-void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
+void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
   // Create an SUnit for each real instruction.
   initSUnits();
 
@@ -517,11 +537,11 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
 
   // Model data dependencies between instructions being scheduled and the
   // ExitSU.
-  AddSchedBarrierDeps();
+  addSchedBarrierDeps();
 
   // Walk the list of instructions, from bottom moving up.
   MachineInstr *PrevMI = NULL;
-  for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
+  for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
        MII != MIE; --MII) {
     MachineInstr *MI = prior(MII);
     if (MI && PrevMI) {
@@ -712,14 +732,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
   Uses.clear();
   VRegDefs.clear();
   PendingLoads.clear();
-  MISUnitMap.clear();
-}
-
-void ScheduleDAGInstrs::FinishBlock() {
-  // Nothing to do.
 }
 
-void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
+void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
   // Compute the latency for the node.
   if (!InstrItins || InstrItins->isEmpty()) {
     SU->Latency = 1;
@@ -733,7 +748,7 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
   }
 }
 
-void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
+void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
                                               SDep& dep) const {
   if (!InstrItins || InstrItins->isEmpty())
     return;
@@ -808,37 +823,8 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
   return oss.str();
 }
 
-// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
-  Begin = InsertPos;
-
-  // If first instruction was a DBG_VALUE then put it back.
-  if (FirstDbgValue)
-    BB->splice(InsertPos, BB, FirstDbgValue);
-
-  // Then re-insert them according to the given schedule.
-  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
-    if (SUnit *SU = Sequence[i])
-      BB->splice(InsertPos, BB, SU->getInstr());
-    else
-      // Null SUnit* is a noop.
-      EmitNoop();
-
-    // Update the Begin iterator, as the first instruction in the block
-    // may have been scheduled later.
-    if (i == 0)
-      Begin = prior(InsertPos);
-  }
-
-  // Reinsert any remaining debug_values.
-  for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
-         DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
-    std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
-    MachineInstr *DbgValue = P.first;
-    MachineBasicBlock::iterator OrigPrivMI = P.second;
-    BB->splice(++OrigPrivMI, BB, DbgValue);
-  }
-  DbgValues.clear();
-  FirstDbgValue = NULL;
-  return BB;
+/// Return the basic block label. It is not necessarilly unique because a block
+/// contains multiple scheduling regions. But it is fine for visualization.
+std::string ScheduleDAGInstrs::getDAGName() const {
+  return "dag." + BB->getFullName();
 }
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 4251583..38feee9 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -41,12 +41,12 @@ namespace llvm {
     static bool renderGraphFromBottomUp() {
       return true;
     }
-    
+
     static bool hasNodeAddressLabel(const SUnit *Node,
                                     const ScheduleDAG *Graph) {
       return true;
     }
-    
+
     /// If you want to override the dot attributes printed for a particular
     /// edge, override this method.
     static std::string getEdgeAttributes(const SUnit *Node,
@@ -58,7 +58,7 @@ namespace llvm {
         return "color=blue,style=dashed";
       return "";
     }
-    
+
 
     std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph);
     static std::string getNodeAttributes(const SUnit *N,
@@ -81,18 +81,17 @@ std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
 /// rendered using 'dot'.
 ///
-void ScheduleDAG::viewGraph() {
-// This code is only for debugging!
+void ScheduleDAG::viewGraph(const Twine &Name, const Twine &Title) {
+  // This code is only for debugging!
 #ifndef NDEBUG
-  if (BB->getBasicBlock())
-    ViewGraph(this, "dag." + MF.getFunction()->getName(), false,
-              "Scheduling-Units Graph for " + MF.getFunction()->getName() +
-              ":" + BB->getBasicBlock()->getName());
-  else
-    ViewGraph(this, "dag." + MF.getFunction()->getName(), false,
-              "Scheduling-Units Graph for " + MF.getFunction()->getName());
+  ViewGraph(this, Name, false, Title);
 #else
   errs() << "ScheduleDAG::viewGraph is only available in debug builds on "
          << "systems with Graphviz or gv!\n";
 #endif  // NDEBUG
 }
+
+/// Out-of-line implementation with no arguments is handy for gdb.
+void ScheduleDAG::viewGraph() {
+  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 9a79217..a6bdc3b 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_library(LLVMSelectionDAG
   ScheduleDAGSDNodes.cpp
   SelectionDAG.cpp
   SelectionDAGBuilder.cpp
+  SelectionDAGDumper.cpp
   SelectionDAGISel.cpp
   SelectionDAGPrinter.cpp
   ScheduleDAGVLIW.cpp
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1b148ad..7c4db97 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -80,7 +80,7 @@ namespace {
     // visit, we pop off the order stack until we find an item that is
     // also in the contents set. All operations are O(log N).
     SmallPtrSet<SDNode*, 64> WorkListContents;
-    std::vector<SDNode*> WorkListOrder;
+    SmallVector<SDNode*, 64> WorkListOrder;
 
     // AA - Used for DAG load/store alias analysis.
     AliasAnalysis &AA;
@@ -381,6 +381,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 /// specified expression for the same cost as the expression itself, or 2 if we
 /// can compute the negated form more cheaply than the expression itself.
 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+                               const TargetLowering &TLI,
                                const TargetOptions *Options,
                                unsigned Depth = 0) {
   // No compile time optimizations on this type.
@@ -406,12 +407,17 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
     // FIXME: determine better conditions for this xform.
     if (!Options->UnsafeFPMath) return 0;
 
+    // After operation legalization, it might not be legal to create new FSUBs.
+    if (LegalOperations &&
+        !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
+      return 0;
+
     // fold (fsub (fadd A, B)) -> (fsub (fneg A), B)
-    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Options,
-                                    Depth + 1))
+    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+                                    Options, Depth + 1))
       return V;
     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
-    return isNegatibleForFree(Op.getOperand(1), LegalOperations, Options,
+    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                               Depth + 1);
   case ISD::FSUB:
     // We can't turn -(A-B) into B-A when we honor signed zeros.
@@ -425,17 +431,17 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
     if (Options->HonorSignDependentRoundingFPMath()) return 0;
 
     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
-    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Options,
-                                    Depth + 1))
+    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+                                    Options, Depth + 1))
       return V;
 
-    return isNegatibleForFree(Op.getOperand(1), LegalOperations, Options,
+    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                               Depth + 1);
 
   case ISD::FP_EXTEND:
   case ISD::FP_ROUND:
   case ISD::FSIN:
-    return isNegatibleForFree(Op.getOperand(0), LegalOperations, Options,
+    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
                               Depth + 1);
   }
 }
@@ -464,6 +470,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
 
     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+                           DAG.getTargetLoweringInfo(),
                            &DAG.getTarget().Options, Depth+1))
       return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
                          GetNegatedExpression(Op.getOperand(0), DAG,
@@ -493,6 +500,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
 
     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+                           DAG.getTargetLoweringInfo(),
                            &DAG.getTarget().Options, Depth+1))
       return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
                          GetNegatedExpression(Op.getOperand(0), DAG,
@@ -997,8 +1005,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
     // worklist *should* contain, and check the node we want to visit is should
     // actually be visited.
     do {
-      N = WorkListOrder.back();
-      WorkListOrder.pop_back();
+      N = WorkListOrder.pop_back_val();
     } while (!WorkListContents.erase(N));
 
     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
@@ -5507,11 +5514,13 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
       N1CFP->getValueAPF().isZero())
     return N0;
   // fold (fadd A, (fneg B)) -> (fsub A, B)
-  if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options) == 2)
+  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+      isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
     return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
                        GetNegatedExpression(N1, DAG, LegalOperations));
   // fold (fadd (fneg A), B) -> (fsub B, A)
-  if (isNegatibleForFree(N0, LegalOperations, &DAG.getTarget().Options) == 2)
+  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+      isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
     return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
                        GetNegatedExpression(N0, DAG, LegalOperations));
 
@@ -5549,16 +5558,33 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
   // fold (fsub 0, B) -> -B
   if (DAG.getTarget().Options.UnsafeFPMath &&
       N0CFP && N0CFP->getValueAPF().isZero()) {
-    if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options))
+    if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
       return GetNegatedExpression(N1, DAG, LegalOperations);
     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
       return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
   }
   // fold (fsub A, (fneg B)) -> (fadd A, B)
-  if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options))
+  if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
     return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
                        GetNegatedExpression(N1, DAG, LegalOperations));
 
+  // If 'unsafe math' is enabled, fold
+  //    (fsub x, (fadd x, y)) -> (fneg y) &
+  //    (fsub x, (fadd y, x)) -> (fneg y)
+  if (DAG.getTarget().Options.UnsafeFPMath) {
+    if (N1.getOpcode() == ISD::FADD) {
+      SDValue N10 = N1->getOperand(0);
+      SDValue N11 = N1->getOperand(1);
+
+      if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
+                                          &DAG.getTarget().Options))
+        return GetNegatedExpression(N11, DAG, LegalOperations);
+      else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
+                                               &DAG.getTarget().Options))
+        return GetNegatedExpression(N10, DAG, LegalOperations);
+    }
+  }
+
   return SDValue();
 }
 
@@ -5568,6 +5594,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   EVT VT = N->getValueType(0);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
   // fold vector ops
   if (VT.isVector()) {
@@ -5598,9 +5625,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
       return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
 
   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
-  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations,
+  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
                                        &DAG.getTarget().Options)) {
-    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations,
+    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, 
                                          &DAG.getTarget().Options)) {
       // Both can be negated for free, check to see if at least one is cheaper
       // negated.
@@ -5628,6 +5655,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   EVT VT = N->getValueType(0);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
   // fold vector ops
   if (VT.isVector()) {
@@ -5641,9 +5669,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
 
 
   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
-  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations,
+  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
                                        &DAG.getTarget().Options)) {
-    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations,
+    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
                                          &DAG.getTarget().Options)) {
       // Both can be negated for free, check to see if at least one is cheaper
       // negated.
@@ -5897,7 +5925,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
-  if (isNegatibleForFree(N0, LegalOperations, &DAG.getTarget().Options))
+  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
+                         &DAG.getTarget().Options))
     return GetNegatedExpression(N0, DAG, LegalOperations);
 
   // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
@@ -6129,8 +6158,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
 
 /// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
 /// uses N as its base pointer and that N may be folded in the load / store
-/// addressing mode. FIXME: This currently only looks for folding of
-/// [reg +/- imm] addressing modes.
+/// addressing mode.
 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
                                     SelectionDAG &DAG,
                                     const TargetLowering &TLI) {
@@ -6150,15 +6178,19 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
   if (N->getOpcode() == ISD::ADD) {
     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
     if (Offset)
+      // [reg +/- imm]
       AM.BaseOffs = Offset->getSExtValue();
     else
-      return false;
+      // [reg +/- reg]
+      AM.Scale = 1;
   } else if (N->getOpcode() == ISD::SUB) {
     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
     if (Offset)
+      // [reg +/- imm]
       AM.BaseOffs = -Offset->getSExtValue();
     else
-      return false;
+      // [reg +/- reg]
+      AM.Scale = 1;
   } else
     return false;
 
@@ -7187,6 +7219,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     EVT ExtVT = VT.getVectorElementType();
     EVT LVT = ExtVT;
 
+    // If the result of load has to be truncated, then it's not necessarily
+    // profitable.
+    if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+      return SDValue();
+
     if (InVec.getOpcode() == ISD::BITCAST) {
       // Don't duplicate a load with other uses.
       if (!InVec.hasOneUse())
@@ -7287,17 +7324,36 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     // Note that this replacement assumes that the extractvalue is the only
     // use of the load; that's okay because we don't want to perform this
     // transformation in other cases anyway.
-    SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
-                               LN0->getPointerInfo().getWithOffset(PtrOff),
-                               LN0->isVolatile(), LN0->isNonTemporal(), 
-                               LN0->isInvariant(), Align);
+    SDValue Load;
+    SDValue Chain;
+    if (NVT.bitsGT(LVT)) {
+      // If the result type of vextract is wider than the load, then issue an
+      // extending load instead.
+      ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
+        ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+      Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(),
+                            NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
+                            LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align);
+      Chain = Load.getValue(1);
+    } else {
+      Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+                         LN0->getPointerInfo().getWithOffset(PtrOff),
+                         LN0->isVolatile(), LN0->isNonTemporal(), 
+                         LN0->isInvariant(), Align);
+      Chain = Load.getValue(1);
+      if (NVT.bitsLT(LVT))
+        Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load);
+      else
+        Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load);
+    }
     WorkListRemover DeadNodes(*this);
     SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
-    SDValue To[] = { Load.getValue(0), Load.getValue(1) };
+    SDValue To[] = { Load, Chain };
     DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes);
     // Since we're explcitly calling ReplaceAllUses, add the new node to the
     // worklist explicitly as well.
     AddToWorkList(Load.getNode());
+    AddUsersToWorkList(Load.getNode()); // Add users too
     // Make sure to revisit this node to clean it up; it will usually be dead.
     AddToWorkList(N);
     return SDValue(N, 0);
@@ -7367,6 +7423,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
   // will be type-legalized to complex code sequences.
   // We perform this optimization only before the operation legalizer because we
   // may introduce illegal operations.
+  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
+  // turn into a single shuffle instruction.
   if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) &&
       ValidTypes) {
     bool isLE = TLI.isLittleEndian();
@@ -7407,6 +7465,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
     SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
                                  VecVT, &Ops[0], Ops.size());
 
+    // The new BUILD_VECTOR node has the potential to be further optimized.
+    AddToWorkList(BV.getNode());
     // Bitcast to the desired type.
     return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV);
   }
@@ -7414,6 +7474,12 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
   // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
   // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
   // at most two distinct vectors, turn this into a shuffle node.
+
+  // May only combine to shuffle after legalize if shuffle is legal.
+  if (LegalOperations &&
+      !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
+    return SDValue();
+
   SDValue VecIn1, VecIn2;
   for (unsigned i = 0; i != NumInScalars; ++i) {
     // Ignore undef inputs.
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index fd8ce78..9f4a44a 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -136,15 +136,8 @@ unsigned FastISel::getRegForValue(const Value *V) {
       return 0;
   }
 
-  // Look up the value to see if we already have a register for it. We
-  // cache values defined by Instructions across blocks, and other values
-  // only locally. This is because Instructions already have the SSA
-  // def-dominates-use requirement enforced.
-  DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
-  if (I != FuncInfo.ValueMap.end())
-    return I->second;
-
-  unsigned Reg = LocalValueMap[V];
+  // Look up the value to see if we already have a register for it.
+  unsigned Reg = lookUpRegForValue(V);
   if (Reg != 0)
     return Reg;
 
@@ -199,7 +192,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
       uint32_t IntBitWidth = IntVT.getSizeInBits();
       bool isExact;
       (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
-                                APFloat::rmTowardZero, &isExact);
+                                  APFloat::rmTowardZero, &isExact);
       if (isExact) {
         APInt IntVal(IntBitWidth, x);
 
@@ -577,12 +570,16 @@ bool FastISel::SelectCall(const User *I) {
   case Intrinsic::dbg_declare: {
     const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
     if (!DIVariable(DI->getVariable()).Verify() ||
-        !FuncInfo.MF->getMMI().hasDebugInfo())
+        !FuncInfo.MF->getMMI().hasDebugInfo()) {
+      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
       return true;
+    }
 
     const Value *Address = DI->getAddress();
-    if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address))
+    if (!Address || isa<UndefValue>(Address)) {
+      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
       return true;
+    }
 
     unsigned Reg = 0;
     unsigned Offset = 0;
@@ -590,16 +587,25 @@ bool FastISel::SelectCall(const User *I) {
       // Some arguments' frame index is recorded during argument lowering.
       Offset = FuncInfo.getArgumentFrameIndex(Arg);
       if (Offset)
-	Reg = TRI.getFrameRegister(*FuncInfo.MF);
+        Reg = TRI.getFrameRegister(*FuncInfo.MF);
     }
     if (!Reg)
-      Reg = getRegForValue(Address);
+      Reg = lookUpRegForValue(Address);
+
+    if (!Reg && isa<Instruction>(Address) &&
+        (!isa<AllocaInst>(Address) ||
+         !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
+      Reg = FuncInfo.InitializeRegForValue(Address);
 
     if (Reg)
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
               TII.get(TargetOpcode::DBG_VALUE))
         .addReg(Reg, RegState::Debug).addImm(Offset)
         .addMetadata(DI->getVariable());
+    else
+      // We can't yet handle anything else here because it would require
+      // generating code, thus altering codegen because of debug info.
+      DEBUG(dbgs() << "Dropping debug info for " << DI);
     return true;
   }
   case Intrinsic::dbg_value: {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 31df458..1b84b13 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3032,6 +3032,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Results.push_back(Results[0].getValue(1));
     break;
   }
+  case ISD::FSUB: {
+    EVT VT = Node->getValueType(0);
+    assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
+           TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
+           "Don't know how to expand this FP subtraction!");
+    Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
+    Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+    Results.push_back(Tmp1);
+    break;
+  }
   case ISD::SUB: {
     EVT VT = Node->getValueType(0);
     assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
@@ -3590,10 +3600,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
                                   Tmp1, Tmp2, Node->getOperand(2)));
     break;
   }
+  case ISD::FDIV:
   case ISD::FPOW: {
     Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
     Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
-    Tmp3 = DAG.getNode(ISD::FPOW, dl, NVT, Tmp1, Tmp2);
+    Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
     Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
                                   Tmp3, DAG.getIntPtrConstant(0)));
     break;
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 1a27f3f..ff0136e 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -470,7 +470,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
 
 
 /// Main resource tracking point.
-void ResourcePriorityQueue::ScheduledNode(SUnit *SU) {
+void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
   // Use NULL entry as an event marker to reset
   // the DFA state.
   if (!SU) {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 34ee1f3..24da432 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -43,7 +43,7 @@ namespace {
     SmallVector<SUnit *, 16> Queue;
 
     bool empty() const { return Queue.empty(); }
-    
+
     void push(SUnit *U) {
       Queue.push_back(U);
     }
@@ -101,8 +101,8 @@ private:
   bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
   void ListScheduleBottomUp();
 
-  /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
-  bool ForceUnitLatencies() const { return true; }
+  /// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
+  bool forceUnitLatencies() const { return true; }
 };
 }  // end anonymous namespace
 
@@ -112,7 +112,7 @@ void ScheduleDAGFast::Schedule() {
   DEBUG(dbgs() << "********** List Scheduling **********\n");
 
   NumLiveRegs = 0;
-  LiveRegDefs.resize(TRI->getNumRegs(), NULL);  
+  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
   LiveRegCycles.resize(TRI->getNumRegs(), 0);
 
   // Build the scheduling graph.
@@ -159,7 +159,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
     ReleasePred(SU, &*I);
     if (I->isAssignedRegDep()) {
       // This is a physical register dependency and it's impossible or
-      // expensive to copy the register. Make sure nothing that can 
+      // expensive to copy the register. Make sure nothing that can
       // clobber the register is scheduled between the predecessor and
       // this node.
       if (!LiveRegDefs[I->getReg()]) {
@@ -245,10 +245,10 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
     DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                    SDValue(LoadNode, 1));
 
-    SUnit *NewSU = NewSUnit(N);
+    SUnit *NewSU = newSUnit(N);
     assert(N->getNodeId() == -1 && "Node already inserted!");
     N->setNodeId(NewSU->NodeNum);
-      
+
     const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
     for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
       if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
@@ -268,7 +268,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
       LoadSU = &SUnits[LoadNode->getNodeId()];
       isNewLoad = false;
     } else {
-      LoadSU = NewSUnit(LoadNode);
+      LoadSU = newSUnit(LoadNode);
       LoadNode->setNodeId(LoadSU->NodeNum);
     }
 
@@ -329,7 +329,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
         D.setSUnit(LoadSU);
         AddPred(SuccDep, D);
       }
-    } 
+    }
     if (isNewLoad) {
       AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
     }
@@ -381,11 +381,11 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                               const TargetRegisterClass *DestRC,
                                               const TargetRegisterClass *SrcRC,
                                                SmallVector<SUnit*, 2> &Copies) {
-  SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL));
+  SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL));
   CopyFromSU->CopySrcRC = SrcRC;
   CopyFromSU->CopyDstRC = DestRC;
 
-  SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL));
+  SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL));
   CopyToSU->CopySrcRC = DestRC;
   CopyToSU->CopyDstRC = SrcRC;
 
@@ -425,7 +425,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
   const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
   assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
   unsigned NumRes = MCID.getNumDefs();
-  for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+  for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
     if (Reg == *ImpDef)
       break;
     ++NumRes;
@@ -508,7 +508,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
     const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
     if (!MCID.ImplicitDefs)
       continue;
-    for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) {
+    for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
       CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
     }
   }
@@ -630,7 +630,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
   std::reverse(Sequence.begin(), Sequence.end());
 
 #ifndef NDEBUG
-  VerifySchedule(/*isBottomUp=*/true);
+  VerifyScheduledSequence(/*isBottomUp=*/true);
 #endif
 }
 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 1017d36..f44adfc 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -232,7 +232,7 @@ private:
   /// Updates the topological ordering if required.
   SUnit *CreateNewSUnit(SDNode *N) {
     unsigned NumSUnits = SUnits.size();
-    SUnit *NewNode = NewSUnit(N);
+    SUnit *NewNode = newSUnit(N);
     // Update the topological ordering.
     if (NewNode->NodeNum >= NumSUnits)
       Topo.InitDAGTopologicalSorting();
@@ -250,9 +250,9 @@ private:
     return NewNode;
   }
 
-  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
+  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
   /// need actual latency information but the hybrid scheduler does.
-  bool ForceUnitLatencies() const {
+  bool forceUnitLatencies() const {
     return !NeedLatency;
   }
 };
@@ -327,6 +327,12 @@ void ScheduleDAGRRList::Schedule() {
   ListScheduleBottomUp();
 
   AvailableQueue->releaseState();
+
+  DEBUG({
+      dbgs() << "*** Final schedule ***\n";
+      dumpSchedule();
+      dbgs() << '\n';
+    });
 }
 
 //===----------------------------------------------------------------------===//
@@ -348,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
 #endif
   --PredSU->NumSuccsLeft;
 
-  if (!ForceUnitLatencies()) {
+  if (!forceUnitLatencies()) {
     // Updating predecessor's height. This is now the cycle when the
     // predecessor can be scheduled without causing a pipeline stall.
     PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
@@ -695,7 +701,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
 
   Sequence.push_back(SU);
 
-  AvailableQueue->ScheduledNode(SU);
+  AvailableQueue->scheduledNode(SU);
 
   // If HazardRec is disabled, and each inst counts as one cycle, then
   // advance CurCycle before ReleasePredecessors to avoid useless pushes to
@@ -842,7 +848,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
   else {
     AvailableQueue->push(SU);
   }
-  AvailableQueue->UnscheduledNode(SU);
+  AvailableQueue->unscheduledNode(SU);
 }
 
 /// After backtracking, the hazard checker needs to be restored to a state
@@ -963,7 +969,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
       LoadNode->setNodeId(LoadSU->NodeNum);
 
       InitNumRegDefsLeft(LoadSU);
-      ComputeLatency(LoadSU);
+      computeLatency(LoadSU);
     }
 
     SUnit *NewSU = CreateNewSUnit(N);
@@ -981,7 +987,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
       NewSU->isCommutable = true;
 
     InitNumRegDefsLeft(NewSU);
-    ComputeLatency(NewSU);
+    computeLatency(NewSU);
 
     // Record all the edges to and from the old SU, by category.
     SmallVector<SDep, 4> ChainPreds;
@@ -1160,7 +1166,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
   const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
   assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
   unsigned NumRes = MCID.getNumDefs();
-  for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+  for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
     if (Reg == *ImpDef)
       break;
     ++NumRes;
@@ -1286,7 +1292,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
     const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
     if (!MCID.ImplicitDefs)
       continue;
-    for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
+    for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
       CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
   }
 
@@ -1475,7 +1481,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
   std::reverse(Sequence.begin(), Sequence.end());
 
 #ifndef NDEBUG
-  VerifySchedule(/*isBottomUp=*/true);
+  VerifyScheduledSequence(/*isBottomUp=*/true);
 #endif
 }
 
@@ -1681,9 +1687,9 @@ public:
 
   int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
 
-  void ScheduledNode(SUnit *SU);
+  void scheduledNode(SUnit *SU);
 
-  void UnscheduledNode(SUnit *SU);
+  void unscheduledNode(SUnit *SU);
 
 protected:
   bool canClobber(const SUnit *SU, const SUnit *Op);
@@ -1984,7 +1990,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
   return PDiff;
 }
 
-void RegReductionPQBase::ScheduledNode(SUnit *SU) {
+void RegReductionPQBase::scheduledNode(SUnit *SU) {
   if (!TracksRegPressure)
     return;
 
@@ -2053,7 +2059,7 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) {
   dumpRegPressure();
 }
 
-void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
+void RegReductionPQBase::unscheduledNode(SUnit *SU) {
   if (!TracksRegPressure)
     return;
 
@@ -2661,7 +2667,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
                                          ScheduleDAGRRList *scheduleDAG,
                                          const TargetInstrInfo *TII,
                                          const TargetRegisterInfo *TRI) {
-  const unsigned *ImpDefs
+  const uint16_t *ImpDefs
     = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
   const uint32_t *RegMask = getNodeRegMask(SU->getNode());
   if(!ImpDefs && !RegMask)
@@ -2680,7 +2686,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
         return true;
 
       if (ImpDefs)
-        for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
+        for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
           // Return true if SU clobbers this physical register use and the
           // definition of the register reaches from DepSU. IsReachable queries
           // a topological forward sort of the DAG (following the successors).
@@ -2699,13 +2705,13 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                   const TargetRegisterInfo *TRI) {
   SDNode *N = SuccSU->getNode();
   unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
-  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+  const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
   assert(ImpDefs && "Caller should check hasPhysRegDefs");
   for (const SDNode *SUNode = SU->getNode(); SUNode;
        SUNode = SUNode->getGluedNode()) {
     if (!SUNode->isMachineOpcode())
       continue;
-    const unsigned *SUImpDefs =
+    const uint16_t *SUImpDefs =
       TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
     const uint32_t *SURegMask = getNodeRegMask(SUNode);
     if (!SUImpDefs && !SURegMask)
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 71f07d6..69dd813 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -17,6 +17,8 @@
 #include "ScheduleDAGSDNodes.h"
 #include "InstrEmitter.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -44,20 +46,26 @@ static cl::opt<int> HighLatencyCycles(
            "instructions take for targets with no itinerary"));
 
 ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
-  : ScheduleDAG(mf),
+  : ScheduleDAG(mf), BB(0), DAG(0),
     InstrItins(mf.getTarget().getInstrItineraryData()) {}
 
 /// Run - perform scheduling.
 ///
-void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
-                             MachineBasicBlock::iterator insertPos) {
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
+  BB = bb;
   DAG = dag;
-  ScheduleDAG::Run(bb, insertPos);
+
+  // Clear the scheduler's SUnit DAG.
+  ScheduleDAG::clearDAG();
+  Sequence.clear();
+
+  // Invoke the target's selection of scheduler.
+  Schedule();
 }
 
 /// NewSUnit - Creates a new SUnit and return a ptr to it.
 ///
-SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
+SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
 #ifndef NDEBUG
   const SUnit *Addr = 0;
   if (!SUnits.empty())
@@ -79,7 +87,7 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
 }
 
 SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
-  SUnit *SU = NewSUnit(Old->getNode());
+  SUnit *SU = newSUnit(Old->getNode());
   SU->OrigNode = Old->OrigNode;
   SU->Latency = Old->Latency;
   SU->isVRegCycle = Old->isVRegCycle;
@@ -302,7 +310,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
     // If this node has already been processed, stop now.
     if (NI->getNodeId() != -1) continue;
 
-    SUnit *NodeSUnit = NewSUnit(NI);
+    SUnit *NodeSUnit = newSUnit(NI);
 
     // See if anything is glued to this node, if so, add them to glued
     // nodes.  Nodes can have at most one glue input and one glue output.  Glue
@@ -360,7 +368,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
     InitNumRegDefsLeft(NodeSUnit);
 
     // Assign the Latency field of NodeSUnit using target-provided information.
-    ComputeLatency(NodeSUnit);
+    computeLatency(NodeSUnit);
   }
 
   // Find all call operands.
@@ -382,7 +390,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
   const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
 
   // Check to see if the scheduler cares about latencies.
-  bool UnitLatencies = ForceUnitLatencies();
+  bool UnitLatencies = forceUnitLatencies();
 
   // Pass 2: add the preds, succs, etc.
   for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
@@ -448,7 +456,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
         const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
                                OpLatency, PhysReg);
         if (!isChain && !UnitLatencies) {
-          ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
+          computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
           ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
         }
 
@@ -541,7 +549,7 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
   }
 }
 
-void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
   SDNode *N = SU->getNode();
 
   // TokenFactor operands are considered zero latency, and some schedulers
@@ -553,7 +561,7 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
   }
 
   // Check to see if the scheduler cares about latencies.
-  if (ForceUnitLatencies()) {
+  if (forceUnitLatencies()) {
     SU->Latency = 1;
     return;
   }
@@ -575,10 +583,10 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
       SU->Latency += TII->getInstrLatency(InstrItins, N);
 }
 
-void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
+void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
                                                unsigned OpIdx, SDep& dep) const{
   // Check to see if the scheduler cares about latencies.
-  if (ForceUnitLatencies())
+  if (forceUnitLatencies())
     return;
 
   if (dep.getKind() != SDep::Data)
@@ -621,6 +629,30 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
   }
 }
 
+void ScheduleDAGSDNodes::dumpSchedule() const {
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    if (SUnit *SU = Sequence[i])
+      SU->dump(this);
+    else
+      dbgs() << "**** NOOP ****\n";
+  }
+}
+
+#ifndef NDEBUG
+/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
+/// their state is consistent with the nodes listed in Sequence.
+///
+void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
+  unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
+  unsigned Noops = 0;
+  for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+    if (!Sequence[i])
+      ++Noops;
+  assert(Sequence.size() - Noops == ScheduledNodes &&
+         "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif // NDEBUG
+
 namespace {
   struct OrderSorter {
     bool operator()(const std::pair<unsigned, MachineInstr*> &A,
@@ -686,9 +718,48 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
   ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
 }
 
+void ScheduleDAGSDNodes::
+EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+                MachineBasicBlock::iterator InsertPos) {
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    if (I->getSUnit()->CopyDstRC) {
+      // Copy to physical register.
+      DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+      assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+      // Find the destination physical register.
+      unsigned Reg = 0;
+      for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+             EE = SU->Succs.end(); II != EE; ++II) {
+        if (II->isCtrl()) continue;  // ignore chain preds
+        if (II->getReg()) {
+          Reg = II->getReg();
+          break;
+        }
+      }
+      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+        .addReg(VRI->second);
+    } else {
+      // Copy from physical register.
+      assert(I->getReg() && "Unknown physical register!");
+      unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+      bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+      (void)isNew; // Silence compiler warning.
+      assert(isNew && "Node emitted out of order - early");
+      BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
+        .addReg(I->getReg());
+    }
+    break;
+  }
+}
 
-/// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
+/// EmitSchedule - Emit the machine code in scheduled order. Return the new
+/// InsertPos and MachineBasicBlock that contains this insertion
+/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
+/// not necessarily refer to returned BB. The emitter may split blocks.
+MachineBasicBlock *ScheduleDAGSDNodes::
+EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
   InstrEmitter Emitter(BB, InsertPos);
   DenseMap<SDValue, unsigned> VRBaseMap;
   DenseMap<SUnit*, unsigned> CopyVRBaseMap;
@@ -711,7 +782,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
     SUnit *SU = Sequence[i];
     if (!SU) {
       // Null SUnit* is a noop.
-      EmitNoop();
+      TII->insertNoop(*Emitter.getBlock(), InsertPos);
       continue;
     }
 
@@ -719,7 +790,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
     // SDNode and any glued SDNodes and append them to the block.
     if (!SU->getNode()) {
       // Emit a copy.
-      EmitPhysRegCopy(SU, CopyVRBaseMap);
+      EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
       continue;
     }
 
@@ -784,19 +855,24 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
     }
     // Add trailing DbgValue's before the terminator. FIXME: May want to add
     // some of them before one or more conditional branches?
+    SmallVector<MachineInstr*, 8> DbgMIs;
     while (DI != DE) {
-      MachineBasicBlock *InsertBB = Emitter.getBlock();
-      MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator();
-      if (!(*DI)->isInvalidated()) {
-        MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap);
-        if (DbgMI)
-          InsertBB->insert(Pos, DbgMI);
-      }
+      if (!(*DI)->isInvalidated())
+        if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
+          DbgMIs.push_back(DbgMI);
       ++DI;
     }
+
+    MachineBasicBlock *InsertBB = Emitter.getBlock();
+    MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
+    InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
   }
 
-  BB = Emitter.getBlock();
   InsertPos = Emitter.getInsertPos();
-  return BB;
+  return Emitter.getBlock();
+}
+
+/// Return the basic block label.
+std::string ScheduleDAGSDNodes::getDAGName() const {
+  return "sunit-dag." + BB->getFullName();
 }
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 17b4901..75940ec 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -35,17 +35,20 @@ namespace llvm {
   ///
   class ScheduleDAGSDNodes : public ScheduleDAG {
   public:
+    MachineBasicBlock *BB;
     SelectionDAG *DAG;                    // DAG of the current basic block
     const InstrItineraryData *InstrItins;
 
+    /// The schedule. Null SUnit*'s represent noop instructions.
+    std::vector<SUnit*> Sequence;
+
     explicit ScheduleDAGSDNodes(MachineFunction &mf);
 
     virtual ~ScheduleDAGSDNodes() {}
 
     /// Run - perform scheduling.
     ///
-    void Run(SelectionDAG *dag, MachineBasicBlock *bb,
-             MachineBasicBlock::iterator insertPos);
+    void Run(SelectionDAG *dag, MachineBasicBlock *bb);
 
     /// isPassiveNode - Return true if the node is a non-scheduled leaf.
     ///
@@ -68,7 +71,7 @@ namespace llvm {
 
     /// NewSUnit - Creates a new SUnit and return a ptr to it.
     ///
-    SUnit *NewSUnit(SDNode *N);
+    SUnit *newSUnit(SDNode *N);
 
     /// Clone - Creates a clone of the specified SUnit. It does not copy the
     /// predecessors / successors info nor the temporary scheduling states.
@@ -79,7 +82,7 @@ namespace llvm {
     /// are input.  This SUnit graph is similar to the SelectionDAG, but
     /// excludes nodes that aren't interesting to scheduling, and represents
     /// flagged together nodes with a single SUnit.
-    virtual void BuildSchedGraph(AliasAnalysis *AA);
+    void BuildSchedGraph(AliasAnalysis *AA);
 
     /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
     /// CopyToReg and its only active data operands are CopyFromReg within a
@@ -91,30 +94,41 @@ namespace llvm {
     ///
     void InitNumRegDefsLeft(SUnit *SU);
 
-    /// ComputeLatency - Compute node latency.
+    /// computeLatency - Compute node latency.
     ///
-    virtual void ComputeLatency(SUnit *SU);
+    virtual void computeLatency(SUnit *SU);
 
-    /// ComputeOperandLatency - Override dependence edge latency using
+    /// computeOperandLatency - Override dependence edge latency using
     /// operand use/def information
     ///
-    virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+    virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
                                        SDep& dep) const { }
 
-    virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use,
+    virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
                                        unsigned OpIdx, SDep& dep) const;
 
-    virtual MachineBasicBlock *EmitSchedule();
-
     /// Schedule - Order nodes according to selected style, filling
     /// in the Sequence member.
     ///
     virtual void Schedule() = 0;
 
+    /// VerifyScheduledSequence - Verify that all SUnits are scheduled and
+    /// consistent with the Sequence of scheduled instructions.
+    void VerifyScheduledSequence(bool isBottomUp);
+
+    /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
+    /// according to the order specified in Sequence.
+    ///
+    MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
     virtual void dumpNode(const SUnit *SU) const;
 
+    void dumpSchedule() const;
+
     virtual std::string getGraphNodeLabel(const SUnit *SU) const;
 
+    virtual std::string getDAGName() const;
+
     virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
 
     /// RegDefIter - In place iteration over the values defined by an
@@ -160,6 +174,9 @@ namespace llvm {
     /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
     void BuildSchedUnits();
     void AddSchedEdges();
+
+    void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+                         MachineBasicBlock::iterator InsertPos);
   };
 }
 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 7d12509..c851291 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -158,7 +158,7 @@ void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
 
   releaseSuccessors(SU);
   SU->isScheduled = true;
-  AvailableQueue->ScheduledNode(SU);
+  AvailableQueue->scheduledNode(SU);
 }
 
 /// listScheduleTopDown - The main loop of list scheduling for top-down
@@ -202,7 +202,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() {
     // don't advance the hazard recognizer.
     if (AvailableQueue->empty()) {
       // Reset DFA state.
-      AvailableQueue->ScheduledNode(0);
+      AvailableQueue->scheduledNode(0);
       ++CurCycle;
       continue;
     }
@@ -261,7 +261,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() {
   }
 
 #ifndef NDEBUG
-  VerifySchedule(/*isBottomUp=*/false);
+  VerifyScheduledSequence(/*isBottomUp=*/false);
 #endif
 }
 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 796abf4..e3a7305 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -124,20 +124,29 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
   if (i == e) return false;
 
   // Do not accept build_vectors that aren't all constants or which have non-~0
-  // elements.
+  // elements. We have to be a bit careful here, as the type of the constant
+  // may not be the same as the type of the vector elements due to type
+  // legalization (the elements are promoted to a legal type for the target and
+  // a vector of a type may be legal when the base element type is not).
+  // We only want to check enough bits to cover the vector elements, because
+  // we care if the resultant vector is all ones, not whether the individual
+  // constants are.
   SDValue NotZero = N->getOperand(i);
+  unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
   if (isa<ConstantSDNode>(NotZero)) {
-    if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue())
+    if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() <
+        EltSize)
       return false;
   } else if (isa<ConstantFPSDNode>(NotZero)) {
-    if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF().
-                bitcastToAPInt().isAllOnesValue())
+    if (cast<ConstantFPSDNode>(NotZero)->getValueAPF()
+              .bitcastToAPInt().countTrailingOnes() < EltSize)
       return false;
   } else
     return false;
 
   // Okay, we have at least one ~0 value, check to see if the rest match or are
-  // undefs.
+  // undefs. Even with the above element type twiddling, this should be OK, as
+  // the same type legalization should have applied to all the elements.
   for (++i; i != e; ++i)
     if (N->getOperand(i) != NotZero &&
         N->getOperand(i).getOpcode() != ISD::UNDEF)
@@ -5904,571 +5913,6 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
   return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
 }
 
-std::string SDNode::getOperationName(const SelectionDAG *G) const {
-  switch (getOpcode()) {
-  default:
-    if (getOpcode() < ISD::BUILTIN_OP_END)
-      return "<<Unknown DAG Node>>";
-    if (isMachineOpcode()) {
-      if (G)
-        if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
-          if (getMachineOpcode() < TII->getNumOpcodes())
-            return TII->getName(getMachineOpcode());
-      return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
-    }
-    if (G) {
-      const TargetLowering &TLI = G->getTargetLoweringInfo();
-      const char *Name = TLI.getTargetNodeName(getOpcode());
-      if (Name) return Name;
-      return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
-    }
-    return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
-
-#ifndef NDEBUG
-  case ISD::DELETED_NODE:
-    return "<<Deleted Node!>>";
-#endif
-  case ISD::PREFETCH:      return "Prefetch";
-  case ISD::MEMBARRIER:    return "MemBarrier";
-  case ISD::ATOMIC_FENCE:    return "AtomicFence";
-  case ISD::ATOMIC_CMP_SWAP:    return "AtomicCmpSwap";
-  case ISD::ATOMIC_SWAP:        return "AtomicSwap";
-  case ISD::ATOMIC_LOAD_ADD:    return "AtomicLoadAdd";
-  case ISD::ATOMIC_LOAD_SUB:    return "AtomicLoadSub";
-  case ISD::ATOMIC_LOAD_AND:    return "AtomicLoadAnd";
-  case ISD::ATOMIC_LOAD_OR:     return "AtomicLoadOr";
-  case ISD::ATOMIC_LOAD_XOR:    return "AtomicLoadXor";
-  case ISD::ATOMIC_LOAD_NAND:   return "AtomicLoadNand";
-  case ISD::ATOMIC_LOAD_MIN:    return "AtomicLoadMin";
-  case ISD::ATOMIC_LOAD_MAX:    return "AtomicLoadMax";
-  case ISD::ATOMIC_LOAD_UMIN:   return "AtomicLoadUMin";
-  case ISD::ATOMIC_LOAD_UMAX:   return "AtomicLoadUMax";
-  case ISD::ATOMIC_LOAD:        return "AtomicLoad";
-  case ISD::ATOMIC_STORE:       return "AtomicStore";
-  case ISD::PCMARKER:      return "PCMarker";
-  case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
-  case ISD::SRCVALUE:      return "SrcValue";
-  case ISD::MDNODE_SDNODE: return "MDNode";
-  case ISD::EntryToken:    return "EntryToken";
-  case ISD::TokenFactor:   return "TokenFactor";
-  case ISD::AssertSext:    return "AssertSext";
-  case ISD::AssertZext:    return "AssertZext";
-
-  case ISD::BasicBlock:    return "BasicBlock";
-  case ISD::VALUETYPE:     return "ValueType";
-  case ISD::Register:      return "Register";
-  case ISD::RegisterMask:  return "RegisterMask";
-  case ISD::Constant:      return "Constant";
-  case ISD::ConstantFP:    return "ConstantFP";
-  case ISD::GlobalAddress: return "GlobalAddress";
-  case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
-  case ISD::FrameIndex:    return "FrameIndex";
-  case ISD::JumpTable:     return "JumpTable";
-  case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
-  case ISD::RETURNADDR: return "RETURNADDR";
-  case ISD::FRAMEADDR: return "FRAMEADDR";
-  case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
-  case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
-  case ISD::LSDAADDR: return "LSDAADDR";
-  case ISD::EHSELECTION: return "EHSELECTION";
-  case ISD::EH_RETURN: return "EH_RETURN";
-  case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
-  case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
-  case ISD::ConstantPool:  return "ConstantPool";
-  case ISD::ExternalSymbol: return "ExternalSymbol";
-  case ISD::BlockAddress:  return "BlockAddress";
-  case ISD::INTRINSIC_WO_CHAIN:
-  case ISD::INTRINSIC_VOID:
-  case ISD::INTRINSIC_W_CHAIN: {
-    unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
-    unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
-    if (IID < Intrinsic::num_intrinsics)
-      return Intrinsic::getName((Intrinsic::ID)IID);
-    else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
-      return TII->getName(IID);
-    llvm_unreachable("Invalid intrinsic ID");
-  }
-
-  case ISD::BUILD_VECTOR:   return "BUILD_VECTOR";
-  case ISD::TargetConstant: return "TargetConstant";
-  case ISD::TargetConstantFP:return "TargetConstantFP";
-  case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
-  case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
-  case ISD::TargetFrameIndex: return "TargetFrameIndex";
-  case ISD::TargetJumpTable:  return "TargetJumpTable";
-  case ISD::TargetConstantPool:  return "TargetConstantPool";
-  case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
-  case ISD::TargetBlockAddress: return "TargetBlockAddress";
-
-  case ISD::CopyToReg:     return "CopyToReg";
-  case ISD::CopyFromReg:   return "CopyFromReg";
-  case ISD::UNDEF:         return "undef";
-  case ISD::MERGE_VALUES:  return "merge_values";
-  case ISD::INLINEASM:     return "inlineasm";
-  case ISD::EH_LABEL:      return "eh_label";
-  case ISD::HANDLENODE:    return "handlenode";
-
-  // Unary operators
-  case ISD::FABS:   return "fabs";
-  case ISD::FNEG:   return "fneg";
-  case ISD::FSQRT:  return "fsqrt";
-  case ISD::FSIN:   return "fsin";
-  case ISD::FCOS:   return "fcos";
-  case ISD::FTRUNC: return "ftrunc";
-  case ISD::FFLOOR: return "ffloor";
-  case ISD::FCEIL:  return "fceil";
-  case ISD::FRINT:  return "frint";
-  case ISD::FNEARBYINT: return "fnearbyint";
-  case ISD::FEXP:   return "fexp";
-  case ISD::FEXP2:  return "fexp2";
-  case ISD::FLOG:   return "flog";
-  case ISD::FLOG2:  return "flog2";
-  case ISD::FLOG10: return "flog10";
-
-  // Binary operators
-  case ISD::ADD:    return "add";
-  case ISD::SUB:    return "sub";
-  case ISD::MUL:    return "mul";
-  case ISD::MULHU:  return "mulhu";
-  case ISD::MULHS:  return "mulhs";
-  case ISD::SDIV:   return "sdiv";
-  case ISD::UDIV:   return "udiv";
-  case ISD::SREM:   return "srem";
-  case ISD::UREM:   return "urem";
-  case ISD::SMUL_LOHI:  return "smul_lohi";
-  case ISD::UMUL_LOHI:  return "umul_lohi";
-  case ISD::SDIVREM:    return "sdivrem";
-  case ISD::UDIVREM:    return "udivrem";
-  case ISD::AND:    return "and";
-  case ISD::OR:     return "or";
-  case ISD::XOR:    return "xor";
-  case ISD::SHL:    return "shl";
-  case ISD::SRA:    return "sra";
-  case ISD::SRL:    return "srl";
-  case ISD::ROTL:   return "rotl";
-  case ISD::ROTR:   return "rotr";
-  case ISD::FADD:   return "fadd";
-  case ISD::FSUB:   return "fsub";
-  case ISD::FMUL:   return "fmul";
-  case ISD::FDIV:   return "fdiv";
-  case ISD::FMA:    return "fma";
-  case ISD::FREM:   return "frem";
-  case ISD::FCOPYSIGN: return "fcopysign";
-  case ISD::FGETSIGN:  return "fgetsign";
-  case ISD::FPOW:   return "fpow";
-
-  case ISD::FPOWI:  return "fpowi";
-  case ISD::SETCC:       return "setcc";
-  case ISD::SELECT:      return "select";
-  case ISD::VSELECT:     return "vselect";
-  case ISD::SELECT_CC:   return "select_cc";
-  case ISD::INSERT_VECTOR_ELT:   return "insert_vector_elt";
-  case ISD::EXTRACT_VECTOR_ELT:  return "extract_vector_elt";
-  case ISD::CONCAT_VECTORS:      return "concat_vectors";
-  case ISD::INSERT_SUBVECTOR:    return "insert_subvector";
-  case ISD::EXTRACT_SUBVECTOR:   return "extract_subvector";
-  case ISD::SCALAR_TO_VECTOR:    return "scalar_to_vector";
-  case ISD::VECTOR_SHUFFLE:      return "vector_shuffle";
-  case ISD::CARRY_FALSE:         return "carry_false";
-  case ISD::ADDC:        return "addc";
-  case ISD::ADDE:        return "adde";
-  case ISD::SADDO:       return "saddo";
-  case ISD::UADDO:       return "uaddo";
-  case ISD::SSUBO:       return "ssubo";
-  case ISD::USUBO:       return "usubo";
-  case ISD::SMULO:       return "smulo";
-  case ISD::UMULO:       return "umulo";
-  case ISD::SUBC:        return "subc";
-  case ISD::SUBE:        return "sube";
-  case ISD::SHL_PARTS:   return "shl_parts";
-  case ISD::SRA_PARTS:   return "sra_parts";
-  case ISD::SRL_PARTS:   return "srl_parts";
-
-  // Conversion operators.
-  case ISD::SIGN_EXTEND: return "sign_extend";
-  case ISD::ZERO_EXTEND: return "zero_extend";
-  case ISD::ANY_EXTEND:  return "any_extend";
-  case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
-  case ISD::TRUNCATE:    return "truncate";
-  case ISD::FP_ROUND:    return "fp_round";
-  case ISD::FLT_ROUNDS_: return "flt_rounds";
-  case ISD::FP_ROUND_INREG: return "fp_round_inreg";
-  case ISD::FP_EXTEND:   return "fp_extend";
-
-  case ISD::SINT_TO_FP:  return "sint_to_fp";
-  case ISD::UINT_TO_FP:  return "uint_to_fp";
-  case ISD::FP_TO_SINT:  return "fp_to_sint";
-  case ISD::FP_TO_UINT:  return "fp_to_uint";
-  case ISD::BITCAST:     return "bitcast";
-  case ISD::FP16_TO_FP32: return "fp16_to_fp32";
-  case ISD::FP32_TO_FP16: return "fp32_to_fp16";
-
-  case ISD::CONVERT_RNDSAT: {
-    switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
-    default: llvm_unreachable("Unknown cvt code!");
-    case ISD::CVT_FF:  return "cvt_ff";
-    case ISD::CVT_FS:  return "cvt_fs";
-    case ISD::CVT_FU:  return "cvt_fu";
-    case ISD::CVT_SF:  return "cvt_sf";
-    case ISD::CVT_UF:  return "cvt_uf";
-    case ISD::CVT_SS:  return "cvt_ss";
-    case ISD::CVT_SU:  return "cvt_su";
-    case ISD::CVT_US:  return "cvt_us";
-    case ISD::CVT_UU:  return "cvt_uu";
-    }
-  }
-
-    // Control flow instructions
-  case ISD::BR:      return "br";
-  case ISD::BRIND:   return "brind";
-  case ISD::BR_JT:   return "br_jt";
-  case ISD::BRCOND:  return "brcond";
-  case ISD::BR_CC:   return "br_cc";
-  case ISD::CALLSEQ_START:  return "callseq_start";
-  case ISD::CALLSEQ_END:    return "callseq_end";
-
-    // Other operators
-  case ISD::LOAD:               return "load";
-  case ISD::STORE:              return "store";
-  case ISD::VAARG:              return "vaarg";
-  case ISD::VACOPY:             return "vacopy";
-  case ISD::VAEND:              return "vaend";
-  case ISD::VASTART:            return "vastart";
-  case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
-  case ISD::EXTRACT_ELEMENT:    return "extract_element";
-  case ISD::BUILD_PAIR:         return "build_pair";
-  case ISD::STACKSAVE:          return "stacksave";
-  case ISD::STACKRESTORE:       return "stackrestore";
-  case ISD::TRAP:               return "trap";
-
-  // Bit manipulation
-  case ISD::BSWAP:           return "bswap";
-  case ISD::CTPOP:           return "ctpop";
-  case ISD::CTTZ:            return "cttz";
-  case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
-  case ISD::CTLZ:            return "ctlz";
-  case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
-
-  // Trampolines
-  case ISD::INIT_TRAMPOLINE: return "init_trampoline";
-  case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
-
-  case ISD::CONDCODE:
-    switch (cast<CondCodeSDNode>(this)->get()) {
-    default: llvm_unreachable("Unknown setcc condition!");
-    case ISD::SETOEQ:  return "setoeq";
-    case ISD::SETOGT:  return "setogt";
-    case ISD::SETOGE:  return "setoge";
-    case ISD::SETOLT:  return "setolt";
-    case ISD::SETOLE:  return "setole";
-    case ISD::SETONE:  return "setone";
-
-    case ISD::SETO:    return "seto";
-    case ISD::SETUO:   return "setuo";
-    case ISD::SETUEQ:  return "setue";
-    case ISD::SETUGT:  return "setugt";
-    case ISD::SETUGE:  return "setuge";
-    case ISD::SETULT:  return "setult";
-    case ISD::SETULE:  return "setule";
-    case ISD::SETUNE:  return "setune";
-
-    case ISD::SETEQ:   return "seteq";
-    case ISD::SETGT:   return "setgt";
-    case ISD::SETGE:   return "setge";
-    case ISD::SETLT:   return "setlt";
-    case ISD::SETLE:   return "setle";
-    case ISD::SETNE:   return "setne";
-
-    case ISD::SETTRUE:   return "settrue";
-    case ISD::SETTRUE2:  return "settrue2";
-    case ISD::SETFALSE:  return "setfalse";
-    case ISD::SETFALSE2: return "setfalse2";
-    }
-  }
-}
-
-const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
-  switch (AM) {
-  default:
-    return "";
-  case ISD::PRE_INC:
-    return "<pre-inc>";
-  case ISD::PRE_DEC:
-    return "<pre-dec>";
-  case ISD::POST_INC:
-    return "<post-inc>";
-  case ISD::POST_DEC:
-    return "<post-dec>";
-  }
-}
-
-std::string ISD::ArgFlagsTy::getArgFlagsString() {
-  std::string S = "< ";
-
-  if (isZExt())
-    S += "zext ";
-  if (isSExt())
-    S += "sext ";
-  if (isInReg())
-    S += "inreg ";
-  if (isSRet())
-    S += "sret ";
-  if (isByVal())
-    S += "byval ";
-  if (isNest())
-    S += "nest ";
-  if (getByValAlign())
-    S += "byval-align:" + utostr(getByValAlign()) + " ";
-  if (getOrigAlign())
-    S += "orig-align:" + utostr(getOrigAlign()) + " ";
-  if (getByValSize())
-    S += "byval-size:" + utostr(getByValSize()) + " ";
-  return S + ">";
-}
-
-void SDNode::dump() const { dump(0); }
-void SDNode::dump(const SelectionDAG *G) const {
-  print(dbgs(), G);
-  dbgs() << '\n';
-}
-
-void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
-  OS << (void*)this << ": ";
-
-  for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
-    if (i) OS << ",";
-    if (getValueType(i) == MVT::Other)
-      OS << "ch";
-    else
-      OS << getValueType(i).getEVTString();
-  }
-  OS << " = " << getOperationName(G);
-}
-
-void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
-  if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
-    if (!MN->memoperands_empty()) {
-      OS << "<";
-      OS << "Mem:";
-      for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
-           e = MN->memoperands_end(); i != e; ++i) {
-        OS << **i;
-        if (llvm::next(i) != e)
-          OS << " ";
-      }
-      OS << ">";
-    }
-  } else if (const ShuffleVectorSDNode *SVN =
-               dyn_cast<ShuffleVectorSDNode>(this)) {
-    OS << "<";
-    for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
-      int Idx = SVN->getMaskElt(i);
-      if (i) OS << ",";
-      if (Idx < 0)
-        OS << "u";
-      else
-        OS << Idx;
-    }
-    OS << ">";
-  } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
-    OS << '<' << CSDN->getAPIntValue() << '>';
-  } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
-    if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
-      OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
-    else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
-      OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
-    else {
-      OS << "<APFloat(";
-      CSDN->getValueAPF().bitcastToAPInt().dump();
-      OS << ")>";
-    }
-  } else if (const GlobalAddressSDNode *GADN =
-             dyn_cast<GlobalAddressSDNode>(this)) {
-    int64_t offset = GADN->getOffset();
-    OS << '<';
-    WriteAsOperand(OS, GADN->getGlobal());
-    OS << '>';
-    if (offset > 0)
-      OS << " + " << offset;
-    else
-      OS << " " << offset;
-    if (unsigned int TF = GADN->getTargetFlags())
-      OS << " [TF=" << TF << ']';
-  } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
-    OS << "<" << FIDN->getIndex() << ">";
-  } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
-    OS << "<" << JTDN->getIndex() << ">";
-    if (unsigned int TF = JTDN->getTargetFlags())
-      OS << " [TF=" << TF << ']';
-  } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
-    int offset = CP->getOffset();
-    if (CP->isMachineConstantPoolEntry())
-      OS << "<" << *CP->getMachineCPVal() << ">";
-    else
-      OS << "<" << *CP->getConstVal() << ">";
-    if (offset > 0)
-      OS << " + " << offset;
-    else
-      OS << " " << offset;
-    if (unsigned int TF = CP->getTargetFlags())
-      OS << " [TF=" << TF << ']';
-  } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
-    OS << "<";
-    const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
-    if (LBB)
-      OS << LBB->getName() << " ";
-    OS << (const void*)BBDN->getBasicBlock() << ">";
-  } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
-    OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
-  } else if (const ExternalSymbolSDNode *ES =
-             dyn_cast<ExternalSymbolSDNode>(this)) {
-    OS << "'" << ES->getSymbol() << "'";
-    if (unsigned int TF = ES->getTargetFlags())
-      OS << " [TF=" << TF << ']';
-  } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
-    if (M->getValue())
-      OS << "<" << M->getValue() << ">";
-    else
-      OS << "<null>";
-  } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
-    if (MD->getMD())
-      OS << "<" << MD->getMD() << ">";
-    else
-      OS << "<null>";
-  } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
-    OS << ":" << N->getVT().getEVTString();
-  }
-  else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
-    OS << "<" << *LD->getMemOperand();
-
-    bool doExt = true;
-    switch (LD->getExtensionType()) {
-    default: doExt = false; break;
-    case ISD::EXTLOAD: OS << ", anyext"; break;
-    case ISD::SEXTLOAD: OS << ", sext"; break;
-    case ISD::ZEXTLOAD: OS << ", zext"; break;
-    }
-    if (doExt)
-      OS << " from " << LD->getMemoryVT().getEVTString();
-
-    const char *AM = getIndexedModeName(LD->getAddressingMode());
-    if (*AM)
-      OS << ", " << AM;
-
-    OS << ">";
-  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
-    OS << "<" << *ST->getMemOperand();
-
-    if (ST->isTruncatingStore())
-      OS << ", trunc to " << ST->getMemoryVT().getEVTString();
-
-    const char *AM = getIndexedModeName(ST->getAddressingMode());
-    if (*AM)
-      OS << ", " << AM;
-
-    OS << ">";
-  } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
-    OS << "<" << *M->getMemOperand() << ">";
-  } else if (const BlockAddressSDNode *BA =
-               dyn_cast<BlockAddressSDNode>(this)) {
-    OS << "<";
-    WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
-    OS << ", ";
-    WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
-    OS << ">";
-    if (unsigned int TF = BA->getTargetFlags())
-      OS << " [TF=" << TF << ']';
-  }
-
-  if (G)
-    if (unsigned Order = G->GetOrdering(this))
-      OS << " [ORD=" << Order << ']';
-
-  if (getNodeId() != -1)
-    OS << " [ID=" << getNodeId() << ']';
-
-  DebugLoc dl = getDebugLoc();
-  if (G && !dl.isUnknown()) {
-    DIScope
-      Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
-    OS << " dbg:";
-    // Omit the directory, since it's usually long and uninteresting.
-    if (Scope.Verify())
-      OS << Scope.getFilename();
-    else
-      OS << "<unknown>";
-    OS << ':' << dl.getLine();
-    if (dl.getCol() != 0)
-      OS << ':' << dl.getCol();
-  }
-}
-
-void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
-  print_types(OS, G);
-  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
-    if (i) OS << ", "; else OS << " ";
-    OS << (void*)getOperand(i).getNode();
-    if (unsigned RN = getOperand(i).getResNo())
-      OS << ":" << RN;
-  }
-  print_details(OS, G);
-}
-
-static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
-                                  const SelectionDAG *G, unsigned depth,
-                                  unsigned indent) {
-  if (depth == 0)
-    return;
-
-  OS.indent(indent);
-
-  N->print(OS, G);
-
-  if (depth < 1)
-    return;
-
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
-    // Don't follow chain operands.
-    if (N->getOperand(i).getValueType() == MVT::Other)
-      continue;
-    OS << '\n';
-    printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
-  }
-}
-
-void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
-                            unsigned depth) const {
-  printrWithDepthHelper(OS, this, G, depth, 0);
-}
-
-void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
-  // Don't print impossibly deep things.
-  printrWithDepth(OS, G, 10);
-}
-
-void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
-  printrWithDepth(dbgs(), G, depth);
-}
-
-void SDNode::dumprFull(const SelectionDAG *G) const {
-  // Don't print impossibly deep things.
-  dumprWithDepth(G, 10);
-}
-
-static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-    if (N->getOperand(i).getNode()->hasOneUse())
-      DumpNodes(N->getOperand(i).getNode(), indent+2, G);
-    else
-      dbgs() << "\n" << std::string(indent+2, ' ')
-           << (void*)N->getOperand(i).getNode() << ": <multiple use>";
-
-
-  dbgs() << "\n";
-  dbgs().indent(indent);
-  N->dump(G);
-}
-
 SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
   assert(N->getNumValues() == 1 &&
          "Can't unroll a vector with multiple results!");
@@ -6625,74 +6069,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
   return 0;
 }
 
-void SelectionDAG::dump() const {
-  dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
-
-  for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
-       I != E; ++I) {
-    const SDNode *N = I;
-    if (!N->hasOneUse() && N != getRoot().getNode())
-      DumpNodes(N, 2, this);
-  }
-
-  if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
-
-  dbgs() << "\n\n";
-}
-
-void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
-  print_types(OS, G);
-  print_details(OS, G);
-}
-
-typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
-static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
-                       const SelectionDAG *G, VisitedSDNodeSet &once) {
-  if (!once.insert(N))          // If we've been here before, return now.
-    return;
-
-  // Dump the current SDNode, but don't end the line yet.
-  OS.indent(indent);
-  N->printr(OS, G);
-
-  // Having printed this SDNode, walk the children:
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
-    const SDNode *child = N->getOperand(i).getNode();
-
-    if (i) OS << ",";
-    OS << " ";
-
-    if (child->getNumOperands() == 0) {
-      // This child has no grandchildren; print it inline right here.
-      child->printr(OS, G);
-      once.insert(child);
-    } else {         // Just the address. FIXME: also print the child's opcode.
-      OS << (void*)child;
-      if (unsigned RN = N->getOperand(i).getResNo())
-        OS << ":" << RN;
-    }
-  }
-
-  OS << "\n";
-
-  // Dump children that have grandchildren on their own line(s).
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
-    const SDNode *child = N->getOperand(i).getNode();
-    DumpNodesr(OS, child, indent+2, G, once);
-  }
-}
-
-void SDNode::dumpr() const {
-  VisitedSDNodeSet once;
-  DumpNodesr(dbgs(), this, 0, 0, once);
-}
-
-void SDNode::dumpr(const SelectionDAG *G) const {
-  VisitedSDNodeSet once;
-  DumpNodesr(dbgs(), this, 0, G, once);
-}
-
-
 // getAddressSpace - Return the address space this GlobalAddress belongs to.
 unsigned GlobalAddressSDNode::getAddressSpace() const {
   return getGlobal()->getType()->getAddressSpace();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4e4aa11..2ac9655 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2411,14 +2411,14 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
 
   BranchProbabilityInfo *BPI = FuncInfo.BPI;
   // Start with "simple" cases
-  for (size_t i = 0; i < SI.getNumCases(); ++i) {
-    BasicBlock *SuccBB = SI.getCaseSuccessor(i);
+  for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
+       i != e; ++i) {
+    const BasicBlock *SuccBB = i.getCaseSuccessor();
     MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
 
     uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
 
-    Cases.push_back(Case(SI.getCaseValue(i),
-                         SI.getCaseValue(i),
+    Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
                          SMBB, ExtraWeight));
   }
   std::sort(Cases.begin(), Cases.end(), CaseCmp());
@@ -4561,8 +4561,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
     MDNode *Variable = DI.getVariable();
     const Value *Address = DI.getAddress();
-    if (!Address || !DIVariable(Variable).Verify())
+    if (!Address || !DIVariable(Variable).Verify()) {
+      DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
       return 0;
+    }
 
     // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
     // but do not always have a corresponding SDNode built.  The SDNodeOrder
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
new file mode 100644
index 0000000..f981afb
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -0,0 +1,631 @@
+//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::dump method and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+  switch (getOpcode()) {
+  default:
+    if (getOpcode() < ISD::BUILTIN_OP_END)
+      return "<<Unknown DAG Node>>";
+    if (isMachineOpcode()) {
+      if (G)
+        if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+          if (getMachineOpcode() < TII->getNumOpcodes())
+            return TII->getName(getMachineOpcode());
+      return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
+    }
+    if (G) {
+      const TargetLowering &TLI = G->getTargetLoweringInfo();
+      const char *Name = TLI.getTargetNodeName(getOpcode());
+      if (Name) return Name;
+      return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
+    }
+    return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
+
+#ifndef NDEBUG
+  case ISD::DELETED_NODE:               return "<<Deleted Node!>>";
+#endif
+  case ISD::PREFETCH:                   return "Prefetch";
+  case ISD::MEMBARRIER:                 return "MemBarrier";
+  case ISD::ATOMIC_FENCE:               return "AtomicFence";
+  case ISD::ATOMIC_CMP_SWAP:            return "AtomicCmpSwap";
+  case ISD::ATOMIC_SWAP:                return "AtomicSwap";
+  case ISD::ATOMIC_LOAD_ADD:            return "AtomicLoadAdd";
+  case ISD::ATOMIC_LOAD_SUB:            return "AtomicLoadSub";
+  case ISD::ATOMIC_LOAD_AND:            return "AtomicLoadAnd";
+  case ISD::ATOMIC_LOAD_OR:             return "AtomicLoadOr";
+  case ISD::ATOMIC_LOAD_XOR:            return "AtomicLoadXor";
+  case ISD::ATOMIC_LOAD_NAND:           return "AtomicLoadNand";
+  case ISD::ATOMIC_LOAD_MIN:            return "AtomicLoadMin";
+  case ISD::ATOMIC_LOAD_MAX:            return "AtomicLoadMax";
+  case ISD::ATOMIC_LOAD_UMIN:           return "AtomicLoadUMin";
+  case ISD::ATOMIC_LOAD_UMAX:           return "AtomicLoadUMax";
+  case ISD::ATOMIC_LOAD:                return "AtomicLoad";
+  case ISD::ATOMIC_STORE:               return "AtomicStore";
+  case ISD::PCMARKER:                   return "PCMarker";
+  case ISD::READCYCLECOUNTER:           return "ReadCycleCounter";
+  case ISD::SRCVALUE:                   return "SrcValue";
+  case ISD::MDNODE_SDNODE:              return "MDNode";
+  case ISD::EntryToken:                 return "EntryToken";
+  case ISD::TokenFactor:                return "TokenFactor";
+  case ISD::AssertSext:                 return "AssertSext";
+  case ISD::AssertZext:                 return "AssertZext";
+
+  case ISD::BasicBlock:                 return "BasicBlock";
+  case ISD::VALUETYPE:                  return "ValueType";
+  case ISD::Register:                   return "Register";
+  case ISD::RegisterMask:               return "RegisterMask";
+  case ISD::Constant:                   return "Constant";
+  case ISD::ConstantFP:                 return "ConstantFP";
+  case ISD::GlobalAddress:              return "GlobalAddress";
+  case ISD::GlobalTLSAddress:           return "GlobalTLSAddress";
+  case ISD::FrameIndex:                 return "FrameIndex";
+  case ISD::JumpTable:                  return "JumpTable";
+  case ISD::GLOBAL_OFFSET_TABLE:        return "GLOBAL_OFFSET_TABLE";
+  case ISD::RETURNADDR:                 return "RETURNADDR";
+  case ISD::FRAMEADDR:                  return "FRAMEADDR";
+  case ISD::FRAME_TO_ARGS_OFFSET:       return "FRAME_TO_ARGS_OFFSET";
+  case ISD::EXCEPTIONADDR:              return "EXCEPTIONADDR";
+  case ISD::LSDAADDR:                   return "LSDAADDR";
+  case ISD::EHSELECTION:                return "EHSELECTION";
+  case ISD::EH_RETURN:                  return "EH_RETURN";
+  case ISD::EH_SJLJ_SETJMP:             return "EH_SJLJ_SETJMP";
+  case ISD::EH_SJLJ_LONGJMP:            return "EH_SJLJ_LONGJMP";
+  case ISD::ConstantPool:               return "ConstantPool";
+  case ISD::ExternalSymbol:             return "ExternalSymbol";
+  case ISD::BlockAddress:               return "BlockAddress";
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_VOID:
+  case ISD::INTRINSIC_W_CHAIN: {
+    unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
+    unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
+    if (IID < Intrinsic::num_intrinsics)
+      return Intrinsic::getName((Intrinsic::ID)IID);
+    else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+      return TII->getName(IID);
+    llvm_unreachable("Invalid intrinsic ID");
+  }
+
+  case ISD::BUILD_VECTOR:               return "BUILD_VECTOR";
+  case ISD::TargetConstant:             return "TargetConstant";
+  case ISD::TargetConstantFP:           return "TargetConstantFP";
+  case ISD::TargetGlobalAddress:        return "TargetGlobalAddress";
+  case ISD::TargetGlobalTLSAddress:     return "TargetGlobalTLSAddress";
+  case ISD::TargetFrameIndex:           return "TargetFrameIndex";
+  case ISD::TargetJumpTable:            return "TargetJumpTable";
+  case ISD::TargetConstantPool:         return "TargetConstantPool";
+  case ISD::TargetExternalSymbol:       return "TargetExternalSymbol";
+  case ISD::TargetBlockAddress:         return "TargetBlockAddress";
+
+  case ISD::CopyToReg:                  return "CopyToReg";
+  case ISD::CopyFromReg:                return "CopyFromReg";
+  case ISD::UNDEF:                      return "undef";
+  case ISD::MERGE_VALUES:               return "merge_values";
+  case ISD::INLINEASM:                  return "inlineasm";
+  case ISD::EH_LABEL:                   return "eh_label";
+  case ISD::HANDLENODE:                 return "handlenode";
+
+  // Unary operators
+  case ISD::FABS:                       return "fabs";
+  case ISD::FNEG:                       return "fneg";
+  case ISD::FSQRT:                      return "fsqrt";
+  case ISD::FSIN:                       return "fsin";
+  case ISD::FCOS:                       return "fcos";
+  case ISD::FTRUNC:                     return "ftrunc";
+  case ISD::FFLOOR:                     return "ffloor";
+  case ISD::FCEIL:                      return "fceil";
+  case ISD::FRINT:                      return "frint";
+  case ISD::FNEARBYINT:                 return "fnearbyint";
+  case ISD::FEXP:                       return "fexp";
+  case ISD::FEXP2:                      return "fexp2";
+  case ISD::FLOG:                       return "flog";
+  case ISD::FLOG2:                      return "flog2";
+  case ISD::FLOG10:                     return "flog10";
+
+  // Binary operators
+  case ISD::ADD:                        return "add";
+  case ISD::SUB:                        return "sub";
+  case ISD::MUL:                        return "mul";
+  case ISD::MULHU:                      return "mulhu";
+  case ISD::MULHS:                      return "mulhs";
+  case ISD::SDIV:                       return "sdiv";
+  case ISD::UDIV:                       return "udiv";
+  case ISD::SREM:                       return "srem";
+  case ISD::UREM:                       return "urem";
+  case ISD::SMUL_LOHI:                  return "smul_lohi";
+  case ISD::UMUL_LOHI:                  return "umul_lohi";
+  case ISD::SDIVREM:                    return "sdivrem";
+  case ISD::UDIVREM:                    return "udivrem";
+  case ISD::AND:                        return "and";
+  case ISD::OR:                         return "or";
+  case ISD::XOR:                        return "xor";
+  case ISD::SHL:                        return "shl";
+  case ISD::SRA:                        return "sra";
+  case ISD::SRL:                        return "srl";
+  case ISD::ROTL:                       return "rotl";
+  case ISD::ROTR:                       return "rotr";
+  case ISD::FADD:                       return "fadd";
+  case ISD::FSUB:                       return "fsub";
+  case ISD::FMUL:                       return "fmul";
+  case ISD::FDIV:                       return "fdiv";
+  case ISD::FMA:                        return "fma";
+  case ISD::FREM:                       return "frem";
+  case ISD::FCOPYSIGN:                  return "fcopysign";
+  case ISD::FGETSIGN:                   return "fgetsign";
+  case ISD::FPOW:                       return "fpow";
+
+  case ISD::FPOWI:                      return "fpowi";
+  case ISD::SETCC:                      return "setcc";
+  case ISD::SELECT:                     return "select";
+  case ISD::VSELECT:                    return "vselect";
+  case ISD::SELECT_CC:                  return "select_cc";
+  case ISD::INSERT_VECTOR_ELT:          return "insert_vector_elt";
+  case ISD::EXTRACT_VECTOR_ELT:         return "extract_vector_elt";
+  case ISD::CONCAT_VECTORS:             return "concat_vectors";
+  case ISD::INSERT_SUBVECTOR:           return "insert_subvector";
+  case ISD::EXTRACT_SUBVECTOR:          return "extract_subvector";
+  case ISD::SCALAR_TO_VECTOR:           return "scalar_to_vector";
+  case ISD::VECTOR_SHUFFLE:             return "vector_shuffle";
+  case ISD::CARRY_FALSE:                return "carry_false";
+  case ISD::ADDC:                       return "addc";
+  case ISD::ADDE:                       return "adde";
+  case ISD::SADDO:                      return "saddo";
+  case ISD::UADDO:                      return "uaddo";
+  case ISD::SSUBO:                      return "ssubo";
+  case ISD::USUBO:                      return "usubo";
+  case ISD::SMULO:                      return "smulo";
+  case ISD::UMULO:                      return "umulo";
+  case ISD::SUBC:                       return "subc";
+  case ISD::SUBE:                       return "sube";
+  case ISD::SHL_PARTS:                  return "shl_parts";
+  case ISD::SRA_PARTS:                  return "sra_parts";
+  case ISD::SRL_PARTS:                  return "srl_parts";
+
+  // Conversion operators.
+  case ISD::SIGN_EXTEND:                return "sign_extend";
+  case ISD::ZERO_EXTEND:                return "zero_extend";
+  case ISD::ANY_EXTEND:                 return "any_extend";
+  case ISD::SIGN_EXTEND_INREG:          return "sign_extend_inreg";
+  case ISD::TRUNCATE:                   return "truncate";
+  case ISD::FP_ROUND:                   return "fp_round";
+  case ISD::FLT_ROUNDS_:                return "flt_rounds";
+  case ISD::FP_ROUND_INREG:             return "fp_round_inreg";
+  case ISD::FP_EXTEND:                  return "fp_extend";
+
+  case ISD::SINT_TO_FP:                 return "sint_to_fp";
+  case ISD::UINT_TO_FP:                 return "uint_to_fp";
+  case ISD::FP_TO_SINT:                 return "fp_to_sint";
+  case ISD::FP_TO_UINT:                 return "fp_to_uint";
+  case ISD::BITCAST:                    return "bitcast";
+  case ISD::FP16_TO_FP32:               return "fp16_to_fp32";
+  case ISD::FP32_TO_FP16:               return "fp32_to_fp16";
+
+  case ISD::CONVERT_RNDSAT: {
+    switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
+    default: llvm_unreachable("Unknown cvt code!");
+    case ISD::CVT_FF:                   return "cvt_ff";
+    case ISD::CVT_FS:                   return "cvt_fs";
+    case ISD::CVT_FU:                   return "cvt_fu";
+    case ISD::CVT_SF:                   return "cvt_sf";
+    case ISD::CVT_UF:                   return "cvt_uf";
+    case ISD::CVT_SS:                   return "cvt_ss";
+    case ISD::CVT_SU:                   return "cvt_su";
+    case ISD::CVT_US:                   return "cvt_us";
+    case ISD::CVT_UU:                   return "cvt_uu";
+    }
+  }
+
+    // Control flow instructions
+  case ISD::BR:                         return "br";
+  case ISD::BRIND:                      return "brind";
+  case ISD::BR_JT:                      return "br_jt";
+  case ISD::BRCOND:                     return "brcond";
+  case ISD::BR_CC:                      return "br_cc";
+  case ISD::CALLSEQ_START:              return "callseq_start";
+  case ISD::CALLSEQ_END:                return "callseq_end";
+
+    // Other operators
+  case ISD::LOAD:                       return "load";
+  case ISD::STORE:                      return "store";
+  case ISD::VAARG:                      return "vaarg";
+  case ISD::VACOPY:                     return "vacopy";
+  case ISD::VAEND:                      return "vaend";
+  case ISD::VASTART:                    return "vastart";
+  case ISD::DYNAMIC_STACKALLOC:         return "dynamic_stackalloc";
+  case ISD::EXTRACT_ELEMENT:            return "extract_element";
+  case ISD::BUILD_PAIR:                 return "build_pair";
+  case ISD::STACKSAVE:                  return "stacksave";
+  case ISD::STACKRESTORE:               return "stackrestore";
+  case ISD::TRAP:                       return "trap";
+
+  // Bit manipulation
+  case ISD::BSWAP:                      return "bswap";
+  case ISD::CTPOP:                      return "ctpop";
+  case ISD::CTTZ:                       return "cttz";
+  case ISD::CTTZ_ZERO_UNDEF:            return "cttz_zero_undef";
+  case ISD::CTLZ:                       return "ctlz";
+  case ISD::CTLZ_ZERO_UNDEF:            return "ctlz_zero_undef";
+
+  // Trampolines
+  case ISD::INIT_TRAMPOLINE:            return "init_trampoline";
+  case ISD::ADJUST_TRAMPOLINE:          return "adjust_trampoline";
+
+  case ISD::CONDCODE:
+    switch (cast<CondCodeSDNode>(this)->get()) {
+    default: llvm_unreachable("Unknown setcc condition!");
+    case ISD::SETOEQ:                   return "setoeq";
+    case ISD::SETOGT:                   return "setogt";
+    case ISD::SETOGE:                   return "setoge";
+    case ISD::SETOLT:                   return "setolt";
+    case ISD::SETOLE:                   return "setole";
+    case ISD::SETONE:                   return "setone";
+
+    case ISD::SETO:                     return "seto";
+    case ISD::SETUO:                    return "setuo";
+    case ISD::SETUEQ:                   return "setue";
+    case ISD::SETUGT:                   return "setugt";
+    case ISD::SETUGE:                   return "setuge";
+    case ISD::SETULT:                   return "setult";
+    case ISD::SETULE:                   return "setule";
+    case ISD::SETUNE:                   return "setune";
+
+    case ISD::SETEQ:                    return "seteq";
+    case ISD::SETGT:                    return "setgt";
+    case ISD::SETGE:                    return "setge";
+    case ISD::SETLT:                    return "setlt";
+    case ISD::SETLE:                    return "setle";
+    case ISD::SETNE:                    return "setne";
+
+    case ISD::SETTRUE:                  return "settrue";
+    case ISD::SETTRUE2:                 return "settrue2";
+    case ISD::SETFALSE:                 return "setfalse";
+    case ISD::SETFALSE2:                return "setfalse2";
+    }
+  }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+  switch (AM) {
+  default:              return "";
+  case ISD::PRE_INC:    return "<pre-inc>";
+  case ISD::PRE_DEC:    return "<pre-dec>";
+  case ISD::POST_INC:   return "<post-inc>";
+  case ISD::POST_DEC:   return "<post-dec>";
+  }
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+  print(dbgs(), G);
+  dbgs() << '\n';
+}
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+  OS << (void*)this << ": ";
+
+  for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+    if (i) OS << ",";
+    if (getValueType(i) == MVT::Other)
+      OS << "ch";
+    else
+      OS << getValueType(i).getEVTString();
+  }
+  OS << " = " << getOperationName(G);
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+  if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+    if (!MN->memoperands_empty()) {
+      OS << "<";
+      OS << "Mem:";
+      for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+           e = MN->memoperands_end(); i != e; ++i) {
+        OS << **i;
+        if (llvm::next(i) != e)
+          OS << " ";
+      }
+      OS << ">";
+    }
+  } else if (const ShuffleVectorSDNode *SVN =
+               dyn_cast<ShuffleVectorSDNode>(this)) {
+    OS << "<";
+    for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+      int Idx = SVN->getMaskElt(i);
+      if (i) OS << ",";
+      if (Idx < 0)
+        OS << "u";
+      else
+        OS << Idx;
+    }
+    OS << ">";
+  } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+    OS << '<' << CSDN->getAPIntValue() << '>';
+  } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+    if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+      OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+    else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+      OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+    else {
+      OS << "<APFloat(";
+      CSDN->getValueAPF().bitcastToAPInt().dump();
+      OS << ")>";
+    }
+  } else if (const GlobalAddressSDNode *GADN =
+             dyn_cast<GlobalAddressSDNode>(this)) {
+    int64_t offset = GADN->getOffset();
+    OS << '<';
+    WriteAsOperand(OS, GADN->getGlobal());
+    OS << '>';
+    if (offset > 0)
+      OS << " + " << offset;
+    else
+      OS << " " << offset;
+    if (unsigned int TF = GADN->getTargetFlags())
+      OS << " [TF=" << TF << ']';
+  } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+    OS << "<" << FIDN->getIndex() << ">";
+  } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+    OS << "<" << JTDN->getIndex() << ">";
+    if (unsigned int TF = JTDN->getTargetFlags())
+      OS << " [TF=" << TF << ']';
+  } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+    int offset = CP->getOffset();
+    if (CP->isMachineConstantPoolEntry())
+      OS << "<" << *CP->getMachineCPVal() << ">";
+    else
+      OS << "<" << *CP->getConstVal() << ">";
+    if (offset > 0)
+      OS << " + " << offset;
+    else
+      OS << " " << offset;
+    if (unsigned int TF = CP->getTargetFlags())
+      OS << " [TF=" << TF << ']';
+  } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+    OS << "<";
+    const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+    if (LBB)
+      OS << LBB->getName() << " ";
+    OS << (const void*)BBDN->getBasicBlock() << ">";
+  } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+    OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
+  } else if (const ExternalSymbolSDNode *ES =
+             dyn_cast<ExternalSymbolSDNode>(this)) {
+    OS << "'" << ES->getSymbol() << "'";
+    if (unsigned int TF = ES->getTargetFlags())
+      OS << " [TF=" << TF << ']';
+  } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+    if (M->getValue())
+      OS << "<" << M->getValue() << ">";
+    else
+      OS << "<null>";
+  } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
+    if (MD->getMD())
+      OS << "<" << MD->getMD() << ">";
+    else
+      OS << "<null>";
+  } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+    OS << ":" << N->getVT().getEVTString();
+  }
+  else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+    OS << "<" << *LD->getMemOperand();
+
+    bool doExt = true;
+    switch (LD->getExtensionType()) {
+    default: doExt = false; break;
+    case ISD::EXTLOAD:  OS << ", anyext"; break;
+    case ISD::SEXTLOAD: OS << ", sext"; break;
+    case ISD::ZEXTLOAD: OS << ", zext"; break;
+    }
+    if (doExt)
+      OS << " from " << LD->getMemoryVT().getEVTString();
+
+    const char *AM = getIndexedModeName(LD->getAddressingMode());
+    if (*AM)
+      OS << ", " << AM;
+
+    OS << ">";
+  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+    OS << "<" << *ST->getMemOperand();
+
+    if (ST->isTruncatingStore())
+      OS << ", trunc to " << ST->getMemoryVT().getEVTString();
+
+    const char *AM = getIndexedModeName(ST->getAddressingMode());
+    if (*AM)
+      OS << ", " << AM;
+
+    OS << ">";
+  } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+    OS << "<" << *M->getMemOperand() << ">";
+  } else if (const BlockAddressSDNode *BA =
+               dyn_cast<BlockAddressSDNode>(this)) {
+    OS << "<";
+    WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
+    OS << ", ";
+    WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
+    OS << ">";
+    if (unsigned int TF = BA->getTargetFlags())
+      OS << " [TF=" << TF << ']';
+  }
+
+  if (G)
+    if (unsigned Order = G->GetOrdering(this))
+      OS << " [ORD=" << Order << ']';
+
+  if (getNodeId() != -1)
+    OS << " [ID=" << getNodeId() << ']';
+
+  DebugLoc dl = getDebugLoc();
+  if (G && !dl.isUnknown()) {
+    DIScope
+      Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
+    OS << " dbg:";
+    // Omit the directory, since it's usually long and uninteresting.
+    if (Scope.Verify())
+      OS << Scope.getFilename();
+    else
+      OS << "<unknown>";
+    OS << ':' << dl.getLine();
+    if (dl.getCol() != 0)
+      OS << ':' << dl.getCol();
+  }
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    if (N->getOperand(i).getNode()->hasOneUse())
+      DumpNodes(N->getOperand(i).getNode(), indent+2, G);
+    else
+      dbgs() << "\n" << std::string(indent+2, ' ')
+             << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+
+  dbgs() << '\n';
+  dbgs().indent(indent);
+  N->dump(G);
+}
+
+void SelectionDAG::dump() const {
+  dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+
+  for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+       I != E; ++I) {
+    const SDNode *N = I;
+    if (!N->hasOneUse() && N != getRoot().getNode())
+      DumpNodes(N, 2, this);
+  }
+
+  if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+  dbgs() << "\n\n";
+}
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+  print_types(OS, G);
+  print_details(OS, G);
+}
+
+typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+                       const SelectionDAG *G, VisitedSDNodeSet &once) {
+  if (!once.insert(N))          // If we've been here before, return now.
+    return;
+
+  // Dump the current SDNode, but don't end the line yet.
+  OS.indent(indent);
+  N->printr(OS, G);
+
+  // Having printed this SDNode, walk the children:
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    const SDNode *child = N->getOperand(i).getNode();
+
+    if (i) OS << ",";
+    OS << " ";
+
+    if (child->getNumOperands() == 0) {
+      // This child has no grandchildren; print it inline right here.
+      child->printr(OS, G);
+      once.insert(child);
+    } else {         // Just the address. FIXME: also print the child's opcode.
+      OS << (void*)child;
+      if (unsigned RN = N->getOperand(i).getResNo())
+        OS << ":" << RN;
+    }
+  }
+
+  OS << "\n";
+
+  // Dump children that have grandchildren on their own line(s).
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    const SDNode *child = N->getOperand(i).getNode();
+    DumpNodesr(OS, child, indent+2, G, once);
+  }
+}
+
+void SDNode::dumpr() const {
+  VisitedSDNodeSet once;
+  DumpNodesr(dbgs(), this, 0, 0, once);
+}
+
+void SDNode::dumpr(const SelectionDAG *G) const {
+  VisitedSDNodeSet once;
+  DumpNodesr(dbgs(), this, 0, G, once);
+}
+
+static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
+                                  const SelectionDAG *G, unsigned depth,
+                                  unsigned indent) {
+  if (depth == 0)
+    return;
+
+  OS.indent(indent);
+
+  N->print(OS, G);
+
+  if (depth < 1)
+    return;
+
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    // Don't follow chain operands.
+    if (N->getOperand(i).getValueType() == MVT::Other)
+      continue;
+    OS << '\n';
+    printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
+  }
+}
+
+void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
+                            unsigned depth) const {
+  printrWithDepthHelper(OS, this, G, depth, 0);
+}
+
+void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
+  // Don't print impossibly deep things.
+  printrWithDepth(OS, G, 10);
+}
+
+void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
+  printrWithDepth(dbgs(), G, depth);
+}
+
+void SDNode::dumprFull(const SelectionDAG *G) const {
+  // Don't print impossibly deep things.
+  dumprWithDepth(G, 10);
+}
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+  print_types(OS, G);
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    if (i) OS << ", "; else OS << " ";
+    OS << (void*)getOperand(i).getNode();
+    if (unsigned RN = getOperand(i).getResNo())
+      OS << ":" << RN;
+  }
+  print_details(OS, G);
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 2173d8d..8aabc02 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -673,7 +673,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
   {
     NamedRegionTimer T("Instruction Scheduling", GroupName,
                        TimePassesIsEnabled);
-    Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt);
+    Scheduler->Run(CurDAG, FuncInfo->MBB);
   }
 
   if (ViewSUnitDAGs) Scheduler->viewGraph();
@@ -684,8 +684,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
   {
     NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
 
-    LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule();
-    FuncInfo->InsertPt = Scheduler->InsertPos;
+    // FuncInfo->InsertPt is passed by reference and set to the end of the
+    // scheduled instructions.
+    LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt);
   }
 
   // If the block was split, make sure we update any references that are used to
@@ -774,7 +775,7 @@ void SelectionDAGISel::PrepareEHLandingPad() {
 
   // Assign the call site to the landing pad's begin label.
   MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
-    
+
   const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
   BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
     .addSym(Label);
@@ -934,9 +935,9 @@ static void collectFailStats(const Instruction *I) {
   case Instruction::FPToSI:   NumFastIselFailFPToSI++; return;
   case Instruction::UIToFP:   NumFastIselFailUIToFP++; return;
   case Instruction::SIToFP:   NumFastIselFailSIToFP++; return;
-  case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return; 
+  case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return;
   case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return;
-  case Instruction::BitCast:  NumFastIselFailBitCast++; return; 
+  case Instruction::BitCast:  NumFastIselFailBitCast++; return;
 
   // Other instructions...
   case Instruction::ICmp:           NumFastIselFailICmp++; return;
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 5412c97..9a86f32 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -1,4 +1,4 @@
-//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -42,7 +42,7 @@ STATISTIC(NumInvokes, "Number of invokes replaced");
 STATISTIC(NumSpilled, "Number of registers live across unwind edges");
 
 namespace {
-  class SjLjEHPass : public FunctionPass {
+  class SjLjEHPrepare : public FunctionPass {
     const TargetLowering *TLI;
     Type *FunctionContextTy;
     Constant *RegisterFn;
@@ -58,7 +58,7 @@ namespace {
     AllocaInst *FuncCtx;
   public:
     static char ID; // Pass identification, replacement for typeid
-    explicit SjLjEHPass(const TargetLowering *tli = NULL)
+    explicit SjLjEHPrepare(const TargetLowering *tli = NULL)
       : FunctionPass(ID), TLI(tli) { }
     bool doInitialization(Module &M);
     bool runOnFunction(Function &F);
@@ -79,15 +79,15 @@ namespace {
   };
 } // end anonymous namespace
 
-char SjLjEHPass::ID = 0;
+char SjLjEHPrepare::ID = 0;
 
-// Public Interface To the SjLjEHPass pass.
-FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) {
-  return new SjLjEHPass(TLI);
+// Public Interface To the SjLjEHPrepare pass.
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetLowering *TLI) {
+  return new SjLjEHPrepare(TLI);
 }
 // doInitialization - Set up decalarations and types needed to process
 // exceptions.
-bool SjLjEHPass::doInitialization(Module &M) {
+bool SjLjEHPrepare::doInitialization(Module &M) {
   // Build the function context structure.
   // builtin_setjmp uses a five word jbuf
   Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
@@ -123,7 +123,7 @@ bool SjLjEHPass::doInitialization(Module &M) {
 
 /// insertCallSiteStore - Insert a store of the call-site value to the
 /// function context
-void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number) {
+void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
   IRBuilder<> Builder(I);
 
   // Get a reference to the call_site field.
@@ -151,8 +151,8 @@ static void MarkBlocksLiveIn(BasicBlock *BB,
 
 /// substituteLPadValues - Substitute the values returned by the landingpad
 /// instruction with those returned by the personality function.
-void SjLjEHPass::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
-                                      Value *SelVal) {
+void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+                                         Value *SelVal) {
   SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end());
   while (!UseWorkList.empty()) {
     Value *Val = UseWorkList.pop_back_val();
@@ -183,7 +183,7 @@ void SjLjEHPass::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
 
 /// setupFunctionContext - Allocate the function context on the stack and fill
 /// it with all of the data that we know at this point.
-Value *SjLjEHPass::
+Value *SjLjEHPrepare::
 setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
   BasicBlock *EntryBB = F.begin();
 
@@ -251,7 +251,7 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
 /// specially, we lower each arg to a copy instruction in the entry block. This
 /// ensures that the argument value itself cannot be live out of the entry
 /// block.
-void SjLjEHPass::lowerIncomingArguments(Function &F) {
+void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
   BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin();
   while (isa<AllocaInst>(AfterAllocaInsPt) &&
          isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize()))
@@ -295,8 +295,8 @@ void SjLjEHPass::lowerIncomingArguments(Function &F) {
 
 /// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind
 /// edge and spill them.
-void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
-                                        ArrayRef<InvokeInst*> Invokes) {
+void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
+                                           ArrayRef<InvokeInst*> Invokes) {
   // Finally, scan the code looking for instructions with bad live ranges.
   for (Function::iterator
          BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
@@ -393,7 +393,7 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
 /// setupEntryBlockAndCallSites - Setup the entry block by creating and filling
 /// the function context and marking the call sites with the appropriate
 /// values. These values are used by the DWARF EH emitter.
-bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
+bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
   SmallVector<ReturnInst*,     16> Returns;
   SmallVector<InvokeInst*,     16> Invokes;
   SmallSetVector<LandingPadInst*, 16> LPads;
@@ -519,7 +519,7 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
   return true;
 }
 
-bool SjLjEHPass::runOnFunction(Function &F) {
+bool SjLjEHPrepare::runOnFunction(Function &F) {
   bool Res = setupEntryBlockAndCallSites(F);
   return Res;
 }
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
index 58caae8..cb11bfe 100644
--- a/lib/ExecutionEngine/CMakeLists.txt
+++ b/lib/ExecutionEngine/CMakeLists.txt
@@ -1,3 +1,5 @@
+
+
 add_llvm_library(LLVMExecutionEngine
   ExecutionEngine.cpp
   ExecutionEngineBindings.cpp
@@ -8,3 +10,11 @@ add_subdirectory(Interpreter)
 add_subdirectory(JIT)
 add_subdirectory(MCJIT)
 add_subdirectory(RuntimeDyld)
+
+if( LLVM_USE_OPROFILE )
+  add_subdirectory(OProfileJIT)
+endif( LLVM_USE_OPROFILE )
+
+if( LLVM_USE_INTEL_JITEVENTS )
+  add_subdirectory(IntelJITEvents)
+endif( LLVM_USE_INTEL_JITEVENTS )
diff --git a/lib/ExecutionEngine/EventListenerCommon.h b/lib/ExecutionEngine/EventListenerCommon.h
new file mode 100644
index 0000000..1c07c94
--- /dev/null
+++ b/lib/ExecutionEngine/EventListenerCommon.h
@@ -0,0 +1,67 @@
+//===-- JIT.h - Abstract Execution Engine Interface -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Common functionality for JITEventListener implementations
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef EVENT_LISTENER_COMMON_H
+#define EVENT_LISTENER_COMMON_H
+
+#include "llvm/Metadata.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+
+namespace jitprofiling {
+
+class FilenameCache {
+  // Holds the filename of each Scope, so that we can pass a null-terminated
+  // string into oprofile.  Use an AssertingVH rather than a ValueMap because we
+  // shouldn't be modifying any MDNodes while this map is alive.
+  DenseMap<AssertingVH<MDNode>, std::string> Filenames;
+  DenseMap<AssertingVH<MDNode>, std::string> Paths;
+
+ public:
+  const char *getFilename(MDNode *Scope) {
+    std::string &Filename = Filenames[Scope];
+    if (Filename.empty()) {
+      DIScope DIScope(Scope);
+      Filename = DIScope.getFilename();
+    }
+    return Filename.c_str();
+  }
+
+  const char *getFullPath(MDNode *Scope) {
+    std::string &P = Paths[Scope];
+    if (P.empty()) {
+      DIScope DIScope(Scope);
+      StringRef DirName = DIScope.getDirectory();
+      StringRef FileName = DIScope.getFilename();
+      SmallString<256> FullPath;
+      if (DirName != "." && DirName != "") {
+        FullPath = DirName;
+      }
+      if (FileName != "") {
+        sys::path::append(FullPath, FileName);
+      }
+      P = FullPath.str();
+    }
+    return P.c_str();
+  }
+};
+
+} // namespace jitprofiling
+
+} // namespace llvm
+
+#endif //EVENT_LISTENER_COMMON_H
diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
new file mode 100644
index 0000000..7d67d0d
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
@@ -0,0 +1,11 @@
+
+include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+set(system_libs
+  ${system_libs}
+  jitprofiling
+  )
+
+add_llvm_library(LLVMIntelJITEvents
+  IntelJITEventListener.cpp
+  )
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
new file mode 100644
index 0000000..5dfa78f
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -0,0 +1,183 @@
+//===-- IntelJITEventListener.cpp - Tell Intel profiler about JITed code --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITEventListener object to tell Intel(R) VTune(TM)
+// Amplifier XE 2011 about JITted functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+
+#define DEBUG_TYPE "amplifier-jit-event-listener"
+#include "llvm/Function.h"
+#include "llvm/Metadata.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Support/ValueHandle.h"
+#include "EventListenerCommon.h"
+
+using namespace llvm;
+using namespace llvm::jitprofiling;
+
+namespace {
+
+class IntelJITEventListener : public JITEventListener {
+  typedef DenseMap<void*, unsigned int> MethodIDMap;
+
+  IntelJITEventsWrapper& Wrapper;
+  MethodIDMap MethodIDs;
+  FilenameCache Filenames;
+
+public:
+  IntelJITEventListener(IntelJITEventsWrapper& libraryWrapper)
+  : Wrapper(libraryWrapper) {
+  }
+
+  ~IntelJITEventListener() {
+  }
+
+  virtual void NotifyFunctionEmitted(const Function &F,
+                                     void *FnStart, size_t FnSize,
+                                     const EmittedFunctionDetails &Details);
+
+  virtual void NotifyFreeingMachineCode(void *OldPtr);
+};
+
+static LineNumberInfo LineStartToIntelJITFormat(
+    uintptr_t StartAddress,
+    uintptr_t Address,
+    DebugLoc Loc) {
+  LineNumberInfo Result;
+
+  Result.Offset = Address - StartAddress;
+  Result.LineNumber = Loc.getLine();
+
+  return Result;
+}
+
+static iJIT_Method_Load FunctionDescToIntelJITFormat(
+    IntelJITEventsWrapper& Wrapper,
+    const char* FnName,
+    uintptr_t FnStart,
+    size_t FnSize) {
+  iJIT_Method_Load Result;
+  memset(&Result, 0, sizeof(iJIT_Method_Load));
+
+  Result.method_id = Wrapper.iJIT_GetNewMethodID();
+  Result.method_name = const_cast<char*>(FnName);
+  Result.method_load_address = reinterpret_cast<void*>(FnStart);
+  Result.method_size = FnSize;
+
+  Result.class_id = 0;
+  Result.class_file_name = NULL;
+  Result.user_data = NULL;
+  Result.user_data_size = 0;
+  Result.env = iJDE_JittingAPI;
+
+  return Result;
+}
+
+// Adds the just-emitted function to the symbol table.
+void IntelJITEventListener::NotifyFunctionEmitted(
+    const Function &F, void *FnStart, size_t FnSize,
+    const EmittedFunctionDetails &Details) {
+  iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(Wrapper,
+                                      F.getName().data(),
+                                      reinterpret_cast<uint64_t>(FnStart),
+                                      FnSize);
+
+  std::vector<LineNumberInfo> LineInfo;
+
+  if (!Details.LineStarts.empty()) {
+    // Now convert the line number information from the address/DebugLoc
+    // format in Details to the offset/lineno in Intel JIT API format.
+
+    LineInfo.reserve(Details.LineStarts.size() + 1);
+
+    DebugLoc FirstLoc = Details.LineStarts[0].Loc;
+    assert(!FirstLoc.isUnknown()
+           && "LineStarts should not contain unknown DebugLocs");
+
+    MDNode *FirstLocScope = FirstLoc.getScope(F.getContext());
+    DISubprogram FunctionDI = getDISubprogram(FirstLocScope);
+    if (FunctionDI.Verify()) {
+      FunctionMessage.source_file_name = const_cast<char*>(
+                                          Filenames.getFullPath(FirstLocScope));
+
+      LineNumberInfo FirstLine;
+      FirstLine.Offset = 0;
+      FirstLine.LineNumber = FunctionDI.getLineNumber();
+      LineInfo.push_back(FirstLine);
+    }
+
+    for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator I =
+          Details.LineStarts.begin(), E = Details.LineStarts.end();
+          I != E; ++I) {
+      // This implementation ignores the DebugLoc filename because the Intel
+      // JIT API does not support multiple source files associated with a single
+      // JIT function
+      LineInfo.push_back(LineStartToIntelJITFormat(
+                          reinterpret_cast<uintptr_t>(FnStart),
+                          I->Address,
+                          I->Loc));
+
+      // If we have no file name yet for the function, use the filename from
+      // the first instruction that has one
+      if (FunctionMessage.source_file_name == 0) {
+        MDNode *scope = I->Loc.getScope(
+					Details.MF->getFunction()->getContext());
+        FunctionMessage.source_file_name = const_cast<char*>(
+                                                  Filenames.getFullPath(scope));
+      }
+    }
+
+    FunctionMessage.line_number_size = LineInfo.size();
+    FunctionMessage.line_number_table = &*LineInfo.begin();
+  } else {
+    FunctionMessage.line_number_size = 0;
+    FunctionMessage.line_number_table = 0;
+  }
+
+  Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+													 &FunctionMessage);
+  MethodIDs[FnStart] = FunctionMessage.method_id;
+}
+
+void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
+  MethodIDMap::iterator I = MethodIDs.find(FnStart);
+  if (I != MethodIDs.end()) {
+    Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second);
+    MethodIDs.erase(I);
+  }
+}
+
+}  // anonymous namespace.
+
+namespace llvm {
+JITEventListener *JITEventListener::createIntelJITEventListener() {
+  static OwningPtr<IntelJITEventsWrapper> JITProfilingWrapper(
+                                            new IntelJITEventsWrapper);
+  return new IntelJITEventListener(*JITProfilingWrapper);
+}
+
+// for testing
+JITEventListener *JITEventListener::createIntelJITEventListener(
+                                      IntelJITEventsWrapper* TestImpl) {
+  return new IntelJITEventListener(*TestImpl);
+}
+
+} // namespace llvm
+
diff --git a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
new file mode 100644
index 0000000..80d2273
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/ExecutionEngine/JITProfileAmplifier/LLVMBuild.txt --*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+
+[component_0]
+type = Library
+name = IntelJITEvents
+parent = ExecutionEngine
diff --git a/lib/ExecutionEngine/IntelJITEvents/Makefile b/lib/ExecutionEngine/IntelJITEvents/Makefile
new file mode 100644
index 0000000..ba75ac6
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/Makefile
@@ -0,0 +1,17 @@
+##===- lib/ExecutionEngine/JITProfile/Makefile -------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMIntelJITEvents
+
+include $(LEVEL)/Makefile.config
+
+SOURCES := IntelJITEventListener.cpp
+CPPFLAGS += -I$(INTEL_JITEVENTS_INCDIR) -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 3dce3b3..af47be9 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -650,12 +650,10 @@ void Interpreter::visitSwitchInst(SwitchInst &I) {
 
   // Check to see if any of the cases match...
   BasicBlock *Dest = 0;
-  unsigned NumCases = I.getNumCases();
-  // Skip the first item since that's the default case.
-  for (unsigned i = 0; i < NumCases; ++i) {
-    GenericValue CaseVal = getOperandValue(I.getCaseValue(i), SF);
+  for (SwitchInst::CaseIt i = I.case_begin(), e = I.case_end(); i != e; ++i) {
+    GenericValue CaseVal = getOperandValue(i.getCaseValue(), SF);
     if (executeICMP_EQ(CondVal, CaseVal, ElTy).IntVal != 0) {
-      Dest = cast<BasicBlock>(I.getCaseSuccessor(i));
+      Dest = cast<BasicBlock>(i.getCaseSuccessor());
       break;
     }
   }
diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt
index dcef08c..52bb389 100644
--- a/lib/ExecutionEngine/JIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/JIT/CMakeLists.txt
@@ -2,10 +2,8 @@
 add_definitions(-DENABLE_X86_JIT)
 
 add_llvm_library(LLVMJIT
-  Intercept.cpp
   JIT.cpp
   JITDwarfEmitter.cpp
   JITEmitter.cpp
   JITMemoryManager.cpp
-  OProfileJITEventListener.cpp
   )
diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp
deleted file mode 100644
index 2251a8e..0000000
--- a/lib/ExecutionEngine/JIT/Intercept.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-//===-- Intercept.cpp - System function interception routines -------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// If a function call occurs to an external function, the JIT is designed to use
-// the dynamic loader interface to find a function to call.  This is useful for
-// calling system calls and library functions that are not available in LLVM.
-// Some system calls, however, need to be handled specially.  For this reason,
-// we intercept some of them here and use our own stubs to handle them.
-//
-//===----------------------------------------------------------------------===//
-
-#include "JIT.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Config/config.h"
-using namespace llvm;
-
-// AtExitHandlers - List of functions to call when the program exits,
-// registered with the atexit() library function.
-static std::vector<void (*)()> AtExitHandlers;
-
-/// runAtExitHandlers - Run any functions registered by the program's
-/// calls to atexit(3), which we intercept and store in
-/// AtExitHandlers.
-///
-static void runAtExitHandlers() {
-  while (!AtExitHandlers.empty()) {
-    void (*Fn)() = AtExitHandlers.back();
-    AtExitHandlers.pop_back();
-    Fn();
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// Function stubs that are invoked instead of certain library calls
-//===----------------------------------------------------------------------===//
-
-// Force the following functions to be linked in to anything that uses the
-// JIT. This is a hack designed to work around the all-too-clever Glibc
-// strategy of making these functions work differently when inlined vs. when
-// not inlined, and hiding their real definitions in a separate archive file
-// that the dynamic linker can't see. For more info, search for
-// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
-#if defined(__linux__)
-#if defined(HAVE_SYS_STAT_H)
-#include <sys/stat.h>
-#endif
-#include <fcntl.h>
-#include <unistd.h>
-/* stat functions are redirecting to __xstat with a version number.  On x86-64
- * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
- * available as an exported symbol, so we have to add it explicitly.
- */
-namespace {
-class StatSymbols {
-public:
-  StatSymbols() {
-    sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
-    sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
-    sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
-    sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
-    sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
-    sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
-    sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
-    sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
-    sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
-    sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
-    sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
-  }
-};
-}
-static StatSymbols initStatSymbols;
-#endif // __linux__
-
-// jit_exit - Used to intercept the "exit" library call.
-static void jit_exit(int Status) {
-  runAtExitHandlers();   // Run atexit handlers...
-  exit(Status);
-}
-
-// jit_atexit - Used to intercept the "atexit" library call.
-static int jit_atexit(void (*Fn)()) {
-  AtExitHandlers.push_back(Fn);    // Take note of atexit handler...
-  return 0;  // Always successful
-}
-
-static int jit_noop() {
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-//
-/// getPointerToNamedFunction - This method returns the address of the specified
-/// function by using the dynamic loader interface.  As such it is only useful
-/// for resolving library symbols, not code generated symbols.
-///
-void *JIT::getPointerToNamedFunction(const std::string &Name,
-                                     bool AbortOnFailure) {
-  if (!isSymbolSearchingDisabled()) {
-    // Check to see if this is one of the functions we want to intercept.  Note,
-    // we cast to intptr_t here to silence a -pedantic warning that complains
-    // about casting a function pointer to a normal pointer.
-    if (Name == "exit") return (void*)(intptr_t)&jit_exit;
-    if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
-
-    // We should not invoke parent's ctors/dtors from generated main()!
-    // On Mingw and Cygwin, the symbol __main is resolved to
-    // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
-    // (and register wrong callee's dtors with atexit(3)).
-    // We expect ExecutionEngine::runStaticConstructorsDestructors()
-    // is called before ExecutionEngine::runFunctionAsMain() is called.
-    if (Name == "__main") return (void*)(intptr_t)&jit_noop;
-
-    const char *NameStr = Name.c_str();
-    // If this is an asm specifier, skip the sentinal.
-    if (NameStr[0] == 1) ++NameStr;
-
-    // If it's an external function, look it up in the process image...
-    void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
-    if (Ptr) return Ptr;
-
-    // If it wasn't found and if it starts with an underscore ('_') character,
-    // and has an asm specifier, try again without the underscore.
-    if (Name[0] == 1 && NameStr[0] == '_') {
-      Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
-      if (Ptr) return Ptr;
-    }
-
-    // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf.  These
-    // are references to hidden visibility symbols that dlsym cannot resolve.
-    // If we have one of these, strip off $LDBLStub and try again.
-#if defined(__APPLE__) && defined(__ppc__)
-    if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
-        memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
-      // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
-      // This mirrors logic in libSystemStubs.a.
-      std::string Prefix = std::string(Name.begin(), Name.end()-9);
-      if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
-        return Ptr;
-      if (void *Ptr = getPointerToNamedFunction(Prefix, false))
-        return Ptr;
-    }
-#endif
-  }
-
-  /// If a LazyFunctionCreator is installed, use it to get/create the function.
-  if (LazyFunctionCreator)
-    if (void *RP = LazyFunctionCreator(Name))
-      return RP;
-
-  if (AbortOnFailure) {
-    report_fatal_error("Program used external function '"+Name+
-                      "' which could not be resolved!");
-  }
-  return 0;
-}
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index f715f6f..16b8ee2 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineCodeInfo.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
 #include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetJITInfo.h"
@@ -267,9 +268,9 @@ extern "C" {
 }
 
 JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
-         JITMemoryManager *JMM, bool GVsWithCode)
-  : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode),
-    isAlreadyCodeGenerating(false) {
+         JITMemoryManager *jmm, bool GVsWithCode)
+  : ExecutionEngine(M), TM(tm), TJI(tji), JMM(jmm),
+    AllocateGVsWithCode(GVsWithCode), isAlreadyCodeGenerating(false) {
   setTargetData(TM.getTargetData());
 
   jitstate = new JITState(M);
@@ -711,6 +712,27 @@ void *JIT::getPointerToBasicBlock(BasicBlock *BB) {
   }
 }
 
+void *JIT::getPointerToNamedFunction(const std::string &Name,
+                                     bool AbortOnFailure){
+  if (!isSymbolSearchingDisabled()) {
+    void *ptr = JMM->getPointerToNamedFunction(Name, false);
+    if (ptr)
+      return ptr;
+  }
+
+  /// If a LazyFunctionCreator is installed, use it to get/create the function.
+  if (LazyFunctionCreator)
+    if (void *RP = LazyFunctionCreator(Name))
+      return RP;
+
+  if (AbortOnFailure) {
+    report_fatal_error("Program used external function '"+Name+
+                      "' which could not be resolved!");
+  }
+  return 0;
+}
+
+
 /// getOrEmitGlobalVariable - Return the address of the specified global
 /// variable, possibly emitting it to memory if needed.  This is used by the
 /// Emitter.
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index 17d33fe..c557981 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -58,6 +58,7 @@ class JIT : public ExecutionEngine {
   TargetMachine &TM;       // The current target we are compiling to
   TargetJITInfo &TJI;      // The JITInfo for the target we are compiling to
   JITCodeEmitter *JCE;     // JCE object
+  JITMemoryManager *JMM;
   std::vector<JITEventListener*> EventListeners;
 
   /// AllocateGVsWithCode - Some applications require that global variables and
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index efd570d..d404d0c 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -314,6 +314,17 @@ namespace {
     /// should allocate a separate slab.
     static const size_t DefaultSizeThreshold;
 
+    /// getPointerToNamedFunction - This method returns the address of the
+    /// specified function by using the dlsym function call.  As such it is only
+    /// useful for resolving library symbols, not code generated symbols.
+    ///
+    /// If AbortOnFailure is false and no function with the given name is
+    /// found, this function silently returns a null pointer. Otherwise,
+    /// it prints a message to stderr and aborts.
+    ///
+    virtual void *getPointerToNamedFunction(const std::string &Name,
+                                            bool AbortOnFailure = true);
+
     void AllocateGOT();
 
     // Testing methods.
@@ -757,6 +768,148 @@ bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) {
   return true;
 }
 
+//===----------------------------------------------------------------------===//
+// getPointerToNamedFunction() implementation.
+//===----------------------------------------------------------------------===//
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Config/config.h"
+
+// AtExitHandlers - List of functions to call when the program exits,
+// registered with the atexit() library function.
+static std::vector<void (*)()> AtExitHandlers;
+
+/// runAtExitHandlers - Run any functions registered by the program's
+/// calls to atexit(3), which we intercept and store in
+/// AtExitHandlers.
+///
+static void runAtExitHandlers() {
+  while (!AtExitHandlers.empty()) {
+    void (*Fn)() = AtExitHandlers.back();
+    AtExitHandlers.pop_back();
+    Fn();
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Function stubs that are invoked instead of certain library calls
+//===----------------------------------------------------------------------===//
+
+// Force the following functions to be linked in to anything that uses the
+// JIT. This is a hack designed to work around the all-too-clever Glibc
+// strategy of making these functions work differently when inlined vs. when
+// not inlined, and hiding their real definitions in a separate archive file
+// that the dynamic linker can't see. For more info, search for
+// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+#if defined(__linux__)
+#if defined(HAVE_SYS_STAT_H)
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+#include <unistd.h>
+/* stat functions are redirecting to __xstat with a version number.  On x86-64
+ * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
+ * available as an exported symbol, so we have to add it explicitly.
+ */
+namespace {
+class StatSymbols {
+public:
+  StatSymbols() {
+    sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
+    sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
+    sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
+    sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
+    sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
+    sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
+    sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
+    sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
+    sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
+    sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
+    sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
+  }
+};
+}
+static StatSymbols initStatSymbols;
+#endif // __linux__
+
+// jit_exit - Used to intercept the "exit" library call.
+static void jit_exit(int Status) {
+  runAtExitHandlers();   // Run atexit handlers...
+  exit(Status);
+}
+
+// jit_atexit - Used to intercept the "atexit" library call.
+static int jit_atexit(void (*Fn)()) {
+  AtExitHandlers.push_back(Fn);    // Take note of atexit handler...
+  return 0;  // Always successful
+}
+
+static int jit_noop() {
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+/// getPointerToNamedFunction - This method returns the address of the specified
+/// function by using the dynamic loader interface.  As such it is only useful
+/// for resolving library symbols, not code generated symbols.
+///
+void *DefaultJITMemoryManager::getPointerToNamedFunction(const std::string &Name,
+                                     bool AbortOnFailure) {
+  // Check to see if this is one of the functions we want to intercept.  Note,
+  // we cast to intptr_t here to silence a -pedantic warning that complains
+  // about casting a function pointer to a normal pointer.
+  if (Name == "exit") return (void*)(intptr_t)&jit_exit;
+  if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+
+  // We should not invoke parent's ctors/dtors from generated main()!
+  // On Mingw and Cygwin, the symbol __main is resolved to
+  // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
+  // (and register wrong callee's dtors with atexit(3)).
+  // We expect ExecutionEngine::runStaticConstructorsDestructors()
+  // is called before ExecutionEngine::runFunctionAsMain() is called.
+  if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+  const char *NameStr = Name.c_str();
+  // If this is an asm specifier, skip the sentinal.
+  if (NameStr[0] == 1) ++NameStr;
+
+  // If it's an external function, look it up in the process image...
+  void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+  if (Ptr) return Ptr;
+
+  // If it wasn't found and if it starts with an underscore ('_') character,
+  // try again without the underscore.
+  if (NameStr[0] == '_') {
+    Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+    if (Ptr) return Ptr;
+  }
+
+  // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf.  These
+  // are references to hidden visibility symbols that dlsym cannot resolve.
+  // If we have one of these, strip off $LDBLStub and try again.
+#if defined(__APPLE__) && defined(__ppc__)
+  if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
+      memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
+    // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
+    // This mirrors logic in libSystemStubs.a.
+    std::string Prefix = std::string(Name.begin(), Name.end()-9);
+    if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
+      return Ptr;
+    if (void *Ptr = getPointerToNamedFunction(Prefix, false))
+      return Ptr;
+  }
+#endif
+
+  if (AbortOnFailure) {
+    report_fatal_error("Program used external function '"+Name+
+                      "' which could not be resolved!");
+  }
+  return 0;
+}
+
+
+
 JITMemoryManager *JITMemoryManager::CreateDefaultMemManager() {
   return new DefaultJITMemoryManager();
 }
diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt
index d426969..1f94a4f 100644
--- a/lib/ExecutionEngine/LLVMBuild.txt
+++ b/lib/ExecutionEngine/LLVMBuild.txt
@@ -16,7 +16,7 @@
 ;===------------------------------------------------------------------------===;
 
 [common]
-subdirectories = Interpreter JIT MCJIT RuntimeDyld
+subdirectories = Interpreter JIT MCJIT RuntimeDyld IntelJITEvents OProfileJIT
 
 [component_0]
 type = Library
diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
index 2c0f8d6..fef7176 100644
--- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -1,5 +1,4 @@
 add_llvm_library(LLVMMCJIT
   MCJIT.cpp
   MCJITMemoryManager.cpp
-  Intercept.cpp
   )
diff --git a/lib/ExecutionEngine/MCJIT/Intercept.cpp b/lib/ExecutionEngine/MCJIT/Intercept.cpp
deleted file mode 100644
index f83f428..0000000
--- a/lib/ExecutionEngine/MCJIT/Intercept.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-//===-- Intercept.cpp - System function interception routines -------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// If a function call occurs to an external function, the JIT is designed to use
-// the dynamic loader interface to find a function to call.  This is useful for
-// calling system calls and library functions that are not available in LLVM.
-// Some system calls, however, need to be handled specially.  For this reason,
-// we intercept some of them here and use our own stubs to handle them.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCJIT.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Config/config.h"
-using namespace llvm;
-
-// AtExitHandlers - List of functions to call when the program exits,
-// registered with the atexit() library function.
-static std::vector<void (*)()> AtExitHandlers;
-
-/// runAtExitHandlers - Run any functions registered by the program's
-/// calls to atexit(3), which we intercept and store in
-/// AtExitHandlers.
-///
-static void runAtExitHandlers() {
-  while (!AtExitHandlers.empty()) {
-    void (*Fn)() = AtExitHandlers.back();
-    AtExitHandlers.pop_back();
-    Fn();
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// Function stubs that are invoked instead of certain library calls
-//===----------------------------------------------------------------------===//
-
-// Force the following functions to be linked in to anything that uses the
-// JIT. This is a hack designed to work around the all-too-clever Glibc
-// strategy of making these functions work differently when inlined vs. when
-// not inlined, and hiding their real definitions in a separate archive file
-// that the dynamic linker can't see. For more info, search for
-// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
-#if defined(__linux__)
-#if defined(HAVE_SYS_STAT_H)
-#include <sys/stat.h>
-#endif
-#include <fcntl.h>
-#include <unistd.h>
-/* stat functions are redirecting to __xstat with a version number.  On x86-64
- * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
- * available as an exported symbol, so we have to add it explicitly.
- */
-namespace {
-class StatSymbols {
-public:
-  StatSymbols() {
-    sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
-    sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
-    sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
-    sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
-    sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
-    sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
-    sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
-    sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
-    sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
-    sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
-    sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
-  }
-};
-}
-static StatSymbols initStatSymbols;
-#endif // __linux__
-
-// jit_exit - Used to intercept the "exit" library call.
-static void jit_exit(int Status) {
-  runAtExitHandlers();   // Run atexit handlers...
-  exit(Status);
-}
-
-// jit_atexit - Used to intercept the "atexit" library call.
-static int jit_atexit(void (*Fn)()) {
-  AtExitHandlers.push_back(Fn);    // Take note of atexit handler...
-  return 0;  // Always successful
-}
-
-static int jit_noop() {
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-//
-/// getPointerToNamedFunction - This method returns the address of the specified
-/// function by using the dynamic loader interface.  As such it is only useful
-/// for resolving library symbols, not code generated symbols.
-///
-void *MCJIT::getPointerToNamedFunction(const std::string &Name,
-                                       bool AbortOnFailure) {
-  if (!isSymbolSearchingDisabled()) {
-    // Check to see if this is one of the functions we want to intercept.  Note,
-    // we cast to intptr_t here to silence a -pedantic warning that complains
-    // about casting a function pointer to a normal pointer.
-    if (Name == "exit") return (void*)(intptr_t)&jit_exit;
-    if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
-
-    // We should not invoke parent's ctors/dtors from generated main()!
-    // On Mingw and Cygwin, the symbol __main is resolved to
-    // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
-    // (and register wrong callee's dtors with atexit(3)).
-    // We expect ExecutionEngine::runStaticConstructorsDestructors()
-    // is called before ExecutionEngine::runFunctionAsMain() is called.
-    if (Name == "__main") return (void*)(intptr_t)&jit_noop;
-
-    const char *NameStr = Name.c_str();
-    // If this is an asm specifier, skip the sentinal.
-    if (NameStr[0] == 1) ++NameStr;
-
-    // If it's an external function, look it up in the process image...
-    void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
-    if (Ptr) return Ptr;
-
-    // If it wasn't found and if it starts with an underscore ('_') character,
-    // and has an asm specifier, try again without the underscore.
-    if (Name[0] == 1 && NameStr[0] == '_') {
-      Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
-      if (Ptr) return Ptr;
-    }
-
-    // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf.  These
-    // are references to hidden visibility symbols that dlsym cannot resolve.
-    // If we have one of these, strip off $LDBLStub and try again.
-#if defined(__APPLE__) && defined(__ppc__)
-    if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
-        memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
-      // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
-      // This mirrors logic in libSystemStubs.a.
-      std::string Prefix = std::string(Name.begin(), Name.end()-9);
-      if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
-        return Ptr;
-      if (void *Ptr = getPointerToNamedFunction(Prefix, false))
-        return Ptr;
-    }
-#endif
-  }
-
-  /// If a LazyFunctionCreator is installed, use it to get/create the function.
-  if (LazyFunctionCreator)
-    if (void *RP = LazyFunctionCreator(Name))
-      return RP;
-
-  if (AbortOnFailure) {
-    report_fatal_error("Program used external function '"+Name+
-                      "' which could not be resolved!");
-  }
-  return 0;
-}
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 5f93a8d..cbb23d3 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -215,3 +215,23 @@ GenericValue MCJIT::runFunction(Function *F,
 
   llvm_unreachable("Full-featured argument passing not supported yet!");
 }
+
+void *MCJIT::getPointerToNamedFunction(const std::string &Name,
+                                       bool AbortOnFailure){
+  if (!isSymbolSearchingDisabled()) {
+    void *ptr = MemMgr->getPointerToNamedFunction(Name, false);
+    if (ptr)
+      return ptr;
+  }
+
+  /// If a LazyFunctionCreator is installed, use it to get/create the function.
+  if (LazyFunctionCreator)
+    if (void *RP = LazyFunctionCreator(Name))
+      return RP;
+
+  if (AbortOnFailure) {
+    report_fatal_error("Program used external function '"+Name+
+                      "' which could not be resolved!");
+  }
+  return 0;
+}
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index 7f4ae77..2b3df98 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -67,6 +67,7 @@ public:
   ///
   virtual void *getPointerToNamedFunction(const std::string &Name,
                                           bool AbortOnFailure = true);
+
   /// mapSectionAddress - map a section to its target address space value.
   /// Map the address of a JIT section as returned from the memory manager
   /// to the address in the target process as the running code will see it.
diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
index ac8c155..dac8b26 100644
--- a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
+++ b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
@@ -33,46 +33,17 @@ public:
 
   uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
                                unsigned SectionID) {
-    return JMM->allocateDataSection(Size, Alignment, SectionID);
+    return JMM->allocateSpace(Size, Alignment);
   }
 
   uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
                                unsigned SectionID) {
-    return JMM->allocateCodeSection(Size, Alignment, SectionID);
+    return JMM->allocateSpace(Size, Alignment);
   }
 
-  // Allocate ActualSize bytes, or more, for the named function. Return
-  // a pointer to the allocated memory and update Size to reflect how much
-  // memory was acutally allocated.
-  uint8_t *startFunctionBody(const char *Name, uintptr_t &Size) {
-    // FIXME: This should really reference the MCAsmInfo to get the global
-    //        prefix.
-    if (Name[0] == '_') ++Name;
-    Function *F = M->getFunction(Name);
-    // Some ObjC names have a prefixed \01 in the IR. If we failed to find
-    // the symbol and it's of the ObjC conventions (starts with "-" or 
-    // "+"), try prepending a \01 and see if we can find it that way.
-    if (!F && (Name[0] == '-' || Name[0] == '+'))
-      F = M->getFunction((Twine("\1") + Name).str());
-    assert(F && "No matching function in JIT IR Module!");
-    return JMM->startFunctionBody(F, Size);
-  }
-
-  // Mark the end of the function, including how much of the allocated
-  // memory was actually used.
-  void endFunctionBody(const char *Name, uint8_t *FunctionStart,
-                       uint8_t *FunctionEnd) {
-    // FIXME: This should really reference the MCAsmInfo to get the global
-    //        prefix.
-    if (Name[0] == '_') ++Name;
-    Function *F = M->getFunction(Name);
-    // Some ObjC names have a prefixed \01 in the IR. If we failed to find
-    // the symbol and it's of the ObjC conventions (starts with "-" or
-    // "+"), try prepending a \01 and see if we can find it that way.
-    if (!F && (Name[0] == '-' || Name[0] == '+'))
-      F = M->getFunction((Twine("\1") + Name).str());
-    assert(F && "No matching function in JIT IR Module!");
-    JMM->endFunctionBody(F, FunctionStart, FunctionEnd);
+  virtual void *getPointerToNamedFunction(const std::string &Name,
+                                          bool AbortOnFailure = true) {
+    return JMM->getPointerToNamedFunction(Name, AbortOnFailure);
   }
 
 };
diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile
index 9a649a5..c26e0ad 100644
--- a/lib/ExecutionEngine/Makefile
+++ b/lib/ExecutionEngine/Makefile
@@ -8,6 +8,17 @@
 ##===----------------------------------------------------------------------===##
 LEVEL = ../..
 LIBRARYNAME = LLVMExecutionEngine
+
+include $(LEVEL)/Makefile.config
+
 PARALLEL_DIRS = Interpreter JIT MCJIT RuntimeDyld
 
-include $(LEVEL)/Makefile.common
+ifeq ($(USE_INTEL_JITEVENTS), 1)
+PARALLEL_DIRS += IntelJITEvents
+endif
+
+ifeq ($(USE_OPROFILE), 1)
+PARALLEL_DIRS += OProfileJIT
+endif
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/ExecutionEngine/OProfileJIT/CMakeLists.txt b/lib/ExecutionEngine/OProfileJIT/CMakeLists.txt
new file mode 100644
index 0000000..d585136
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/CMakeLists.txt
@@ -0,0 +1,7 @@
+
+include_directories( ${LLVM_OPROFILE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMOProfileJIT
+  OProfileJITEventListener.cpp
+  OProfileWrapper.cpp
+  )
diff --git a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
new file mode 100644
index 0000000..4516dfa
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt ----------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+
+[component_0]
+type = Library
+name = OProfileJIT
+parent = ExecutionEngine
diff --git a/lib/ExecutionEngine/OProfileJIT/Makefile b/lib/ExecutionEngine/OProfileJIT/Makefile
new file mode 100644
index 0000000..fd3adce
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/Makefile
@@ -0,0 +1,18 @@
+##===- lib/ExecutionEngine/OProfileJIT/Makefile ------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMOProfileJIT
+
+include $(LEVEL)/Makefile.config
+
+SOURCES += OProfileJITEventListener.cpp \
+  OProfileWrapper.cpp
+CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index 9a9ed6d..e6142e3 100644
--- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -7,51 +7,55 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines a JITEventListener object that calls into OProfile to tell
-// it about JITted functions.  For now, we only record function names and sizes,
-// but eventually we'll also record line number information.
-//
-// See http://oprofile.sourceforge.net/doc/devel/jit-interface.html for the
-// definition of the interface we're using.
+// This file defines a JITEventListener object that uses OProfileWrapper to tell
+// oprofile about JITted functions, including source line information.
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+
 #define DEBUG_TYPE "oprofile-jit-event-listener"
 #include "llvm/Function.h"
-#include "llvm/Metadata.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/OProfileWrapper.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Errno.h"
-#include "llvm/Config/config.h"
-#include <stddef.h>
-using namespace llvm;
+#include "EventListenerCommon.h"
 
-#if USE_OPROFILE
+#include <dirent.h>
+#include <fcntl.h>
 
-#include <opagent.h>
+using namespace llvm;
+using namespace llvm::jitprofiling;
 
 namespace {
 
 class OProfileJITEventListener : public JITEventListener {
-  op_agent_t Agent;
+  OProfileWrapper& Wrapper;
+
+  void initialize();
+
 public:
-  OProfileJITEventListener();
+  OProfileJITEventListener(OProfileWrapper& LibraryWrapper)
+  : Wrapper(LibraryWrapper) {
+    initialize();
+  }
+
   ~OProfileJITEventListener();
 
   virtual void NotifyFunctionEmitted(const Function &F,
-                                     void *FnStart, size_t FnSize,
-                                     const EmittedFunctionDetails &Details);
+                                void *FnStart, size_t FnSize,
+                                const JITEvent_EmittedFunctionDetails &Details);
+
   virtual void NotifyFreeingMachineCode(void *OldPtr);
 };
 
-OProfileJITEventListener::OProfileJITEventListener()
-    : Agent(op_open_agent()) {
-  if (Agent == NULL) {
+void OProfileJITEventListener::initialize() {
+  if (!Wrapper.op_open_agent()) {
     const std::string err_str = sys::StrError();
     DEBUG(dbgs() << "Failed to connect to OProfile agent: " << err_str << "\n");
   } else {
@@ -60,8 +64,8 @@ OProfileJITEventListener::OProfileJITEventListener()
 }
 
 OProfileJITEventListener::~OProfileJITEventListener() {
-  if (Agent != NULL) {
-    if (op_close_agent(Agent) == -1) {
+  if (Wrapper.isAgentAvailable()) {
+    if (Wrapper.op_close_agent() == -1) {
       const std::string err_str = sys::StrError();
       DEBUG(dbgs() << "Failed to disconnect from OProfile agent: "
                    << err_str << "\n");
@@ -71,22 +75,6 @@ OProfileJITEventListener::~OProfileJITEventListener() {
   }
 }
 
-class FilenameCache {
-  // Holds the filename of each Scope, so that we can pass a null-terminated
-  // string into oprofile.  Use an AssertingVH rather than a ValueMap because we
-  // shouldn't be modifying any MDNodes while this map is alive.
-  DenseMap<AssertingVH<MDNode>, std::string> Filenames;
-
- public:
-  const char *getFilename(MDNode *Scope) {
-    std::string &Filename = Filenames[Scope];
-    if (Filename.empty()) {
-      Filename = DIScope(Scope).getFilename();
-    }
-    return Filename.c_str();
-  }
-};
-
 static debug_line_info LineStartToOProfileFormat(
     const MachineFunction &MF, FilenameCache &Filenames,
     uintptr_t Address, DebugLoc Loc) {
@@ -103,9 +91,9 @@ static debug_line_info LineStartToOProfileFormat(
 // Adds the just-emitted function to the symbol table.
 void OProfileJITEventListener::NotifyFunctionEmitted(
     const Function &F, void *FnStart, size_t FnSize,
-    const EmittedFunctionDetails &Details) {
+    const JITEvent_EmittedFunctionDetails &Details) {
   assert(F.hasName() && FnStart != 0 && "Bad symbol to add");
-  if (op_write_native_code(Agent, F.getName().data(),
+  if (Wrapper.op_write_native_code(F.getName().data(),
                            reinterpret_cast<uint64_t>(FnStart),
                            FnStart, FnSize) == -1) {
     DEBUG(dbgs() << "Failed to tell OProfile about native function "
@@ -151,8 +139,8 @@ void OProfileJITEventListener::NotifyFunctionEmitted(
     // line info's address to include the start of the function.
     LineInfo[0].vma = reinterpret_cast<uintptr_t>(FnStart);
 
-    if (op_write_debug_line_info(Agent, FnStart,
-                                 LineInfo.size(), &*LineInfo.begin()) == -1) {
+    if (Wrapper.op_write_debug_line_info(FnStart, LineInfo.size(),
+                                      &*LineInfo.begin()) == -1) {
       DEBUG(dbgs()
             << "Failed to tell OProfile about line numbers for native function "
             << F.getName() << " at ["
@@ -164,7 +152,7 @@ void OProfileJITEventListener::NotifyFunctionEmitted(
 // Removes the being-deleted function from the symbol table.
 void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
   assert(FnStart && "Invalid function pointer");
-  if (op_unload_native_code(Agent, reinterpret_cast<uint64_t>(FnStart)) == -1) {
+  if (Wrapper.op_unload_native_code(reinterpret_cast<uint64_t>(FnStart)) == -1) {
     DEBUG(dbgs()
           << "Failed to tell OProfile about unload of native function at "
           << FnStart << "\n");
@@ -174,19 +162,16 @@ void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
 }  // anonymous namespace.
 
 namespace llvm {
-JITEventListener *createOProfileJITEventListener() {
-  return new OProfileJITEventListener;
-}
+JITEventListener *JITEventListener::createOProfileJITEventListener() {
+  static OwningPtr<OProfileWrapper> JITProfilingWrapper(new OProfileWrapper);
+  return new OProfileJITEventListener(*JITProfilingWrapper);
 }
 
-#else  // USE_OPROFILE
-
-namespace llvm {
-// By defining this to return NULL, we can let clients call it unconditionally,
-// even if they haven't configured with the OProfile libraries.
-JITEventListener *createOProfileJITEventListener() {
-  return NULL;
+// for testing
+JITEventListener *JITEventListener::createOProfileJITEventListener(
+                                      OProfileWrapper* TestImpl) {
+  return new OProfileJITEventListener(*TestImpl);
 }
-}  // namespace llvm
 
-#endif  // USE_OPROFILE
+} // namespace llvm
+
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
new file mode 100644
index 0000000..d67f537
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
@@ -0,0 +1,263 @@
+//===-- OProfileWrapper.cpp - OProfile JIT API Wrapper implementation -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interface in OProfileWrapper.h. It is responsible
+// for loading the opagent dynamic library when the first call to an op_
+// function occurs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/OProfileWrapper.h"
+
+#define DEBUG_TYPE "oprofile-wrapper"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/ADT/SmallString.h"
+
+#include <sstream>
+#include <cstring>
+#include <stddef.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+namespace {
+
+// Global mutex to ensure a single thread initializes oprofile agent.
+llvm::sys::Mutex OProfileInitializationMutex;
+
+} // anonymous namespace
+
+namespace llvm {
+
+OProfileWrapper::OProfileWrapper()
+: Agent(0),
+  OpenAgentFunc(0),
+  CloseAgentFunc(0),
+  WriteNativeCodeFunc(0),
+  WriteDebugLineInfoFunc(0),
+  UnloadNativeCodeFunc(0),
+  MajorVersionFunc(0),
+  MinorVersionFunc(0),
+  IsOProfileRunningFunc(0),
+  Initialized(false) {
+}
+
+bool OProfileWrapper::initialize() {
+  using namespace llvm;
+  using namespace llvm::sys;
+
+  MutexGuard Guard(OProfileInitializationMutex);
+
+  if (Initialized)
+    return OpenAgentFunc != 0;
+
+  Initialized = true;
+
+  // If the oprofile daemon is not running, don't load the opagent library
+  if (!isOProfileRunning()) {
+    DEBUG(dbgs() << "OProfile daemon is not detected.\n");
+    return false;
+  }
+
+  std::string error;
+  if(!DynamicLibrary::LoadLibraryPermanently("libopagent.so", &error)) {
+    DEBUG(dbgs()
+            << "OProfile connector library libopagent.so could not be loaded: "
+            << error << "\n");
+  }
+
+  // Get the addresses of the opagent functions
+  OpenAgentFunc = (op_open_agent_ptr_t)(intptr_t)
+          DynamicLibrary::SearchForAddressOfSymbol("op_open_agent");
+  CloseAgentFunc = (op_close_agent_ptr_t)(intptr_t)
+          DynamicLibrary::SearchForAddressOfSymbol("op_close_agent");
+  WriteNativeCodeFunc = (op_write_native_code_ptr_t)(intptr_t)
+          DynamicLibrary::SearchForAddressOfSymbol("op_write_native_code");
+  WriteDebugLineInfoFunc = (op_write_debug_line_info_ptr_t)(intptr_t)
+          DynamicLibrary::SearchForAddressOfSymbol("op_write_debug_line_info");
+  UnloadNativeCodeFunc = (op_unload_native_code_ptr_t)(intptr_t)
+          DynamicLibrary::SearchForAddressOfSymbol("op_unload_native_code");
+  MajorVersionFunc = (op_major_version_ptr_t)(intptr_t)
+          DynamicLibrary::SearchForAddressOfSymbol("op_major_version");
+  MinorVersionFunc = (op_major_version_ptr_t)(intptr_t)
+          DynamicLibrary::SearchForAddressOfSymbol("op_minor_version");
+
+  // With missing functions, we can do nothing
+  if (!OpenAgentFunc
+      || !CloseAgentFunc
+      || !WriteNativeCodeFunc
+      || !WriteDebugLineInfoFunc
+      || !UnloadNativeCodeFunc) {
+    OpenAgentFunc = 0;
+    CloseAgentFunc = 0;
+    WriteNativeCodeFunc = 0;
+    WriteDebugLineInfoFunc = 0;
+    UnloadNativeCodeFunc = 0;
+    return false;
+  }
+
+  return true;
+}
+
+bool OProfileWrapper::isOProfileRunning() {
+  if (IsOProfileRunningFunc != 0)
+    return IsOProfileRunningFunc();
+  return checkForOProfileProcEntry();
+}
+
+bool OProfileWrapper::checkForOProfileProcEntry() {
+  DIR* ProcDir;
+
+  ProcDir = opendir("/proc");
+  if (!ProcDir)
+    return false;
+
+  // Walk the /proc tree looking for the oprofile daemon
+  struct dirent* Entry;
+  while (0 != (Entry = readdir(ProcDir))) {
+    if (Entry->d_type == DT_DIR) {
+      // Build a path from the current entry name
+      SmallString<256> CmdLineFName;
+      raw_svector_ostream(CmdLineFName) << "/proc/" << Entry->d_name
+                                        << "/cmdline";
+
+      // Open the cmdline file
+      int CmdLineFD = open(CmdLineFName.c_str(), S_IRUSR);
+      if (CmdLineFD != -1) {
+        char    ExeName[PATH_MAX+1];
+        char*   BaseName = 0;
+
+        // Read the cmdline file
+        ssize_t NumRead = read(CmdLineFD, ExeName, PATH_MAX+1);
+        close(CmdLineFD);
+        ssize_t Idx = 0;
+
+        // Find the terminator for the first string
+        while (Idx < NumRead-1 && ExeName[Idx] != 0) {
+          Idx++;
+        }
+
+        // Go back to the last non-null character
+        Idx--;
+
+        // Find the last path separator in the first string
+        while (Idx > 0) {
+          if (ExeName[Idx] == '/') {
+            BaseName = ExeName + Idx + 1;
+            break;
+          }
+          Idx--;
+        }
+
+        // Test this to see if it is the oprofile daemon
+        if (BaseName != 0 && !strcmp("oprofiled", BaseName)) {
+          // If it is, we're done
+          closedir(ProcDir);
+          return true;
+        }
+      }
+    }
+  }
+
+  // We've looked through all the files and didn't find the daemon
+  closedir(ProcDir);
+  return false;
+}
+
+bool OProfileWrapper::op_open_agent() {
+  if (!Initialized)
+    initialize();
+
+  if (OpenAgentFunc != 0) {
+    Agent = OpenAgentFunc();
+    return Agent != 0;
+  }
+
+  return false;
+}
+
+int OProfileWrapper::op_close_agent() {
+  if (!Initialized)
+    initialize();
+
+  int ret = -1;
+  if (Agent && CloseAgentFunc) {
+    ret = CloseAgentFunc(Agent);
+    if (ret == 0) {
+      Agent = 0;
+    }
+  }
+  return ret;
+}
+
+bool OProfileWrapper::isAgentAvailable() {
+  return Agent != 0;
+}
+
+int OProfileWrapper::op_write_native_code(const char* Name,
+                                          uint64_t Addr,
+                                          void const* Code,
+                                          const unsigned int Size) {
+  if (!Initialized)
+    initialize();
+
+  if (Agent && WriteNativeCodeFunc)
+    return WriteNativeCodeFunc(Agent, Name, Addr, Code, Size);
+
+  return -1;
+}
+
+int OProfileWrapper::op_write_debug_line_info(
+  void const* Code,
+  size_t NumEntries,
+  struct debug_line_info const* Info) {
+  if (!Initialized)
+    initialize();
+
+  if (Agent && WriteDebugLineInfoFunc)
+    return WriteDebugLineInfoFunc(Agent, Code, NumEntries, Info);
+
+  return -1;
+}
+
+int OProfileWrapper::op_major_version() {
+  if (!Initialized)
+    initialize();
+
+  if (Agent && MajorVersionFunc)
+    return MajorVersionFunc();
+
+  return -1;
+}
+
+int OProfileWrapper::op_minor_version() {
+  if (!Initialized)
+    initialize();
+
+  if (Agent && MinorVersionFunc)
+    return MinorVersionFunc();
+
+  return -1;
+}
+
+int  OProfileWrapper::op_unload_native_code(uint64_t Addr) {
+  if (!Initialized)
+    initialize();
+
+  if (Agent && UnloadNativeCodeFunc)
+    return UnloadNativeCodeFunc(Agent, Addr);
+
+  return -1;
+}
+
+} // namespace llvm
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 2896c2d..ff4a2c8 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -26,45 +26,290 @@ RuntimeDyldImpl::~RuntimeDyldImpl() {}
 
 namespace llvm {
 
-void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress,
-                                      uint8_t *EndAddress) {
-  // FIXME: DEPRECATED in favor of by-section allocation.
-  // Allocate memory for the function via the memory manager.
-  uintptr_t Size = EndAddress - StartAddress + 1;
-  uintptr_t AllocSize = Size;
-  uint8_t *Mem = MemMgr->startFunctionBody(Name.data(), AllocSize);
-  assert(Size >= (uint64_t)(EndAddress - StartAddress + 1) &&
-         "Memory manager failed to allocate enough memory!");
-  // Copy the function payload into the memory block.
-  memcpy(Mem, StartAddress, Size);
-  MemMgr->endFunctionBody(Name.data(), Mem, Mem + Size);
-  // Remember where we put it.
-  unsigned SectionID = Sections.size();
-  Sections.push_back(sys::MemoryBlock(Mem, Size));
 
-  // Default the assigned address for this symbol to wherever this
-  // allocated it.
-  SymbolTable[Name] = SymbolLoc(SectionID, 0);
-  DEBUG(dbgs() << "    allocated to [" << Mem << ", " << Mem + Size << "]\n");
-}
 
 // Resolve the relocations for all symbols we currently know about.
 void RuntimeDyldImpl::resolveRelocations() {
+  // First, resolve relocations assotiated with external symbols.
+  resolveSymbols();
+
   // Just iterate over the sections we have and resolve all the relocations
   // in them. Gross overkill, but it gets the job done.
   for (int i = 0, e = Sections.size(); i != e; ++i) {
-    reassignSectionAddress(i, SectionLoadAddress[i]);
+    reassignSectionAddress(i, Sections[i].LoadAddress);
   }
 }
 
 void RuntimeDyldImpl::mapSectionAddress(void *LocalAddress,
                                         uint64_t TargetAddress) {
-  assert(SectionLocalMemToID.count(LocalAddress) &&
-         "Attempting to remap address of unknown section!");
-  unsigned SectionID = SectionLocalMemToID[LocalAddress];
-  reassignSectionAddress(SectionID, TargetAddress);
+  for (unsigned i = 0, e = Sections.size(); i != e; ++i) {
+    if (Sections[i].Address == LocalAddress) {
+      reassignSectionAddress(i, TargetAddress);
+      return;
+    }
+  }
+  llvm_unreachable("Attempting to remap address of unknown section!");
+}
+
+bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) {
+  // FIXME: ObjectFile don't modify MemoryBuffer.
+  //        It should use const MemoryBuffer as parameter.
+  ObjectFile *obj = ObjectFile::
+                      createObjectFile(const_cast<MemoryBuffer*>(InputBuffer));
+
+  Arch = (Triple::ArchType)obj->getArch();
+
+  LocalSymbolMap LocalSymbols;     // Functions and data symbols from the
+                                   // object file.
+  ObjSectionToIDMap LocalSections; // Used sections from the object file
+
+  error_code err;
+
+
+  // Parse symbols
+  DEBUG(dbgs() << "Parse symbols:\n");
+  for (symbol_iterator it = obj->begin_symbols(), itEnd = obj->end_symbols();
+       it != itEnd; it.increment(err)) {
+    if (err) break;
+    object::SymbolRef::Type SymType;
+    StringRef Name;
+    if ((bool)(err = it->getType(SymType))) break;
+    if ((bool)(err = it->getName(Name))) break;
+
+    if (SymType == object::SymbolRef::ST_Function ||
+        SymType == object::SymbolRef::ST_Data) {
+      uint64_t FileOffset;
+      uint32_t flags;
+      StringRef sData;
+      section_iterator sIt = obj->end_sections();
+      if ((bool)(err = it->getFileOffset(FileOffset))) break;
+      if ((bool)(err = it->getFlags(flags))) break;
+      if ((bool)(err = it->getSection(sIt))) break;
+      if (sIt == obj->end_sections()) continue;
+      if ((bool)(err = sIt->getContents(sData))) break;
+      const uint8_t* SymPtr = (const uint8_t*)InputBuffer->getBufferStart() +
+                              (uintptr_t)FileOffset;
+      uintptr_t SectOffset = (uintptr_t)(SymPtr - (const uint8_t*)sData.begin());
+      unsigned SectionID =
+        findOrEmitSection(*sIt,
+                          SymType == object::SymbolRef::ST_Function,
+                          LocalSections);
+      bool isGlobal = flags & SymbolRef::SF_Global;
+      LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset);
+      DEBUG(dbgs() << "\tFileOffset: " << format("%p", (uintptr_t)FileOffset)
+                   << " flags: " << flags
+                   << " SID: " << SectionID
+                   << " Offset: " << format("%p", SectOffset));
+      if (isGlobal)
+        SymbolTable[Name] = SymbolLoc(SectionID, SectOffset);
+    }
+    DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name << "\n");
+  }
+  if (err) {
+    report_fatal_error(err.message());
+  }
+
+  // Parse and proccess relocations
+  DEBUG(dbgs() << "Parse relocations:\n");
+  for (section_iterator sIt = obj->begin_sections(),
+       sItEnd = obj->end_sections(); sIt != sItEnd; sIt.increment(err)) {
+    if (err) break;
+    bool isFirstRelocation = true;
+    unsigned SectionID = 0;
+    StubMap Stubs;
+
+    for (relocation_iterator it = sIt->begin_relocations(),
+         itEnd = sIt->end_relocations(); it != itEnd; it.increment(err)) {
+      if (err) break;
+
+      // If it's first relocation in this section, find its SectionID
+      if (isFirstRelocation) {
+        SectionID = findOrEmitSection(*sIt, true, LocalSections);
+        DEBUG(dbgs() << "\tSectionID: " << SectionID << "\n");
+        isFirstRelocation = false;
+      }
+
+      ObjRelocationInfo RI;
+      RI.SectionID = SectionID;
+      if ((bool)(err = it->getAdditionalInfo(RI.AdditionalInfo))) break;
+      if ((bool)(err = it->getOffset(RI.Offset))) break;
+      if ((bool)(err = it->getSymbol(RI.Symbol))) break;
+      if ((bool)(err = it->getType(RI.Type))) break;
+
+      DEBUG(dbgs() << "\t\tAddend: " << RI.AdditionalInfo
+                   << " Offset: " << format("%p", (uintptr_t)RI.Offset)
+                   << " Type: " << (uint32_t)(RI.Type & 0xffffffffL)
+                   << "\n");
+      processRelocationRef(RI, *obj, LocalSections, LocalSymbols, Stubs);
+    }
+    if (err) {
+      report_fatal_error(err.message());
+    }
+  }
+  return false;
+}
+
+unsigned RuntimeDyldImpl::emitSection(const SectionRef &Section,
+                                      bool IsCode) {
+
+  unsigned StubBufSize = 0,
+           StubSize = getMaxStubSize();
+  error_code err;
+  if (StubSize > 0) {
+    for (relocation_iterator it = Section.begin_relocations(),
+         itEnd = Section.end_relocations(); it != itEnd; it.increment(err))
+      StubBufSize += StubSize;
+  }
+  StringRef data;
+  uint64_t Alignment64;
+  if ((bool)(err = Section.getContents(data))) report_fatal_error(err.message());
+  if ((bool)(err = Section.getAlignment(Alignment64)))
+    report_fatal_error(err.message());
+
+  unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL;
+  unsigned DataSize = data.size();
+  unsigned Allocate = DataSize + StubBufSize;
+  unsigned SectionID = Sections.size();
+  const char *pData = data.data();
+  uint8_t *Addr = IsCode
+    ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID)
+    : MemMgr->allocateDataSection(Allocate, Alignment, SectionID);
+
+  memcpy(Addr, pData, DataSize);
+  DEBUG(dbgs() << "emitSection SectionID: " << SectionID
+               << " obj addr: " << format("%p", pData)
+               << " new addr: " << format("%p", Addr)
+               << " DataSize: " << DataSize
+               << " StubBufSize: " << StubBufSize
+               << " Allocate: " << Allocate
+               << "\n");
+  Sections.push_back(SectionEntry(Addr, Allocate, DataSize,(uintptr_t)pData));
+  return SectionID;
+}
+
+unsigned RuntimeDyldImpl::
+findOrEmitSection(const SectionRef &Section, bool IsCode,
+                  ObjSectionToIDMap &LocalSections) {
+
+  unsigned SectionID = 0;
+  ObjSectionToIDMap::iterator sIDIt = LocalSections.find(Section);
+  if (sIDIt != LocalSections.end())
+    SectionID = sIDIt->second;
+  else {
+    SectionID = emitSection(Section, IsCode);
+    LocalSections[Section] = SectionID;
+  }
+  return SectionID;
+}
+
+void RuntimeDyldImpl::AddRelocation(const RelocationValueRef &Value,
+                                   unsigned SectionID, uintptr_t Offset,
+                                   uint32_t RelType) {
+  DEBUG(dbgs() << "AddRelocation SymNamePtr: " << format("%p", Value.SymbolName)
+               << " SID: " << Value.SectionID
+               << " Addend: " << format("%p", Value.Addend)
+               << " Offset: " << format("%p", Offset)
+               << " RelType: " << format("%x", RelType)
+               << "\n");
+
+  if (Value.SymbolName == 0) {
+    Relocations[Value.SectionID].push_back(RelocationEntry(
+      SectionID,
+      Offset,
+      RelType,
+      Value.Addend));
+  } else
+    SymbolRelocations[Value.SymbolName].push_back(RelocationEntry(
+      SectionID,
+      Offset,
+      RelType,
+      Value.Addend));
+}
+
+uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
+  // TODO: There is only ARM far stub now. We should add the Thumb stub,
+  // and stubs for branches Thumb - ARM and ARM - Thumb.
+  if (Arch == Triple::arm) {
+    uint32_t *StubAddr = (uint32_t*)Addr;
+    *StubAddr = 0xe51ff004; // ldr pc,<label>
+    return (uint8_t*)++StubAddr;
+  }
+  else
+    return Addr;
 }
 
+// Assign an address to a symbol name and resolve all the relocations
+// associated with it.
+void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID,
+                                             uint64_t Addr) {
+  // The address to use for relocation resolution is not
+  // the address of the local section buffer. We must be doing
+  // a remote execution environment of some sort. Re-apply any
+  // relocations referencing this section with the given address.
+  //
+  // Addr is a uint64_t because we can't assume the pointer width
+  // of the target is the same as that of the host. Just use a generic
+  // "big enough" type.
+  Sections[SectionID].LoadAddress = Addr;
+  DEBUG(dbgs() << "Resolving relocations Section #" << SectionID
+          << "\t" << format("%p", (uint8_t *)Addr)
+          << "\n");
+  resolveRelocationList(Relocations[SectionID], Addr);
+}
+
+void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE,
+                                             uint64_t Value) {
+    uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset;
+    DEBUG(dbgs() << "\tSectionID: " << RE.SectionID
+          << " + " << RE.Offset << " (" << format("%p", Target) << ")"
+          << " Data: " << RE.Data
+          << " Addend: " << RE.Addend
+          << "\n");
+
+    resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset,
+                      Value, RE.Data, RE.Addend);
+}
+
+void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs,
+                                            uint64_t Value) {
+  for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+    resolveRelocationEntry(Relocs[i], Value);
+  }
+}
+
+// resolveSymbols - Resolve any relocations to the specified symbols if
+// we know where it lives.
+void RuntimeDyldImpl::resolveSymbols() {
+  StringMap<RelocationList>::iterator it = SymbolRelocations.begin(),
+                                      itEnd = SymbolRelocations.end();
+  for (; it != itEnd; it++) {
+    StringRef Name = it->first();
+    RelocationList &Relocs = it->second;
+    StringMap<SymbolLoc>::const_iterator Loc = SymbolTable.find(Name);
+    if (Loc == SymbolTable.end()) {
+      // This is an external symbol, try to get it address from
+      // MemoryManager.
+      uint8_t *Addr = (uint8_t*) MemMgr->getPointerToNamedFunction(Name.data(),
+                                                                   true);
+      DEBUG(dbgs() << "Resolving relocations Name: " << Name
+              << "\t" << format("%p", Addr)
+              << "\n");
+      resolveRelocationList(Relocs, (uintptr_t)Addr);
+    } else {
+      // Change the relocation to be section relative rather than symbol
+      // relative and move it to the resolved relocation list.
+      DEBUG(dbgs() << "Resolving symbol '" << Name << "'\n");
+      for (int i = 0, e = Relocs.size(); i != e; ++i) {
+        RelocationEntry Entry = Relocs[i];
+        Entry.Addend += Loc->second.second;
+        Relocations[Loc->second.first].push_back(Entry);
+      }
+      Relocs.clear();
+    }
+  }
+}
+
+
 //===----------------------------------------------------------------------===//
 // RuntimeDyld class implementation
 RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index e15b200..9351b6c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -25,222 +25,58 @@ using namespace llvm::object;
 
 namespace llvm {
 
-namespace {
 
-// FIXME: this function should probably not live here...
-//
-// Returns the name and address of an unrelocated symbol in an ELF section
-void getSymbolInfo(symbol_iterator Sym, uint64_t &Addr, StringRef &Name) {
-  //FIXME: error checking here required to catch corrupt ELF objects...
-  error_code Err = Sym->getName(Name);
-
-  uint64_t AddrInSection;
-  Err = Sym->getAddress(AddrInSection);
-
-  SectionRef empty_section;
-  section_iterator Section(empty_section);
-  Err = Sym->getSection(Section);
-
-  StringRef SectionContents;
-  Section->getContents(SectionContents);
-
-  Addr = reinterpret_cast<uint64_t>(SectionContents.data()) + AddrInSection;
-}
-
-}
-
-bool RuntimeDyldELF::loadObject(MemoryBuffer *InputBuffer) {
-  if (!isCompatibleFormat(InputBuffer))
-    return true;
-
-  OwningPtr<ObjectFile> Obj(ObjectFile::createELFObjectFile(InputBuffer));
-
-  Arch = Obj->getArch();
-
-  // Map address in the Object file image to function names
-  IntervalMap<uint64_t, StringRef>::Allocator A;
-  IntervalMap<uint64_t, StringRef> FuncMap(A);
-
-  // This is a bit of a hack.  The ObjectFile we've just loaded reports
-  // section addresses as 0 and doesn't provide access to the section
-  // offset (from which we could calculate the address.  Instead,
-  // we're storing the address when it comes up in the ST_Debug case
-  // below.
-  //
-  StringMap<uint64_t> DebugSymbolMap;
-
-  symbol_iterator SymEnd = Obj->end_symbols();
-  error_code Err;
-  for (symbol_iterator Sym = Obj->begin_symbols();
-       Sym != SymEnd; Sym.increment(Err)) {
-    SymbolRef::Type Type;
-    Sym->getType(Type);
-    if (Type == SymbolRef::ST_Function) {
-      StringRef Name;
-      uint64_t Addr;
-      getSymbolInfo(Sym, Addr, Name);
-
-      uint64_t Size;
-      Err = Sym->getSize(Size);
-
-      uint8_t *Start;
-      uint8_t *End;
-      Start = reinterpret_cast<uint8_t*>(Addr);
-      End   = reinterpret_cast<uint8_t*>(Addr + Size - 1);
-
-      extractFunction(Name, Start, End);
-      FuncMap.insert(Addr, Addr + Size - 1, Name);
-    } else if (Type == SymbolRef::ST_Debug) {
-      // This case helps us find section addresses
-      StringRef Name;
-      uint64_t Addr;
-      getSymbolInfo(Sym, Addr, Name);
-      DebugSymbolMap[Name] = Addr;
-    }
-  }
-
-  // Iterate through the relocations for this object
-  section_iterator SecEnd = Obj->end_sections();
-  for (section_iterator Sec = Obj->begin_sections();
-       Sec != SecEnd; Sec.increment(Err)) {
-    StringRef SecName;
-    uint64_t  SecAddr;
-    Sec->getName(SecName);
-    // Ignore sections that aren't in our map
-    if (DebugSymbolMap.find(SecName) == DebugSymbolMap.end()) {
-      continue;
-    }
-    SecAddr = DebugSymbolMap[SecName];
-    relocation_iterator RelEnd = Sec->end_relocations();
-    for (relocation_iterator Rel = Sec->begin_relocations();
-         Rel != RelEnd; Rel.increment(Err)) {
-      uint64_t RelOffset;
-      uint64_t RelType;
-      int64_t RelAddend;
-      SymbolRef RelSym;
-      StringRef SymName;
-      uint64_t SymAddr;
-      uint64_t SymOffset;
-
-      Rel->getAddress(RelOffset);
-      Rel->getType(RelType);
-      Rel->getAdditionalInfo(RelAddend);
-      Rel->getSymbol(RelSym);
-      RelSym.getName(SymName);
-      RelSym.getAddress(SymAddr);
-      RelSym.getFileOffset(SymOffset);
-
-      // If this relocation is inside a function, we want to store the
-      // function name and a function-relative offset
-      IntervalMap<uint64_t, StringRef>::iterator ContainingFunc
-        = FuncMap.find(SecAddr + RelOffset);
-      if (ContainingFunc.valid()) {
-        // Re-base the relocation to make it relative to the target function
-        RelOffset = (SecAddr + RelOffset) - ContainingFunc.start();
-        Relocations[SymName].push_back(RelocationEntry(ContainingFunc.value(),
-                                                       RelOffset,
-                                                       RelType,
-                                                       RelAddend,
-                                                       true));
-      } else {
-        Relocations[SymName].push_back(RelocationEntry(SecName,
-                                                       RelOffset,
-                                                       RelType,
-                                                       RelAddend,
-                                                       false));
-      }
-    }
-  }
-  return false;
-}
-
-void RuntimeDyldELF::resolveRelocations() {
-  // FIXME: deprecated. should be changed to use the by-section
-  // allocation and relocation scheme.
-
-  // Just iterate over the symbols in our symbol table and assign their
-  // addresses.
-  StringMap<SymbolLoc>::iterator i = SymbolTable.begin();
-  StringMap<SymbolLoc>::iterator e = SymbolTable.end();
-  for (;i != e; ++i) {
-    assert (i->getValue().second == 0 && "non-zero offset in by-function sym!");
-    reassignSymbolAddress(i->getKey(),
-                          (uint8_t*)Sections[i->getValue().first].base());
-  }
-}
-
-void RuntimeDyldELF::resolveX86_64Relocation(StringRef Name,
-                                             uint8_t *Addr,
-                                             const RelocationEntry &RE) {
-  uint8_t *TargetAddr;
-  if (RE.IsFunctionRelative) {
-    StringMap<SymbolLoc>::const_iterator Loc = SymbolTable.find(RE.Target);
-    assert(Loc != SymbolTable.end() && "Function for relocation not found");
-    TargetAddr =
-      reinterpret_cast<uint8_t*>(Sections[Loc->second.first].base()) +
-      Loc->second.second + RE.Offset;
-  } else {
-    // FIXME: Get the address of the target section and add that to RE.Offset
-    llvm_unreachable("Non-function relocation not implemented yet!");
-  }
-
-  switch (RE.Type) {
-  default: llvm_unreachable("Relocation type not implemented yet!");
+void RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress,
+                                             uint64_t FinalAddress,
+                                             uint64_t Value,
+                                             uint32_t Type,
+                                             int64_t Addend) {
+  switch (Type) {
+  default:
+    llvm_unreachable("Relocation type not implemented yet!");
+  break;
   case ELF::R_X86_64_64: {
-    uint8_t **Target = reinterpret_cast<uint8_t**>(TargetAddr);
-    *Target = Addr + RE.Addend;
+    uint64_t *Target = (uint64_t*)(LocalAddress);
+    *Target = Value + Addend;
     break;
   }
   case ELF::R_X86_64_32:
   case ELF::R_X86_64_32S: {
-    uint64_t Value = reinterpret_cast<uint64_t>(Addr) + RE.Addend;
+    Value += Addend;
     // FIXME: Handle the possibility of this assertion failing
-    assert((RE.Type == ELF::R_X86_64_32 && !(Value & 0xFFFFFFFF00000000ULL)) ||
-           (RE.Type == ELF::R_X86_64_32S &&
+    assert((Type == ELF::R_X86_64_32 && !(Value & 0xFFFFFFFF00000000ULL)) ||
+           (Type == ELF::R_X86_64_32S &&
             (Value & 0xFFFFFFFF00000000ULL) == 0xFFFFFFFF00000000ULL));
     uint32_t TruncatedAddr = (Value & 0xFFFFFFFF);
-    uint32_t *Target = reinterpret_cast<uint32_t*>(TargetAddr);
+    uint32_t *Target = reinterpret_cast<uint32_t*>(LocalAddress);
     *Target = TruncatedAddr;
     break;
   }
   case ELF::R_X86_64_PC32: {
-    uint32_t *Placeholder = reinterpret_cast<uint32_t*>(TargetAddr);
-    uint64_t RealOffset = *Placeholder +
-                           reinterpret_cast<uint64_t>(Addr) +
-                           RE.Addend - reinterpret_cast<uint64_t>(TargetAddr);
-    assert((RealOffset & 0xFFFFFFFF) == RealOffset);
-    uint32_t TruncOffset = (RealOffset & 0xFFFFFFFF);
+    uint32_t *Placeholder = reinterpret_cast<uint32_t*>(LocalAddress);
+    int64_t RealOffset = *Placeholder + Value + Addend - FinalAddress;
+    assert(RealOffset <= 214783647 && RealOffset >= -214783648);
+    int32_t TruncOffset = (RealOffset & 0xFFFFFFFF);
     *Placeholder = TruncOffset;
     break;
   }
   }
 }
 
-void RuntimeDyldELF::resolveX86Relocation(StringRef Name,
-                                          uint8_t *Addr,
-                                          const RelocationEntry &RE) {
-  uint8_t *TargetAddr;
-  if (RE.IsFunctionRelative) {
-    StringMap<SymbolLoc>::const_iterator Loc = SymbolTable.find(RE.Target);
-    assert(Loc != SymbolTable.end() && "Function for relocation not found");
-    TargetAddr =
-      reinterpret_cast<uint8_t*>(Sections[Loc->second.first].base()) +
-      Loc->second.second + RE.Offset;
-  } else {
-    // FIXME: Get the address of the target section and add that to RE.Offset
-    llvm_unreachable("Non-function relocation not implemented yet!");
-  }
-
-  switch (RE.Type) {
+void RuntimeDyldELF::resolveX86Relocation(uint8_t *LocalAddress,
+                                          uint32_t FinalAddress,
+                                          uint32_t Value,
+                                          uint32_t Type,
+                                          int32_t Addend) {
+  switch (Type) {
   case ELF::R_386_32: {
-    uint8_t **Target = reinterpret_cast<uint8_t**>(TargetAddr);
-    *Target = Addr + RE.Addend;
+    uint32_t *Target = (uint32_t*)(LocalAddress);
+    *Target = Value + Addend;
     break;
   }
   case ELF::R_386_PC32: {
-    uint32_t *Placeholder = reinterpret_cast<uint32_t*>(TargetAddr);
-    uint32_t RealOffset = *Placeholder + reinterpret_cast<uintptr_t>(Addr) +
-                           RE.Addend - reinterpret_cast<uintptr_t>(TargetAddr);
+    uint32_t *Placeholder = reinterpret_cast<uint32_t*>(LocalAddress);
+    uint32_t RealOffset = *Placeholder + Value + Addend - FinalAddress;
     *Placeholder = RealOffset;
     break;
     }
@@ -248,57 +84,173 @@ void RuntimeDyldELF::resolveX86Relocation(StringRef Name,
       // There are other relocation types, but it appears these are the
       //  only ones currently used by the LLVM ELF object writer
       llvm_unreachable("Relocation type not implemented yet!");
+      break;
   }
 }
 
-void RuntimeDyldELF::resolveArmRelocation(StringRef Name,
-                                          uint8_t *Addr,
-                                          const RelocationEntry &RE) {
+void RuntimeDyldELF::resolveARMRelocation(uint8_t *LocalAddress,
+                                          uint32_t FinalAddress,
+                                          uint32_t Value,
+                                          uint32_t Type,
+                                          int32_t Addend) {
+  // TODO: Add Thumb relocations.
+  uint32_t* TargetPtr = (uint32_t*)LocalAddress;
+  Value += Addend;
+
+  DEBUG(dbgs() << "resolveARMRelocation, LocalAddress: " << LocalAddress
+               << " FinalAddress: " << format("%p",FinalAddress)
+               << " Value: " << format("%x",Value)
+               << " Type: " << format("%x",Type)
+               << " Addend: " << format("%x",Addend)
+               << "\n");
+
+  switch(Type) {
+  default:
+    llvm_unreachable("Not implemented relocation type!");
+
+  // Just write 32bit value to relocation address
+  case ELF::R_ARM_ABS32 :
+    *TargetPtr = Value;
+    break;
+
+  // Write first 16 bit of 32 bit value to the mov instruction.
+  // Last 4 bit should be shifted.
+  case ELF::R_ARM_MOVW_ABS_NC :
+    Value = Value & 0xFFFF;
+    *TargetPtr |= Value & 0xFFF;
+    *TargetPtr |= ((Value >> 12) & 0xF) << 16;
+    break;
+
+  // Write last 16 bit of 32 bit value to the mov instruction.
+  // Last 4 bit should be shifted.
+  case ELF::R_ARM_MOVT_ABS :
+    Value = (Value >> 16) & 0xFFFF;
+    *TargetPtr |= Value & 0xFFF;
+    *TargetPtr |= ((Value >> 12) & 0xF) << 16;
+    break;
+
+  // Write 24 bit relative value to the branch instruction.
+  case ELF::R_ARM_PC24 :    // Fall through.
+  case ELF::R_ARM_CALL :    // Fall through.
+  case ELF::R_ARM_JUMP24 :
+    int32_t RelValue = static_cast<int32_t>(Value - FinalAddress - 8);
+    RelValue = (RelValue & 0x03FFFFFC) >> 2;
+    *TargetPtr &= 0xFF000000;
+    *TargetPtr |= RelValue;
+    break;
+  }
 }
 
-void RuntimeDyldELF::resolveRelocation(StringRef Name,
-                                       uint8_t *Addr,
-                                       const RelocationEntry &RE) {
+void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress,
+                                       uint64_t FinalAddress,
+                                       uint64_t Value,
+                                       uint32_t Type,
+                                       int64_t Addend) {
   switch (Arch) {
   case Triple::x86_64:
-    resolveX86_64Relocation(Name, Addr, RE);
+    resolveX86_64Relocation(LocalAddress, FinalAddress, Value, Type, Addend);
     break;
   case Triple::x86:
-    resolveX86Relocation(Name, Addr, RE);
+    resolveX86Relocation(LocalAddress, (uint32_t)(FinalAddress & 0xffffffffL),
+                         (uint32_t)(Value & 0xffffffffL), Type,
+                         (uint32_t)(Addend & 0xffffffffL));
     break;
-  case Triple::arm:
-    resolveArmRelocation(Name, Addr, RE);
+  case Triple::arm:    // Fall through.
+  case Triple::thumb:
+    resolveARMRelocation(LocalAddress, (uint32_t)(FinalAddress & 0xffffffffL),
+                         (uint32_t)(Value & 0xffffffffL), Type,
+                         (uint32_t)(Addend & 0xffffffffL));
     break;
   default: llvm_unreachable("Unsupported CPU type!");
   }
 }
 
-void RuntimeDyldELF::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
-  // FIXME: deprecated. switch to reassignSectionAddress() instead.
-  //
-  // Actually moving the symbol address requires by-section mapping.
-  assert(Sections[SymbolTable.lookup(Name).first].base() == (void*)Addr &&
-         "Unable to relocate section in by-function JIT allocation model!");
-
-  RelocationList &Relocs = Relocations[Name];
-  for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
-    RelocationEntry &RE = Relocs[i];
-    resolveRelocation(Name, Addr, RE);
+void RuntimeDyldELF::
+processRelocationRef(const ObjRelocationInfo &Rel, const ObjectFile &Obj,
+                     ObjSectionToIDMap &ObjSectionToID,
+                     LocalSymbolMap &Symbols, StubMap &Stubs) {
+
+  uint32_t RelType = (uint32_t)(Rel.Type & 0xffffffffL);
+  intptr_t Addend = (intptr_t)Rel.AdditionalInfo;
+  RelocationValueRef Value;
+  StringRef TargetName;
+  const SymbolRef &Symbol = Rel.Symbol;
+  Symbol.getName(TargetName);
+  DEBUG(dbgs() << "\t\tRelType: " << RelType
+               << " Addend: " << Addend
+               << " TargetName: " << TargetName
+               << "\n");
+  // First look the symbol in object file symbols.
+  LocalSymbolMap::iterator it = Symbols.find(TargetName.data());
+  if (it != Symbols.end()) {
+    Value.SectionID = it->second.first;
+    Value.Addend = it->second.second;
+  } else {
+    // Second look the symbol in global symbol table.
+    StringMap<SymbolLoc>::iterator itS = SymbolTable.find(TargetName.data());
+    if (itS != SymbolTable.end()) {
+      Value.SectionID = itS->second.first;
+      Value.Addend = itS->second.second;
+    } else {
+      SymbolRef::Type SymType;
+      Symbol.getType(SymType);
+      switch (SymType) {
+        case SymbolRef::ST_Debug: {
+          // TODO: Now ELF SymbolRef::ST_Debug = STT_SECTION, it's not obviously
+          // and can be changed by another developers. Maybe best way is add
+          // a new symbol type ST_Section to SymbolRef and use it.
+          section_iterator sIt = Obj.end_sections();
+          Symbol.getSection(sIt);
+          if (sIt == Obj.end_sections())
+            llvm_unreachable("Symbol section not found, bad object file format!");
+          DEBUG(dbgs() << "\t\tThis is section symbol\n");
+          Value.SectionID = findOrEmitSection((*sIt), true, ObjSectionToID);
+          Value.Addend = Addend;
+          break;
+        }
+        case SymbolRef::ST_Unknown: {
+          Value.SymbolName = TargetName.data();
+          Value.Addend = Addend;
+          break;
+        }
+        default:
+          llvm_unreachable("Unresolved symbol type!");
+          break;
+      }
+    }
   }
-}
-
-// Assign an address to a symbol name and resolve all the relocations
-// associated with it.
-void RuntimeDyldELF::reassignSectionAddress(unsigned SectionID, uint64_t Addr) {
-  // The address to use for relocation resolution is not
-  // the address of the local section buffer. We must be doing
-  // a remote execution environment of some sort. Re-apply any
-  // relocations referencing this section with the given address.
-  //
-  // Addr is a uint64_t because we can't assume the pointer width
-  // of the target is the same as that of the host. Just use a generic
-  // "big enough" type.
-  assert(0);
+  DEBUG(dbgs() << "\t\tRel.SectionID: " << Rel.SectionID
+               << " Rel.Offset: " << Rel.Offset
+               << "\n");
+  if (Arch == Triple::arm &&
+      (RelType == ELF::R_ARM_PC24 ||
+       RelType == ELF::R_ARM_CALL ||
+       RelType == ELF::R_ARM_JUMP24)) {
+    // This is an ARM branch relocation, need to use a stub function.
+    DEBUG(dbgs() << "\t\tThis is an ARM branch relocation.");
+    SectionEntry &Section = Sections[Rel.SectionID];
+    uint8_t *Target = Section.Address + Rel.Offset;
+
+    //  Look up for existing stub.
+    StubMap::const_iterator stubIt = Stubs.find(Value);
+    if (stubIt != Stubs.end()) {
+      resolveRelocation(Target, Section.LoadAddress, (uint64_t)Section.Address +
+                        stubIt->second, RelType, 0);
+      DEBUG(dbgs() << " Stub function found\n");
+    } else {
+      // Create a new stub function.
+      DEBUG(dbgs() << " Create a new stub function\n");
+      Stubs[Value] = Section.StubOffset;
+      uint8_t *StubTargetAddr = createStubFunction(Section.Address +
+                                                   Section.StubOffset);
+      AddRelocation(Value, Rel.SectionID,
+                    StubTargetAddr - Section.Address, ELF::R_ARM_ABS32);
+      resolveRelocation(Target, Section.LoadAddress, (uint64_t)Section.Address +
+                        Section.StubOffset, RelType, 0);
+      Section.StubOffset += getMaxStubSize();
+    }
+  } else
+    AddRelocation(Value, Rel.SectionID, Rel.Offset, RelType);
 }
 
 bool RuntimeDyldELF::isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index e0f7d54..36566da 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -21,158 +21,42 @@ using namespace llvm;
 
 namespace llvm {
 class RuntimeDyldELF : public RuntimeDyldImpl {
-    // For each symbol, keep a list of relocations based on it. Anytime
-    // its address is reassigned (the JIT re-compiled the function, e.g.),
-    // the relocations get re-resolved.
-    struct RelocationEntry {
-      // Function or section this relocation is contained in.
-      std::string Target;
-      // Offset into the target function or section for the relocation.
-      uint32_t    Offset;
-      // Relocation type
-      uint32_t    Type;
-      // Addend encoded in the instruction itself, if any.
-      int32_t     Addend;
-      // Has the relocation been recalcuated as an offset within a function?
-      bool        IsFunctionRelative;
-      // Has this relocation been resolved previously?
-      bool        isResolved;
-
-      RelocationEntry(StringRef t,
-                      uint32_t offset,
-                      uint32_t type,
-                      int32_t addend,
-                      bool isFunctionRelative)
-        : Target(t)
-        , Offset(offset)
-        , Type(type)
-        , Addend(addend)
-        , IsFunctionRelative(isFunctionRelative)
-        , isResolved(false) { }
-    };
-    typedef SmallVector<RelocationEntry, 4> RelocationList;
-    StringMap<RelocationList> Relocations;
-    unsigned Arch;
-
-    void resolveRelocations();
-
-    void resolveX86_64Relocation(StringRef Name,
-                                 uint8_t *Addr,
-                                 const RelocationEntry &RE);
-
-    void resolveX86Relocation(StringRef Name,
-                              uint8_t *Addr,
-                              const RelocationEntry &RE);
-
-    void resolveArmRelocation(StringRef Name,
-                              uint8_t *Addr,
-                              const RelocationEntry &RE);
-
-    void resolveRelocation(StringRef Name,
-                           uint8_t *Addr,
-                           const RelocationEntry &RE);
-
-public:
-  RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
-
-  bool loadObject(MemoryBuffer *InputBuffer);
-
-  void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
-  void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
-
-  bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const;
-};
-
-} // end namespace llvm
-
-#endif 
-
-//===-- RuntimeDyldELF.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// ELF support for MC-JIT runtime dynamic linker.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_RUNTIME_DYLD_ELF_H
-#define LLVM_RUNTIME_DYLD_ELF_H
-
-#include "RuntimeDyldImpl.h"
-
-using namespace llvm;
-
-
-namespace llvm {
-class RuntimeDyldELF : public RuntimeDyldImpl {
-    // For each symbol, keep a list of relocations based on it. Anytime
-    // its address is reassigned (the JIT re-compiled the function, e.g.),
-    // the relocations get re-resolved.
-    struct RelocationEntry {
-      // Function or section this relocation is contained in.
-      std::string Target;
-      // Offset into the target function or section for the relocation.
-      uint32_t    Offset;
-      // Relocation type
-      uint32_t    Type;
-      // Addend encoded in the instruction itself, if any.
-      int32_t     Addend;
-      // Has the relocation been recalcuated as an offset within a function?
-      bool        IsFunctionRelative;
-      // Has this relocation been resolved previously?
-      bool        isResolved;
-
-      RelocationEntry(StringRef t,
-                      uint32_t offset,
-                      uint32_t type,
-                      int32_t addend,
-                      bool isFunctionRelative)
-        : Target(t)
-        , Offset(offset)
-        , Type(type)
-        , Addend(addend)
-        , IsFunctionRelative(isFunctionRelative)
-        , isResolved(false) { }
-    };
-    typedef SmallVector<RelocationEntry, 4> RelocationList;
-    StringMap<RelocationList> Relocations;
-    unsigned Arch;
-
-    void resolveRelocations();
-
-    void resolveX86_64Relocation(StringRef Name,
-                                 uint8_t *Addr,
-                                 const RelocationEntry &RE);
-
-    void resolveX86Relocation(StringRef Name,
-                              uint8_t *Addr,
-                              const RelocationEntry &RE);
-
-    void resolveArmRelocation(StringRef Name,
-                              uint8_t *Addr,
-                              const RelocationEntry &RE);
-
-    void resolveRelocation(StringRef Name,
-                           uint8_t *Addr,
-                           const RelocationEntry &RE);
+protected:
+  void resolveX86_64Relocation(uint8_t *LocalAddress,
+                               uint64_t FinalAddress,
+                               uint64_t Value,
+                               uint32_t Type,
+                               int64_t Addend);
+
+  void resolveX86Relocation(uint8_t *LocalAddress,
+                            uint32_t FinalAddress,
+                            uint32_t Value,
+                            uint32_t Type,
+                            int32_t Addend);
+
+  void resolveARMRelocation(uint8_t *LocalAddress,
+                            uint32_t FinalAddress,
+                            uint32_t Value,
+                            uint32_t Type,
+                            int32_t Addend);
+
+  virtual void resolveRelocation(uint8_t *LocalAddress,
+                                 uint64_t FinalAddress,
+                                 uint64_t Value,
+                                 uint32_t Type,
+                                 int64_t Addend);
+
+  virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+                                    const ObjectFile &Obj,
+                                    ObjSectionToIDMap &ObjSectionToID,
+                                    LocalSymbolMap &Symbols, StubMap &Stubs);
 
 public:
   RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
 
-  bool loadObject(MemoryBuffer *InputBuffer);
-
-  void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
-  void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
-
   bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const;
 };
 
 } // end namespace llvm
 
-#endif 
-
+#endif
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 28e99be..d6430a9 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -15,45 +15,125 @@
 #define LLVM_RUNTIME_DYLD_IMPL_H
 
 #include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/Object/ObjectFile.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/Support/Memory.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/system_error.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/Triple.h"
+#include <map>
+#include "llvm/Support/Format.h"
 
 using namespace llvm;
+using namespace llvm::object;
 
 namespace llvm {
+
+class SectionEntry {
+public:
+  uint8_t* Address;
+  size_t Size;
+  uint64_t LoadAddress;   // For each section, the address it will be
+                          // considered to live at for relocations. The same
+                          // as the pointer to the above memory block for
+                          // hosted JITs.
+  uintptr_t StubOffset;   // It's used for architecturies with stub
+                          // functions for far relocations like ARM.
+  uintptr_t ObjAddress;   // Section address in object file. It's use for
+                          // calculate MachO relocation addend
+  SectionEntry(uint8_t* address, size_t size, uintptr_t stubOffset,
+               uintptr_t objAddress)
+    : Address(address), Size(size), LoadAddress((uintptr_t)address),
+      StubOffset(stubOffset), ObjAddress(objAddress) {}
+};
+
+class RelocationEntry {
+public:
+  unsigned    SectionID;  // Section the relocation is contained in.
+  uintptr_t   Offset;     // Offset into the section for the relocation.
+  uint32_t    Data;       // Relocatino data. Including type of relocation
+                          // and another flags and parameners from
+  intptr_t    Addend;     // Addend encoded in the instruction itself, if any,
+                          // plus the offset into the source section for
+                          // the symbol once the relocation is resolvable.
+  RelocationEntry(unsigned id, uint64_t offset, uint32_t data, int64_t addend)
+    : SectionID(id), Offset(offset), Data(data), Addend(addend) {}
+};
+
+// Raw relocation data from object file
+class ObjRelocationInfo {
+public:
+  unsigned  SectionID;
+  uint64_t  Offset;
+  SymbolRef Symbol;
+  uint64_t  Type;
+  int64_t   AdditionalInfo;
+};
+
+class RelocationValueRef {
+public:
+  unsigned  SectionID;
+  intptr_t  Addend;
+  const char *SymbolName;
+  RelocationValueRef(): SectionID(0), Addend(0), SymbolName(0) {}
+
+  inline bool operator==(const RelocationValueRef &Other) const {
+    return std::memcmp(this, &Other, sizeof(RelocationValueRef)) == 0;
+  }
+  inline bool operator <(const RelocationValueRef &Other) const {
+    return std::memcmp(this, &Other, sizeof(RelocationValueRef)) < 0;
+  }
+};
+
 class RuntimeDyldImpl {
 protected:
-  unsigned CPUType;
-  unsigned CPUSubtype;
-
   // The MemoryManager to load objects into.
   RTDyldMemoryManager *MemMgr;
 
-  // For each section, we have a MemoryBlock of it's data.
-  // Indexed by SectionID.
-  SmallVector<sys::MemoryBlock, 32> Sections;
-  // For each section, the address it will be considered to live at for
-  // relocations. The same as the pointer to the above memory block for hosted
-  // JITs. Indexed by SectionID.
-  SmallVector<uint64_t, 32> SectionLoadAddress;
+  // A list of emmitted sections.
+  typedef SmallVector<SectionEntry, 64> SectionList;
+  SectionList Sections;
 
-  // Keep a map of starting local address to the SectionID which references it.
-  // Lookup function for when we assign virtual addresses.
-  DenseMap<void *, unsigned> SectionLocalMemToID;
+  // Keep a map of sections from object file to the SectionID which
+  // references it.
+  typedef std::map<SectionRef, unsigned> ObjSectionToIDMap;
 
   // Master symbol table. As modules are loaded and external symbols are
   // resolved, their addresses are stored here as a SectionID/Offset pair.
-  typedef std::pair<unsigned, uint64_t> SymbolLoc;
+  typedef std::pair<unsigned, uintptr_t> SymbolLoc;
   StringMap<SymbolLoc> SymbolTable;
+  typedef DenseMap<const char*, SymbolLoc> LocalSymbolMap;
+
+  // For each symbol, keep a list of relocations based on it. Anytime
+  // its address is reassigned (the JIT re-compiled the function, e.g.),
+  // the relocations get re-resolved.
+  // The symbol (or section) the relocation is sourced from is the Key
+  // in the relocation list where it's stored.
+  typedef SmallVector<RelocationEntry, 64> RelocationList;
+  // Relocations to sections already loaded. Indexed by SectionID which is the
+  // source of the address. The target where the address will be writen is
+  // SectionID/Offset in the relocation itself.
+  DenseMap<unsigned, RelocationList> Relocations;
+  // Relocations to external symbols that are not yet resolved.
+  // Indexed by symbol name.
+  StringMap<RelocationList> SymbolRelocations;
+
+  typedef std::map<RelocationValueRef, uintptr_t> StubMap;
+
+  Triple::ArchType Arch;
+
+  inline unsigned getMaxStubSize() {
+    if (Arch == Triple::arm || Arch == Triple::thumb)
+      return 8; // 32-bit instruction and 32-bit address
+    else
+      return 0;
+  }
 
   bool HasError;
   std::string ErrorStr;
@@ -66,17 +146,62 @@ protected:
   }
 
   uint8_t *getSectionAddress(unsigned SectionID) {
-    return (uint8_t*)Sections[SectionID].base();
+    return (uint8_t*)Sections[SectionID].Address;
   }
-  void extractFunction(StringRef Name, uint8_t *StartAddress,
-                       uint8_t *EndAddress);
 
+  /// \brief Emits section data from the object file to the MemoryManager.
+  /// \param IsCode if it's true then allocateCodeSection() will be
+  ///        used for emmits, else allocateDataSection() will be used.
+  /// \return SectionID.
+  unsigned emitSection(const SectionRef &Section, bool IsCode);
+
+  /// \brief Find Section in LocalSections. If the secton is not found - emit
+  ///        it and store in LocalSections.
+  /// \param IsCode if it's true then allocateCodeSection() will be
+  ///        used for emmits, else allocateDataSection() will be used.
+  /// \return SectionID.
+  unsigned findOrEmitSection(const SectionRef &Section, bool IsCode,
+                             ObjSectionToIDMap &LocalSections);
+
+  /// \brief If Value.SymbolName is NULL then store relocation to the
+  ///        Relocations, else store it in the SymbolRelocations.
+  void AddRelocation(const RelocationValueRef &Value, unsigned SectionID,
+                     uintptr_t Offset, uint32_t RelType);
+
+  /// \brief Emits long jump instruction to Addr.
+  /// \return Pointer to the memory area for emitting target address.
+  uint8_t* createStubFunction(uint8_t *Addr);
+
+  /// \brief Resolves relocations from Relocs list with address from Value.
+  void resolveRelocationList(const RelocationList &Relocs, uint64_t Value);
+  void resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value);
+
+  /// \brief A object file specific relocation resolver
+  /// \param Address Address to apply the relocation action
+  /// \param Value Target symbol address to apply the relocation action
+  /// \param Type object file specific relocation type
+  /// \param Addend A constant addend used to compute the value to be stored
+  ///        into the relocatable field
+  virtual void resolveRelocation(uint8_t *LocalAddress,
+                                 uint64_t FinalAddress,
+                                 uint64_t Value,
+                                 uint32_t Type,
+                                 int64_t Addend) = 0;
+
+  /// \brief Parses the object file relocation and store it to Relocations
+  ///        or SymbolRelocations. Its depend from object file type.
+  virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+                                    const ObjectFile &Obj,
+                                    ObjSectionToIDMap &ObjSectionToID,
+                                    LocalSymbolMap &Symbols, StubMap &Stubs) = 0;
+
+  void resolveSymbols();
 public:
   RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {}
 
   virtual ~RuntimeDyldImpl();
 
-  virtual bool loadObject(MemoryBuffer *InputBuffer) = 0;
+  bool loadObject(const MemoryBuffer *InputBuffer);
 
   void *getSymbolAddress(StringRef Name) {
     // FIXME: Just look up as a function for now. Overly simple of course.
@@ -87,9 +212,9 @@ public:
     return getSectionAddress(Loc.first) + Loc.second;
   }
 
-  virtual void resolveRelocations();
+  void resolveRelocations();
 
-  virtual void reassignSectionAddress(unsigned SectionID, uint64_t Addr) = 0;
+  void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
 
   void mapSectionAddress(void *LocalAddress, uint64_t TargetAddress);
 
@@ -103,6 +228,7 @@ public:
   StringRef getErrorString() { return ErrorStr; }
 
   virtual bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const = 0;
+
 };
 
 } // end namespace llvm
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index c11b2c3..24437e0 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -21,30 +21,64 @@ using namespace llvm::object;
 
 namespace llvm {
 
-bool RuntimeDyldMachO::
-resolveRelocation(uint8_t *Address, uint64_t Value, bool isPCRel,
-                  unsigned Type, unsigned Size, int64_t Addend) {
+void RuntimeDyldMachO::resolveRelocation(uint8_t *LocalAddress,
+                                         uint64_t FinalAddress,
+                                         uint64_t Value,
+                                         uint32_t Type,
+                                         int64_t Addend) {
+  bool isPCRel = (Type >> 24) & 1;
+  unsigned MachoType = (Type >> 28) & 0xf;
+  unsigned Size = 1 << ((Type >> 25) & 3);
+
+  DEBUG(dbgs() << "resolveRelocation LocalAddress: " << format("%p", LocalAddress)
+        << " FinalAddress: " << format("%p", FinalAddress)
+        << " Value: " << format("%p", Value)
+        << " Addend: " << Addend
+        << " isPCRel: " << isPCRel
+        << " MachoType: " << MachoType
+        << " Size: " << Size
+        << "\n");
+
   // This just dispatches to the proper target specific routine.
-  switch (CPUType) {
+  switch (Arch) {
   default: llvm_unreachable("Unsupported CPU type!");
-  case mach::CTM_x86_64:
-    return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value,
-                                   isPCRel, Type, Size, Addend);
-  case mach::CTM_ARM:
-    return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value,
-                                isPCRel, Type, Size, Addend);
+  case Triple::x86_64: // Fall through.
+  case Triple::x86:
+    resolveX86_64Relocation(LocalAddress,
+                            FinalAddress,
+                            (uintptr_t)Value,
+                            isPCRel,
+                            MachoType,
+                            Size,
+                            Addend);
+    break;
+  case Triple::arm:    // Fall through.
+  case Triple::thumb:
+    resolveARMRelocation(LocalAddress,
+                         FinalAddress,
+                         (uintptr_t)Value,
+                         isPCRel,
+                         MachoType,
+                         Size,
+                         Addend);
+    break;
   }
 }
 
 bool RuntimeDyldMachO::
-resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
-                        unsigned Type, unsigned Size, int64_t Addend) {
+resolveX86_64Relocation(uint8_t *LocalAddress,
+                        uint64_t FinalAddress,
+                        uint64_t Value,
+                        bool isPCRel,
+                        unsigned Type,
+                        unsigned Size,
+                        int64_t Addend) {
   // If the relocation is PC-relative, the value to be encoded is the
   // pointer difference.
   if (isPCRel)
     // FIXME: It seems this value needs to be adjusted by 4 for an effective PC
     // address. Is that expected? Only for branches, perhaps?
-    Value -= Address + 4;
+    Value -= FinalAddress + 4;
 
   switch(Type) {
   default:
@@ -58,7 +92,7 @@ resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
     Value += Addend;
     // Mask in the target value a byte at a time (we don't have an alignment
     // guarantee for the target address, so this is safest).
-    uint8_t *p = (uint8_t*)Address;
+    uint8_t *p = (uint8_t*)LocalAddress;
     for (unsigned i = 0; i < Size; ++i) {
       *p++ = (uint8_t)Value;
       Value >>= 8;
@@ -74,12 +108,17 @@ resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
 }
 
 bool RuntimeDyldMachO::
-resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
-                     unsigned Type, unsigned Size, int64_t Addend) {
+resolveARMRelocation(uint8_t *LocalAddress,
+                     uint64_t FinalAddress,
+                     uint64_t Value,
+                     bool isPCRel,
+                     unsigned Type,
+                     unsigned Size,
+                     int64_t Addend) {
   // If the relocation is PC-relative, the value to be encoded is the
   // pointer difference.
   if (isPCRel) {
-    Value -= Address;
+    Value -= FinalAddress;
     // ARM PCRel relocations have an effective-PC offset of two instructions
     // (four bytes in Thumb mode, 8 bytes in ARM mode).
     // FIXME: For now, assume ARM mode.
@@ -92,7 +131,7 @@ resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
   case macho::RIT_Vanilla: {
     // Mask in the target value a byte at a time (we don't have an alignment
     // guarantee for the target address, so this is safest).
-    uint8_t *p = (uint8_t*)Address;
+    uint8_t *p = (uint8_t*)LocalAddress;
     for (unsigned i = 0; i < Size; ++i) {
       *p++ = (uint8_t)Value;
       Value >>= 8;
@@ -102,7 +141,7 @@ resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
   case macho::RIT_ARM_Branch24Bit: {
     // Mask the value into the target address. We know instructions are
     // 32-bit aligned, so we can do it all at once.
-    uint32_t *p = (uint32_t*)Address;
+    uint32_t *p = (uint32_t*)LocalAddress;
     // The low two bits of the value are not encoded.
     Value >>= 2;
     // Mask the value to 24 bits.
@@ -128,463 +167,83 @@ resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
   return false;
 }
 
-bool RuntimeDyldMachO::
-loadSegment32(const MachOObject *Obj,
-              const MachOObject::LoadCommandInfo *SegmentLCI,
-              const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
-  // FIXME: This should really be combined w/ loadSegment64. Templatized
-  // function on the 32/64 datatypes maybe?
-  InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
-  Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC);
-  if (!SegmentLC)
-    return Error("unable to load segment load command");
-
-
-  SmallVector<unsigned, 16> SectionMap;
-  for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
-    InMemoryStruct<macho::Section> Sect;
-    Obj->ReadSection(*SegmentLCI, SectNum, Sect);
-    if (!Sect)
-      return Error("unable to load section: '" + Twine(SectNum) + "'");
-
-    // Allocate memory via the MM for the section.
-    uint8_t *Buffer;
-    uint32_t SectionID = Sections.size();
-    if (Sect->Flags == 0x80000400)
-      Buffer = MemMgr->allocateCodeSection(Sect->Size, Sect->Align, SectionID);
-    else
-      Buffer = MemMgr->allocateDataSection(Sect->Size, Sect->Align, SectionID);
-
-    DEBUG(dbgs() << "Loading "
-                 << ((Sect->Flags == 0x80000400) ? "text" : "data")
-                 << " (ID #" << SectionID << ")"
-                 << " '" << Sect->SegmentName << ","
-                 << Sect->Name << "' of size " << Sect->Size
-                 << " to address " << Buffer << ".\n");
-
-    // Copy the payload from the object file into the allocated buffer.
-    uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset,
-                                           SegmentLC->FileSize).data();
-    memcpy(Buffer, Base + Sect->Address, Sect->Size);
-
-    // Remember what got allocated for this SectionID.
-    Sections.push_back(sys::MemoryBlock(Buffer, Sect->Size));
-    SectionLocalMemToID[Buffer] = SectionID;
-
-    // By default, the load address of a section is its memory buffer.
-    SectionLoadAddress.push_back((uint64_t)Buffer);
-
-    // Keep a map of object file section numbers to corresponding SectionIDs
-    // while processing the file.
-    SectionMap.push_back(SectionID);
-  }
-
-  // Process the symbol table.
-  SmallVector<StringRef, 64> SymbolNames;
-  processSymbols32(Obj, SectionMap, SymbolNames, SymtabLC);
-
-  // Process the relocations for each section we're loading.
-  Relocations.grow(Relocations.size() + SegmentLC->NumSections);
-  for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
-    InMemoryStruct<macho::Section> Sect;
-    Obj->ReadSection(*SegmentLCI, SectNum, Sect);
-    if (!Sect)
-      return Error("unable to load section: '" + Twine(SectNum) + "'");
-    for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
-      InMemoryStruct<macho::RelocationEntry> RE;
-      Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
-      if (RE->Word0 & macho::RF_Scattered)
-        return Error("NOT YET IMPLEMENTED: scattered relocations.");
-      // Word0 of the relocation is the offset into the section where the
-      // relocation should be applied. We need to translate that into an
-      // offset into a function since that's our atom.
-      uint32_t Offset = RE->Word0;
-      bool isExtern = (RE->Word1 >> 27) & 1;
-
-      // FIXME: Get the relocation addend from the target address.
-      // FIXME: VERY imporant for internal relocations.
-
-      // Figure out the source symbol of the relocation. If isExtern is true,
-      // this relocation references the symbol table, otherwise it references
-      // a section in the same object, numbered from 1 through NumSections
-      // (SectionBases is [0, NumSections-1]).
-      uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
-      if (!isExtern) {
-        assert(SourceNum > 0 && "Invalid relocation section number!");
-        unsigned SectionID = SectionMap[SourceNum - 1];
-        unsigned TargetID = SectionMap[SectNum];
-        DEBUG(dbgs() << "Internal relocation at Section #"
-                     << TargetID << " + " << Offset
-                     << " from Section #"
-                     << SectionID << " (Word1: "
-                     << format("0x%x", RE->Word1) << ")\n");
-
-        // Store the relocation information. It will get resolved when
-        // the section addresses are assigned.
-        Relocations[SectionID].push_back(RelocationEntry(TargetID,
-                                                         Offset,
-                                                         RE->Word1,
-                                                         0 /*Addend*/));
-      } else {
-        StringRef SourceName = SymbolNames[SourceNum];
-
-        // Now store the relocation information. Associate it with the source
-        // symbol. Just add it to the unresolved list and let the general
-        // path post-load resolve it if we know where the symbol is.
-        UnresolvedRelocations[SourceName].push_back(RelocationEntry(SectNum,
-                                                                    Offset,
-                                                                    RE->Word1,
-                                                                 0 /*Addend*/));
-        DEBUG(dbgs() << "Relocation at Section #" << SectNum << " + " << Offset
-              << " from '" << SourceName << "(Word1: "
-              << format("0x%x", RE->Word1) << ")\n");
-      }
+void RuntimeDyldMachO::
+processRelocationRef(const ObjRelocationInfo &Rel, const ObjectFile &Obj,
+                     ObjSectionToIDMap &ObjSectionToID,
+                     LocalSymbolMap &Symbols, StubMap &Stubs) {
+
+  uint32_t RelType = (uint32_t) (Rel.Type & 0xffffffffL);
+  RelocationValueRef Value;
+  SectionEntry &Section = Sections[Rel.SectionID];
+  uint8_t *Target = Section.Address + Rel.Offset;
+
+  bool isExtern = (RelType >> 27) & 1;
+  if (isExtern) {
+    StringRef TargetName;
+    const SymbolRef &Symbol = Rel.Symbol;
+    Symbol.getName(TargetName);
+    // First look the symbol in object file symbols.
+    LocalSymbolMap::iterator it = Symbols.find(TargetName.data());
+    if (it != Symbols.end()) {
+      Value.SectionID = it->second.first;
+      Value.Addend = it->second.second;
+    } else {
+      // Second look the symbol in global symbol table.
+      StringMap<SymbolLoc>::iterator itS = SymbolTable.find(TargetName.data());
+      if (itS != SymbolTable.end()) {
+        Value.SectionID = itS->second.first;
+        Value.Addend = itS->second.second;
+      } else
+        Value.SymbolName = TargetName.data();
     }
-  }
-
-  // Resolve the addresses of any symbols that were defined in this segment.
-  for (int i = 0, e = SymbolNames.size(); i != e; ++i)
-    resolveSymbol(SymbolNames[i]);
-
-  return false;
-}
-
-
-bool RuntimeDyldMachO::
-loadSegment64(const MachOObject *Obj,
-              const MachOObject::LoadCommandInfo *SegmentLCI,
-              const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
-  InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
-  Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC);
-  if (!Segment64LC)
-    return Error("unable to load segment load command");
-
-
-  SmallVector<unsigned, 16> SectionMap;
-  for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
-    InMemoryStruct<macho::Section64> Sect;
-    Obj->ReadSection64(*SegmentLCI, SectNum, Sect);
-    if (!Sect)
-      return Error("unable to load section: '" + Twine(SectNum) + "'");
-
-    // Allocate memory via the MM for the section.
-    uint8_t *Buffer;
-    uint32_t SectionID = Sections.size();
-    unsigned Align = 1 << Sect->Align; // .o file has log2 alignment.
-    if (Sect->Flags == 0x80000400)
-      Buffer = MemMgr->allocateCodeSection(Sect->Size, Align, SectionID);
-    else
-      Buffer = MemMgr->allocateDataSection(Sect->Size, Align, SectionID);
-
-    DEBUG(dbgs() << "Loading "
-                 << ((Sect->Flags == 0x80000400) ? "text" : "data")
-                 << " (ID #" << SectionID << ")"
-                 << " '" << Sect->SegmentName << ","
-                 << Sect->Name << "' of size " << Sect->Size
-                 << " (align " << Align << ")"
-                 << " to address " << Buffer << ".\n");
-
-    // Copy the payload from the object file into the allocated buffer.
-    uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset,
-                                           Segment64LC->FileSize).data();
-    memcpy(Buffer, Base + Sect->Address, Sect->Size);
-
-    // Remember what got allocated for this SectionID.
-    Sections.push_back(sys::MemoryBlock(Buffer, Sect->Size));
-    SectionLocalMemToID[Buffer] = SectionID;
-
-    // By default, the load address of a section is its memory buffer.
-    SectionLoadAddress.push_back((uint64_t)Buffer);
-
-    // Keep a map of object file section numbers to corresponding SectionIDs
-    // while processing the file.
-    SectionMap.push_back(SectionID);
-  }
-
-  // Process the symbol table.
-  SmallVector<StringRef, 64> SymbolNames;
-  processSymbols64(Obj, SectionMap, SymbolNames, SymtabLC);
-
-  // Process the relocations for each section we're loading.
-  Relocations.grow(Relocations.size() + Segment64LC->NumSections);
-  for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
-    InMemoryStruct<macho::Section64> Sect;
-    Obj->ReadSection64(*SegmentLCI, SectNum, Sect);
-    if (!Sect)
-      return Error("unable to load section: '" + Twine(SectNum) + "'");
-    for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
-      InMemoryStruct<macho::RelocationEntry> RE;
-      Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
-      if (RE->Word0 & macho::RF_Scattered)
-        return Error("NOT YET IMPLEMENTED: scattered relocations.");
-      // Word0 of the relocation is the offset into the section where the
-      // relocation should be applied. We need to translate that into an
-      // offset into a function since that's our atom.
-      uint32_t Offset = RE->Word0;
-      bool isExtern = (RE->Word1 >> 27) & 1;
-
-      // FIXME: Get the relocation addend from the target address.
-      // FIXME: VERY imporant for internal relocations.
-
-      // Figure out the source symbol of the relocation. If isExtern is true,
-      // this relocation references the symbol table, otherwise it references
-      // a section in the same object, numbered from 1 through NumSections
-      // (SectionBases is [0, NumSections-1]).
-      uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
-      if (!isExtern) {
-        assert(SourceNum > 0 && "Invalid relocation section number!");
-        unsigned SectionID = SectionMap[SourceNum - 1];
-        unsigned TargetID = SectionMap[SectNum];
-        DEBUG(dbgs() << "Internal relocation at Section #"
-                     << TargetID << " + " << Offset
-                     << " from Section #"
-                     << SectionID << " (Word1: "
-                     << format("0x%x", RE->Word1) << ")\n");
-
-        // Store the relocation information. It will get resolved when
-        // the section addresses are assigned.
-        Relocations[SectionID].push_back(RelocationEntry(TargetID,
-                                                         Offset,
-                                                         RE->Word1,
-                                                         0 /*Addend*/));
-      } else {
-        StringRef SourceName = SymbolNames[SourceNum];
-
-        // Now store the relocation information. Associate it with the source
-        // symbol. Just add it to the unresolved list and let the general
-        // path post-load resolve it if we know where the symbol is.
-        UnresolvedRelocations[SourceName].push_back(RelocationEntry(SectNum,
-                                                                    Offset,
-                                                                    RE->Word1,
-                                                                 0 /*Addend*/));
-        DEBUG(dbgs() << "Relocation at Section #" << SectNum << " + " << Offset
-              << " from '" << SourceName << "(Word1: "
-              << format("0x%x", RE->Word1) << ")\n");
-      }
+  } else {
+    error_code err;
+    uint8_t sIdx = static_cast<uint8_t>(RelType & 0xFF);
+    section_iterator sIt = Obj.begin_sections(),
+                     sItEnd = Obj.end_sections();
+    for (uint8_t i = 1; i < sIdx; i++) {
+      error_code err;
+      sIt.increment(err);
+      if (sIt == sItEnd)
+        break;
     }
-  }
-
-  // Resolve the addresses of any symbols that were defined in this segment.
-  for (int i = 0, e = SymbolNames.size(); i != e; ++i)
-    resolveSymbol(SymbolNames[i]);
-
-  return false;
-}
-
-bool RuntimeDyldMachO::
-processSymbols32(const MachOObject *Obj,
-                 SmallVectorImpl<unsigned> &SectionMap,
-                 SmallVectorImpl<StringRef> &SymbolNames,
-                 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
-  // FIXME: Combine w/ processSymbols64. Factor 64/32 datatype and such.
-  for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
-    InMemoryStruct<macho::SymbolTableEntry> STE;
-    Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
-    if (!STE)
-      return Error("unable to read symbol: '" + Twine(i) + "'");
-    // Get the symbol name.
-    StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
-    SymbolNames.push_back(Name);
-
-    // FIXME: Check the symbol type and flags.
-    if (STE->Type != 0xF)  // external, defined in this segment.
-      continue;
-    // Flags in the upper nibble we don't care about.
-    if ((STE->Flags & 0xf) != 0x0)
-      continue;
-
-    // Remember the symbol.
-    uint32_t SectionID = SectionMap[STE->SectionIndex - 1];
-    SymbolTable[Name] = SymbolLoc(SectionID, STE->Value);
-
-    DEBUG(dbgs() << "Symbol: '" << Name << "' @ "
-                 << (getSectionAddress(SectionID) + STE->Value)
-                 << "\n");
-  }
-  return false;
-}
-
-bool RuntimeDyldMachO::
-processSymbols64(const MachOObject *Obj,
-                 SmallVectorImpl<unsigned> &SectionMap,
-                 SmallVectorImpl<StringRef> &SymbolNames,
-                 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
-  for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
-    InMemoryStruct<macho::Symbol64TableEntry> STE;
-    Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
-    if (!STE)
-      return Error("unable to read symbol: '" + Twine(i) + "'");
-    // Get the symbol name.
-    StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
-    SymbolNames.push_back(Name);
-
-    // FIXME: Check the symbol type and flags.
-    if (STE->Type != 0xF)  // external, defined in this segment.
-      continue;
-    // Flags in the upper nibble we don't care about.
-    if ((STE->Flags & 0xf) != 0x0)
-      continue;
-
-    // Remember the symbol.
-    uint32_t SectionID = SectionMap[STE->SectionIndex - 1];
-    SymbolTable[Name] = SymbolLoc(SectionID, STE->Value);
-
-    DEBUG(dbgs() << "Symbol: '" << Name << "' @ "
-                 << (getSectionAddress(SectionID) + STE->Value)
-                 << "\n");
-  }
-  return false;
-}
-
-// resolveSymbol - Resolve any relocations to the specified symbol if
-// we know where it lives.
-void RuntimeDyldMachO::resolveSymbol(StringRef Name) {
-  StringMap<SymbolLoc>::const_iterator Loc = SymbolTable.find(Name);
-  if (Loc == SymbolTable.end())
-    return;
-
-  RelocationList &Relocs = UnresolvedRelocations[Name];
-  DEBUG(dbgs() << "Resolving symbol '" << Name << "'\n");
-  for (int i = 0, e = Relocs.size(); i != e; ++i) {
-    // Change the relocation to be section relative rather than symbol
-    // relative and move it to the resolved relocation list.
-    RelocationEntry Entry = Relocs[i];
-    Entry.Addend += Loc->second.second;
-    Relocations[Loc->second.first].push_back(Entry);
-  }
-  // FIXME: Keep a worklist of the relocations we've added so that we can
-  // resolve more selectively later.
-  Relocs.clear();
-}
-
-bool RuntimeDyldMachO::loadObject(MemoryBuffer *InputBuffer) {
-  // If the linker is in an error state, don't do anything.
-  if (hasError())
-    return true;
-  // Load the Mach-O wrapper object.
-  std::string ErrorStr;
-  OwningPtr<MachOObject> Obj(
-    MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr));
-  if (!Obj)
-    return Error("unable to load object: '" + ErrorStr + "'");
-
-  // Get the CPU type information from the header.
-  const macho::Header &Header = Obj->getHeader();
-
-  // FIXME: Error checking that the loaded object is compatible with
-  //        the system we're running on.
-  CPUType = Header.CPUType;
-  CPUSubtype = Header.CPUSubtype;
-
-  // Validate that the load commands match what we expect.
-  const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
-    *DysymtabLCI = 0;
-  for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
-    const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
-    switch (LCI.Command.Type) {
-    case macho::LCT_Segment:
-    case macho::LCT_Segment64:
-      if (SegmentLCI)
-        return Error("unexpected input object (multiple segments)");
-      SegmentLCI = &LCI;
-      break;
-    case macho::LCT_Symtab:
-      if (SymtabLCI)
-        return Error("unexpected input object (multiple symbol tables)");
-      SymtabLCI = &LCI;
-      break;
-    case macho::LCT_Dysymtab:
-      if (DysymtabLCI)
-        return Error("unexpected input object (multiple symbol tables)");
-      DysymtabLCI = &LCI;
-      break;
-    default:
-      return Error("unexpected input object (unexpected load command");
+    assert(sIt != sItEnd && "No section containing relocation!");
+    Value.SectionID = findOrEmitSection(*sIt, true, ObjSectionToID);
+    Value.Addend = *(const intptr_t *)Target;
+    if (Value.Addend) {
+      // The MachO addend is offset from the current section, we need set it
+      // as offset from destination section
+      Value.Addend += Section.ObjAddress - Sections[Value.SectionID].ObjAddress;
     }
   }
 
-  if (!SymtabLCI)
-    return Error("no symbol table found in object");
-  if (!SegmentLCI)
-    return Error("no segments found in object");
-
-  // Read and register the symbol table data.
-  InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
-  Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
-  if (!SymtabLC)
-    return Error("unable to load symbol table load command");
-  Obj->RegisterStringTable(*SymtabLC);
-
-  // Read the dynamic link-edit information, if present (not present in static
-  // objects).
-  if (DysymtabLCI) {
-    InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
-    Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
-    if (!DysymtabLC)
-      return Error("unable to load dynamic link-exit load command");
-
-    // FIXME: We don't support anything interesting yet.
-//    if (DysymtabLC->LocalSymbolsIndex != 0)
-//      return Error("NOT YET IMPLEMENTED: local symbol entries");
-//    if (DysymtabLC->ExternalSymbolsIndex != 0)
-//      return Error("NOT YET IMPLEMENTED: non-external symbol entries");
-//    if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
-//      return Error("NOT YET IMPLEMENTED: undefined symbol entries");
-  }
-
-  // Load the segment load command.
-  if (SegmentLCI->Command.Type == macho::LCT_Segment) {
-    if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
-      return true;
-  } else {
-    if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
-      return true;
-  }
-
-  // Assign the addresses of the sections from the object so that any
-  // relocations to them get set properly.
-  // FIXME: This is done directly from the client at the moment. We should
-  // default the values to the local storage, at least when the target arch
-  // is the same as the host arch.
-
-  return false;
+  if (Arch == Triple::arm && RelType == macho::RIT_ARM_Branch24Bit) {
+    // This is an ARM branch relocation, need to use a stub function.
+
+    //  Look up for existing stub.
+    StubMap::const_iterator stubIt = Stubs.find(Value);
+    if (stubIt != Stubs.end())
+      resolveRelocation(Target, (uint64_t)Target,
+                        (uint64_t)Section.Address + stubIt->second,
+                        RelType, 0);
+    else {
+      // Create a new stub function.
+      Stubs[Value] = Section.StubOffset;
+      uint8_t *StubTargetAddr = createStubFunction(Section.Address +
+                                                   Section.StubOffset);
+      AddRelocation(Value, Rel.SectionID, StubTargetAddr - Section.Address,
+                    macho::RIT_Vanilla);
+      resolveRelocation(Target, (uint64_t)Target,
+                        (uint64_t)Section.Address + Section.StubOffset,
+                        RelType, 0);
+      Section.StubOffset += getMaxStubSize();
+    }
+  } else
+    AddRelocation(Value, Rel.SectionID, Rel.Offset, RelType);
 }
 
-// Assign an address to a symbol name and resolve all the relocations
-// associated with it.
-void RuntimeDyldMachO::reassignSectionAddress(unsigned SectionID,
-                                              uint64_t Addr) {
-  // The address to use for relocation resolution is not
-  // the address of the local section buffer. We must be doing
-  // a remote execution environment of some sort. Re-apply any
-  // relocations referencing this section with the given address.
-  //
-  // Addr is a uint64_t because we can't assume the pointer width
-  // of the target is the same as that of the host. Just use a generic
-  // "big enough" type.
-
-  SectionLoadAddress[SectionID] = Addr;
-
-  RelocationList &Relocs = Relocations[SectionID];
-  for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
-    RelocationEntry &RE = Relocs[i];
-    uint8_t *Target = (uint8_t*)Sections[RE.SectionID].base() + RE.Offset;
-    bool isPCRel = (RE.Data >> 24) & 1;
-    unsigned Type = (RE.Data >> 28) & 0xf;
-    unsigned Size = 1 << ((RE.Data >> 25) & 3);
-
-    DEBUG(dbgs() << "Resolving relocation at Section #" << RE.SectionID
-          << " + " << RE.Offset << " (" << format("%p", Target) << ")"
-          << " from Section #" << SectionID << " (" << format("%p", Addr) << ")"
-          << "(" << (isPCRel ? "pcrel" : "absolute")
-          << ", type: " << Type << ", Size: " << Size << ", Addend: "
-          << RE.Addend << ").\n");
-
-    resolveRelocation(Target, Addr, isPCRel, Type, Size, RE.Addend);
-  }
-}
 
-bool RuntimeDyldMachO::isKnownFormat(const MemoryBuffer *InputBuffer) {
+bool RuntimeDyldMachO::isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
   StringRef Magic = InputBuffer->getBuffer().slice(0, 4);
   if (Magic == "\xFE\xED\xFA\xCE") return true;
   if (Magic == "\xCE\xFA\xED\xFE") return true;
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index 5798981..36b39dd 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -25,68 +25,37 @@ using namespace llvm::object;
 
 namespace llvm {
 class RuntimeDyldMachO : public RuntimeDyldImpl {
-
-  // For each symbol, keep a list of relocations based on it. Anytime
-  // its address is reassigned (the JIT re-compiled the function, e.g.),
-  // the relocations get re-resolved.
-  // The symbol (or section) the relocation is sourced from is the Key
-  // in the relocation list where it's stored.
-  struct RelocationEntry {
-    unsigned    SectionID;  // Section the relocation is contained in.
-    uint64_t    Offset;     // Offset into the section for the relocation.
-    uint32_t    Data;       // Second word of the raw macho relocation entry.
-    int64_t     Addend;     // Addend encoded in the instruction itself, if any,
-                            // plus the offset into the source section for
-                            // the symbol once the relocation is resolvable.
-
-    RelocationEntry(unsigned id, uint64_t offset, uint32_t data, int64_t addend)
-      : SectionID(id), Offset(offset), Data(data), Addend(addend) {}
-  };
-  typedef SmallVector<RelocationEntry, 4> RelocationList;
-  // Relocations to sections already loaded. Indexed by SectionID which is the
-  // source of the address. The target where the address will be writen is
-  // SectionID/Offset in the relocation itself.
-  IndexedMap<RelocationList> Relocations;
-  // Relocations to symbols that are not yet resolved. Must be external
-  // relocations by definition. Indexed by symbol name.
-  StringMap<RelocationList> UnresolvedRelocations;
-
-  bool resolveRelocation(uint8_t *Address, uint64_t Value, bool isPCRel,
-                         unsigned Type, unsigned Size, int64_t Addend);
-  bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
-                               unsigned Type, unsigned Size, int64_t Addend);
-  bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
-                            unsigned Type, unsigned Size, int64_t Addend);
-
-  bool loadSegment32(const MachOObject *Obj,
-                     const MachOObject::LoadCommandInfo *SegmentLCI,
-                     const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
-  bool loadSegment64(const MachOObject *Obj,
-                     const MachOObject::LoadCommandInfo *SegmentLCI,
-                     const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
-  bool processSymbols32(const MachOObject *Obj,
-                      SmallVectorImpl<unsigned> &SectionMap,
-                      SmallVectorImpl<StringRef> &SymbolNames,
-                      const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
-  bool processSymbols64(const MachOObject *Obj,
-                      SmallVectorImpl<unsigned> &SectionMap,
-                      SmallVectorImpl<StringRef> &SymbolNames,
-                      const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
-
-  void resolveSymbol(StringRef Name);
+protected:
+  bool resolveX86_64Relocation(uint8_t *LocalAddress,
+                               uint64_t FinalAddress,
+                               uint64_t Value,
+                               bool isPCRel,
+                               unsigned Type,
+                               unsigned Size,
+                               int64_t Addend);
+  bool resolveARMRelocation(uint8_t *LocalAddress,
+                            uint64_t FinalAddress,
+                            uint64_t Value,
+                            bool isPCRel,
+                            unsigned Type,
+                            unsigned Size,
+                            int64_t Addend);
+
+  virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+                                    const ObjectFile &Obj,
+                                    ObjSectionToIDMap &ObjSectionToID,
+                                    LocalSymbolMap &Symbols, StubMap &Stubs);
 
 public:
+  virtual void resolveRelocation(uint8_t *LocalAddress,
+                                 uint64_t FinalAddress,
+                                 uint64_t Value,
+                                 uint32_t Type,
+                                 int64_t Addend);
+                                 
   RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
 
-  bool loadObject(MemoryBuffer *InputBuffer);
-
-  void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
-
-  static bool isKnownFormat(const MemoryBuffer *InputBuffer);
-
-  bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
-    return isKnownFormat(InputBuffer);
-  }
+  bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const;
 };
 
 } // end namespace llvm
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index bd5956f..ab5ddaf 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -1528,6 +1528,11 @@ bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
     }
     Lex();
   }
+  // If there weren't any arguments, erase the token vector so everything
+  // else knows that. Leaving around the vestigal empty token list confuses
+  // things.
+  if (MacroArguments.size() == 1 && MacroArguments.back().empty())
+    MacroArguments.clear();
 
   // Macro instantiation is lexical, unfortunately. We construct a new buffer
   // to hold the macro body with substitutions.
@@ -1624,6 +1629,8 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) {
       return Error(EqualLoc, "Recursive use of '" + Name + "'");
     else if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
       ; // Allow redefinitions of undefined symbols only used in directives.
+    else if (Sym->isVariable() && !Sym->isUsed() && allow_redef)
+      ; // Allow redefinitions of variables that haven't yet been used.
     else if (!Sym->isUndefined() && (!Sym->isVariable() || !allow_redef))
       return Error(EqualLoc, "redefinition of '" + Name + "'");
     else if (!Sym->isVariable())
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index c2fad167..e013e77 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -54,17 +54,14 @@ const MCSymbol &MCSymbol::AliasedSymbol() const {
 void MCSymbol::setVariableValue(const MCExpr *Value) {
   assert(!IsUsed && "Cannot set a variable that has already been used.");
   assert(Value && "Invalid variable value!");
-  assert((isUndefined() || (isAbsolute() && isa<MCConstantExpr>(Value))) &&
-         "Invalid redefinition!");
   this->Value = Value;
 
   // Variables should always be marked as in the same "section" as the value.
   const MCSection *Section = Value->FindAssociatedSection();
-  if (Section) {
+  if (Section)
     setSection(*Section);
-  } else {
+  else
     setUndefined();
-  }
 }
 
 void MCSymbol::print(raw_ostream &OS) const {
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 7144e68..f706cac 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -783,9 +783,22 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
     }
 
     if (Sec->Relocations.size() > 0) {
-      Sec->Header.NumberOfRelocations = Sec->Relocations.size();
+      bool RelocationsOverflow = Sec->Relocations.size() >= 0xffff;
+
+      if (RelocationsOverflow) {
+        // Signal overflow by setting NumberOfSections to max value. Actual
+        // size is found in reloc #0. Microsoft tools understand this.
+        Sec->Header.NumberOfRelocations = 0xffff;
+      } else {
+        Sec->Header.NumberOfRelocations = Sec->Relocations.size();
+      }
       Sec->Header.PointerToRelocations = offset;
 
+      if (RelocationsOverflow) {
+        // Reloc #0 will contain actual count, so make room for it.
+        offset += COFF::RelocationSize;
+      }
+
       offset += COFF::RelocationSize * Sec->Relocations.size();
 
       for (relocations::iterator cr = Sec->Relocations.begin(),
@@ -820,8 +833,12 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
     MCAssembler::const_iterator j, je;
 
     for (i = Sections.begin(), ie = Sections.end(); i != ie; i++)
-      if ((*i)->Number != -1)
+      if ((*i)->Number != -1) {
+        if ((*i)->Relocations.size() >= 0xffff) {
+          (*i)->Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
+        }
         WriteSectionHeader((*i)->Header);
+      }
 
     for (i = Sections.begin(), ie = Sections.end(),
          j = Asm.begin(), je = Asm.end();
@@ -841,6 +858,16 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
         assert(OS.tell() == (*i)->Header.PointerToRelocations &&
                "Section::PointerToRelocations is insane!");
 
+        if ((*i)->Relocations.size() >= 0xffff) {
+          // In case of overflow, write actual relocation count as first
+          // relocation. Including the synthetic reloc itself (+ 1).
+          COFF::relocation r;
+          r.VirtualAddress = (*i)->Relocations.size() + 1;
+          r.SymbolTableIndex = 0;
+          r.Type = 0;
+          WriteRelocation(r);
+        }
+
         for (relocations::const_iterator k = (*i)->Relocations.begin(),
                                                ke = (*i)->Relocations.end();
                                                k != ke; k++) {
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index b67377c..c5f15ba 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -174,7 +174,7 @@ error_code Archive::Child::getAsBinary(OwningPtr<Binary> &Result) const {
 }
 
 Archive::Archive(MemoryBuffer *source, error_code &ec)
-  : Binary(Binary::isArchive, source) {
+  : Binary(Binary::ID_Archive, source) {
   // Check for sufficient magic.
   if (!source || source->getBufferSize()
                  < (8 + sizeof(ArchiveMemberHeader) + 2) // Smallest archive.
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index a3fdd5b..b8ba905 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Object/COFF.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
@@ -300,24 +301,7 @@ error_code COFFObjectFile::getSectionNext(DataRefImpl Sec,
 error_code COFFObjectFile::getSectionName(DataRefImpl Sec,
                                           StringRef &Result) const {
   const coff_section *sec = toSec(Sec);
-  StringRef name;
-  if (sec->Name[7] == 0)
-    // Null terminated, let ::strlen figure out the length.
-    name = sec->Name;
-  else
-    // Not null terminated, use all 8 bytes.
-    name = StringRef(sec->Name, 8);
-
-  // Check for string table entry. First byte is '/'.
-  if (name[0] == '/') {
-    uint32_t Offset;
-    name.substr(1).getAsInteger(10, Offset);
-    if (error_code ec = getString(Offset, name))
-      return ec;
-  }
-
-  Result = name;
-  return object_error::success;
+  return getSectionName(sec, Result);
 }
 
 error_code COFFObjectFile::getSectionAddress(DataRefImpl Sec,
@@ -337,16 +321,10 @@ error_code COFFObjectFile::getSectionSize(DataRefImpl Sec,
 error_code COFFObjectFile::getSectionContents(DataRefImpl Sec,
                                               StringRef &Result) const {
   const coff_section *sec = toSec(Sec);
-  // The only thing that we need to verify is that the contents is contained
-  // within the file bounds. We don't need to make sure it doesn't cover other
-  // data, as there's nothing that says that is not allowed.
-  uintptr_t con_start = uintptr_t(base()) + sec->PointerToRawData;
-  uintptr_t con_end = con_start + sec->SizeOfRawData;
-  if (con_end > uintptr_t(Data->getBufferEnd()))
-    return object_error::parse_failed;
-  Result = StringRef(reinterpret_cast<const char*>(con_start),
-                     sec->SizeOfRawData);
-  return object_error::success;
+  ArrayRef<uint8_t> Res;
+  error_code EC = getSectionContents(sec, Res);
+  Result = StringRef(reinterpret_cast<const char*>(Res.data()), Res.size());
+  return EC;
 }
 
 error_code COFFObjectFile::getSectionAlignment(DataRefImpl Sec,
@@ -421,7 +399,7 @@ relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
 }
 
 COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
-  : ObjectFile(Binary::isCOFF, Object, ec)
+  : ObjectFile(Binary::ID_COFF, Object, ec)
   , Header(0)
   , SectionTable(0)
   , SymbolTable(0)
@@ -630,6 +608,43 @@ error_code COFFObjectFile::getSymbolName(const coff_symbol *symbol,
   return object_error::success;
 }
 
+error_code COFFObjectFile::getSectionName(const coff_section *Sec,
+                                          StringRef &Res) const {
+  StringRef Name;
+  if (Sec->Name[7] == 0)
+    // Null terminated, let ::strlen figure out the length.
+    Name = Sec->Name;
+  else
+    // Not null terminated, use all 8 bytes.
+    Name = StringRef(Sec->Name, 8);
+
+  // Check for string table entry. First byte is '/'.
+  if (Name[0] == '/') {
+    uint32_t Offset;
+    if (Name.substr(1).getAsInteger(10, Offset))
+      return object_error::parse_failed;
+    if (error_code ec = getString(Offset, Name))
+      return ec;
+  }
+
+  Res = Name;
+  return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionContents(const coff_section *Sec,
+                                              ArrayRef<uint8_t> &Res) const {
+  // The only thing that we need to verify is that the contents is contained
+  // within the file bounds. We don't need to make sure it doesn't cover other
+  // data, as there's nothing that says that is not allowed.
+  uintptr_t ConStart = uintptr_t(base()) + Sec->PointerToRawData;
+  uintptr_t ConEnd = ConStart + Sec->SizeOfRawData;
+  if (ConEnd > uintptr_t(Data->getBufferEnd()))
+    return object_error::parse_failed;
+  Res = ArrayRef<uint8_t>(reinterpret_cast<const unsigned char*>(ConStart),
+                          Sec->SizeOfRawData);
+  return object_error::success;
+}
+
 const coff_relocation *COFFObjectFile::toRel(DataRefImpl Rel) const {
   return reinterpret_cast<const coff_relocation*>(Rel.p);
 }
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 655c40a..819409e 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -30,7 +30,7 @@ namespace object {
 
 MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO,
                                  error_code &ec)
-    : ObjectFile(Binary::isMachO, Object, ec),
+    : ObjectFile(Binary::ID_MachO, Object, ec),
       MachOObj(MOO),
       RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {
   DataRefImpl DRI;
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 031bbb8..9b81fe7 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -457,16 +457,6 @@ APInt APInt::XorSlowCase(const APInt& RHS) const {
   return APInt(val, getBitWidth()).clearUnusedBits();
 }
 
-bool APInt::operator !() const {
-  if (isSingleWord())
-    return !VAL;
-
-  for (unsigned i = 0; i < getNumWords(); ++i)
-    if (pVal[i])
-      return false;
-  return true;
-}
-
 APInt APInt::operator*(const APInt& RHS) const {
   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
   if (isSingleWord())
@@ -494,12 +484,6 @@ APInt APInt::operator-(const APInt& RHS) const {
   return Result.clearUnusedBits();
 }
 
-bool APInt::operator[](unsigned bitPosition) const {
-  assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
-  return (maskBit(bitPosition) &
-          (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
-}
-
 bool APInt::EqualSlowCase(const APInt& RHS) const {
   // Get some facts about the number of bits used in the two operands.
   unsigned n1 = getActiveBits();
@@ -722,20 +706,9 @@ unsigned APInt::countLeadingZerosSlowCase() const {
   return Count;
 }
 
-static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
-  unsigned Count = 0;
-  if (skip)
-    V <<= skip;
-  while (V && (V & (1ULL << 63))) {
-    Count++;
-    V <<= 1;
-  }
-  return Count;
-}
-
 unsigned APInt::countLeadingOnes() const {
   if (isSingleWord())
-    return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
+    return CountLeadingOnes_64(VAL << (APINT_BITS_PER_WORD - BitWidth));
 
   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
   unsigned shift;
@@ -746,13 +719,13 @@ unsigned APInt::countLeadingOnes() const {
     shift = APINT_BITS_PER_WORD - highWordBits;
   }
   int i = getNumWords() - 1;
-  unsigned Count = countLeadingOnes_64(pVal[i], shift);
+  unsigned Count = CountLeadingOnes_64(pVal[i] << shift);
   if (Count == highWordBits) {
     for (i--; i >= 0; --i) {
       if (pVal[i] == -1ULL)
         Count += APINT_BITS_PER_WORD;
       else {
-        Count += countLeadingOnes_64(pVal[i], 0);
+        Count += CountLeadingOnes_64(pVal[i]);
         break;
       }
     }
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index d1ec4b0..e6fdf16 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -1191,7 +1191,7 @@ printOptionNoValue(const Option &O, size_t GlobalWidth) const {
 static int OptNameCompare(const void *LHS, const void *RHS) {
   typedef std::pair<const char *, Option*> pair_ty;
 
-  return strcmp(((pair_ty*)LHS)->first, ((pair_ty*)RHS)->first);
+  return strcmp(((const pair_ty*)LHS)->first, ((const pair_ty*)RHS)->first);
 }
 
 // Copy Options into a vector so we can sort them as we like.
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index 0dba28a..32126ec 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -11,12 +11,16 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Config/config.h"
 using namespace llvm;
 
+static cl::opt<bool> ViewBackground("view-background", cl::Hidden,
+  cl::desc("Execute graph viewer in the background. Creates tmp file litter."));
+
 std::string llvm::DOT::EscapeString(const std::string &Label) {
   std::string Str(Label);
   for (unsigned i = 0; i != Str.length(); ++i)
@@ -49,10 +53,28 @@ std::string llvm::DOT::EscapeString(const std::string &Label) {
   return Str;
 }
 
-
+// Execute the graph viewer. Return true if successful.
+static bool LLVM_ATTRIBUTE_UNUSED
+ExecGraphViewer(const sys::Path &ExecPath, std::vector<const char*> &args,
+                const sys::Path &Filename, bool wait, std::string &ErrMsg) {
+  if (wait) {
+    if (sys::Program::ExecuteAndWait(ExecPath, &args[0],0,0,0,0,&ErrMsg)) {
+      errs() << "Error: " << ErrMsg << "\n";
+      return false;
+    }
+    Filename.eraseFromDisk();
+    errs() << " done. \n";
+  }
+  else {
+    sys::Program::ExecuteNoWait(ExecPath, &args[0],0,0,0,&ErrMsg);
+    errs() << "Remember to erase graph file: " << Filename.str() << "\n";
+  }
+  return true;
+}
 
 void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
                         GraphProgram::Name program) {
+  wait &= !ViewBackground;
   std::string ErrMsg;
 #if HAVE_GRAPHVIZ
   sys::Path Graphviz(LLVM_PATH_GRAPHVIZ);
@@ -61,14 +83,10 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
   args.push_back(Graphviz.c_str());
   args.push_back(Filename.c_str());
   args.push_back(0);
-  
+
   errs() << "Running 'Graphviz' program... ";
-  if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) {
-    errs() << "Error: " << ErrMsg << "\n";
+  if (!ExecGraphViewer(Graphviz, args, Filename, wait, ErrMsg))
     return;
-  }
-  Filename.eraseFromDisk();
-  errs() << " done. \n";
 
 #elif HAVE_XDOT_PY
   std::vector<const char*> args;
@@ -83,17 +101,12 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
   case GraphProgram::CIRCO: args.push_back("-f"); args.push_back("circo");break;
   default: errs() << "Unknown graph layout name; using default.\n";
   }
-  
+
   args.push_back(0);
 
   errs() << "Running 'xdot.py' program... ";
-  if (sys::Program::ExecuteAndWait(sys::Path(LLVM_PATH_XDOT_PY),
-                                   &args[0],0,0,0,0,&ErrMsg)) {
-    errs() << "Error: " << ErrMsg << "\n";
+  if (!ExecGraphViewer(sys::Path(LLVM_PATH_XDOT_PY), args, Filename, wait, ErrMsg))
     return;
-  }
-  Filename.eraseFromDisk();
-  errs() << " done. \n";
 
 #elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \
                    HAVE_TWOPI || HAVE_CIRCO))
@@ -150,14 +163,11 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
   args.push_back("-o");
   args.push_back(PSFilename.c_str());
   args.push_back(0);
-  
+
   errs() << "Running '" << prog.str() << "' program... ";
 
-  if (sys::Program::ExecuteAndWait(prog, &args[0], 0, 0, 0, 0, &ErrMsg)) {
-    errs() << "Error: " << ErrMsg << "\n";
+  if (!ExecGraphViewer(prog, args, Filename, wait, ErrMsg))
     return;
-  }
-  errs() << " done. \n";
 
   sys::Path gv(LLVM_PATH_GV);
   args.clear();
@@ -165,19 +175,11 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
   args.push_back(PSFilename.c_str());
   args.push_back("--spartan");
   args.push_back(0);
-  
+
   ErrMsg.clear();
-  if (wait) {
-     if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg))
-        errs() << "Error: " << ErrMsg << "\n";
-     Filename.eraseFromDisk();
-     PSFilename.eraseFromDisk();
-  }
-  else {
-     sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg);
-     errs() << "Remember to erase graph files: " << Filename.str() << " "
-            << PSFilename.str() << "\n";
-  }
+  if (!ExecGraphViewer(gv, args, PSFilename, wait, ErrMsg))
+    return;
+
 #elif HAVE_DOTTY
   sys::Path dotty(LLVM_PATH_DOTTY);
 
@@ -185,16 +187,13 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
   args.push_back(dotty.c_str());
   args.push_back(Filename.c_str());
   args.push_back(0);
-  
-  errs() << "Running 'dotty' program... ";
-  if (sys::Program::ExecuteAndWait(dotty, &args[0],0,0,0,0,&ErrMsg)) {
-     errs() << "Error: " << ErrMsg << "\n";
-  } else {
+
 // Dotty spawns another app and doesn't wait until it returns
 #if defined (__MINGW32__) || defined (_WINDOWS)
-    return;
+  wait = false;
 #endif
-    Filename.eraseFromDisk();
-  }
+  errs() << "Running 'dotty' program... ";
+  if (!ExecGraphViewer(dotty, args, Filename, wait, ErrMsg))
+    return;
 #endif
 }
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 4b15587..911a03f 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -336,7 +336,11 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
       // Error while reading.
       return error_code(errno, posix_category());
     }
-    assert(NumRead != 0 && "fstat reported an invalid file size.");
+    if (NumRead == 0) {
+      assert(0 && "We got inaccurate FileSize value or fstat reported an "
+                   "invalid file size.");
+      break;
+    }
     BytesLeft -= NumRead;
     BufPtr += NumRead;
   }
diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp
index 997ce0b..68d9c29 100644
--- a/lib/Support/SmallPtrSet.cpp
+++ b/lib/Support/SmallPtrSet.cpp
@@ -14,6 +14,7 @@
 
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/MathExtras.h"
+#include <algorithm>
 #include <cstdlib>
 
 using namespace llvm;
@@ -223,6 +224,56 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
   NumTombstones = RHS.NumTombstones;
 }
 
+void SmallPtrSetImpl::swap(SmallPtrSetImpl &RHS) {
+  if (this == &RHS) return;
+
+  // We can only avoid copying elements if neither set is small.
+  if (!this->isSmall() && !RHS.isSmall()) {
+    std::swap(this->CurArray, RHS.CurArray);
+    std::swap(this->CurArraySize, RHS.CurArraySize);
+    std::swap(this->NumElements, RHS.NumElements);
+    std::swap(this->NumTombstones, RHS.NumTombstones);
+    return;
+  }
+
+  // FIXME: From here on we assume that both sets have the same small size.
+
+  // If only RHS is small, copy the small elements into LHS and move the pointer
+  // from LHS to RHS.
+  if (!this->isSmall() && RHS.isSmall()) {
+    std::copy(RHS.SmallArray, RHS.SmallArray+RHS.CurArraySize,
+              this->SmallArray);
+    std::swap(this->NumElements, RHS.NumElements);
+    std::swap(this->CurArraySize, RHS.CurArraySize);
+    RHS.CurArray = this->CurArray;
+    RHS.NumTombstones = this->NumTombstones;
+    this->CurArray = this->SmallArray;
+    this->NumTombstones = 0;
+    return;
+  }
+
+  // If only LHS is small, copy the small elements into RHS and move the pointer
+  // from RHS to LHS.
+  if (this->isSmall() && !RHS.isSmall()) {
+    std::copy(this->SmallArray, this->SmallArray+this->CurArraySize,
+              RHS.SmallArray);
+    std::swap(RHS.NumElements, this->NumElements);
+    std::swap(RHS.CurArraySize, this->CurArraySize);
+    this->CurArray = RHS.CurArray;
+    this->NumTombstones = RHS.NumTombstones;
+    RHS.CurArray = RHS.SmallArray;
+    RHS.NumTombstones = 0;
+    return;
+  }
+
+  // Both a small, just swap the small elements.
+  assert(this->isSmall() && RHS.isSmall());
+  assert(this->CurArraySize == RHS.CurArraySize);
+  std::swap_ranges(this->SmallArray, this->SmallArray+this->CurArraySize,
+                   RHS.SmallArray);
+  std::swap(this->NumElements, RHS.NumElements);
+}
+
 SmallPtrSetImpl::~SmallPtrSetImpl() {
   if (!isSmall())
     free(CurArray);
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index 1c28bf8..abe570f 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -285,8 +285,8 @@ static unsigned GetAutoSenseRadix(StringRef &Str) {
 
 /// GetAsUnsignedInteger - Workhorse method that converts a integer character
 /// sequence of radix up to 36 to an unsigned long long value.
-static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
-                                 unsigned long long &Result) {
+bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
+                                unsigned long long &Result) {
   // Autosense radix if not specified.
   if (Radix == 0)
     Radix = GetAutoSenseRadix(Str);
@@ -326,17 +326,13 @@ static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
   return false;
 }
 
-bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const {
-  return GetAsUnsignedInteger(*this, Radix, Result);
-}
-
-
-bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
+bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
+                              long long &Result) {
   unsigned long long ULLVal;
 
   // Handle positive strings first.
-  if (empty() || front() != '-') {
-    if (GetAsUnsignedInteger(*this, Radix, ULLVal) ||
+  if (Str.empty() || Str.front() != '-') {
+    if (getAsUnsignedInteger(Str, Radix, ULLVal) ||
         // Check for value so large it overflows a signed value.
         (long long)ULLVal < 0)
       return true;
@@ -345,7 +341,7 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
   }
 
   // Get the positive part of the value.
-  if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) ||
+  if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) ||
       // Reject values so large they'd overflow as negative signed, but allow
       // "-0".  This negates the unsigned so that the negative isn't undefined
       // on signed overflow.
@@ -356,24 +352,6 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
   return false;
 }
 
-bool StringRef::getAsInteger(unsigned Radix, int &Result) const {
-  long long Val;
-  if (getAsInteger(Radix, Val) ||
-      (int)Val != Val)
-    return true;
-  Result = Val;
-  return false;
-}
-
-bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
-  unsigned long long Val;
-  if (getAsInteger(Radix, Val) ||
-      (unsigned)Val != Val)
-    return true;
-  Result = Val;
-  return false;
-}
-
 bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
   StringRef Str = *this;
 
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 94333a3..d261c53 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -29,6 +29,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
   case msp430:  return "msp430";
   case ppc64:   return "powerpc64";
   case ppc:     return "powerpc";
+  case r600:    return "r600";
   case sparc:   return "sparc";
   case sparcv9: return "sparcv9";
   case tce:     return "tce";
@@ -63,6 +64,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
 
   case hexagon:   return "hexagon";
 
+  case r600:    return "r600";
+
   case sparcv9:
   case sparc:   return "sparc";
 
@@ -145,6 +148,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
     .Case("ppc32", ppc)
     .Case("ppc", ppc)
     .Case("mblaze", mblaze)
+    .Case("r600", r600)
     .Case("hexagon", hexagon)
     .Case("sparc", sparc)
     .Case("sparcv9", sparcv9)
@@ -184,6 +188,7 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
     // This is derived from the driver driver.
     .Cases("arm", "armv4t", "armv5", "armv6", Triple::arm)
     .Cases("armv7", "armv7f", "armv7k", "armv7s", "xscale", Triple::arm)
+    .Case("r600", Triple::r600)
     .Case("ptx32", Triple::ptx32)
     .Case("ptx64", Triple::ptx64)
     .Case("amdil", Triple::amdil)
@@ -206,6 +211,7 @@ const char *Triple::getArchNameForAssembler() {
     .Cases("armv5", "armv5e", "thumbv5", "thumbv5e", "armv5")
     .Cases("armv6", "thumbv6", "armv6")
     .Cases("armv7", "thumbv7", "armv7")
+    .Case("r600", "r600")
     .Case("ptx32", "ptx32")
     .Case("ptx64", "ptx64")
     .Case("le32", "le32")
@@ -234,6 +240,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
     .Cases("mipsel", "mipsallegrexel", Triple::mipsel)
     .Cases("mips64", "mips64eb", Triple::mips64)
     .Case("mips64el", Triple::mips64el)
+    .Case("r600", Triple::r600)
     .Case("hexagon", Triple::hexagon)
     .Case("sparc", Triple::sparc)
     .Case("sparcv9", Triple::sparcv9)
@@ -641,6 +648,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
   case llvm::Triple::mipsel:
   case llvm::Triple::ppc:
   case llvm::Triple::ptx32:
+  case llvm::Triple::r600:
   case llvm::Triple::sparc:
   case llvm::Triple::tce:
   case llvm::Triple::thumb:
@@ -689,6 +697,7 @@ Triple Triple::get32BitArchVariant() const {
   case Triple::mipsel:
   case Triple::ppc:
   case Triple::ptx32:
+  case Triple::r600:
   case Triple::sparc:
   case Triple::tce:
   case Triple::thumb:
@@ -718,6 +727,7 @@ Triple Triple::get64BitArchVariant() const {
   case Triple::le32:
   case Triple::mblaze:
   case Triple::msp430:
+  case Triple::r600:
   case Triple::tce:
   case Triple::thumb:
   case Triple::xcore:
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index 12d1b1a..93eed24 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -1726,12 +1726,6 @@ void Record::setName(Init *NewName) {
   }  // Otherwise this isn't yet registered.
   Name = NewName;
   checkName();
-  // Since the Init for the name was changed, see if we can resolve
-  // any of it using members of the Record.
-  Init *ComputedName = Name->resolveReferences(*this, 0);
-  if (ComputedName != Name) {
-    setName(ComputedName);
-  }
   // DO NOT resolve record values to the name at this point because
   // there might be default values for arguments of this def.  Those
   // arguments might not have been resolved yet so we don't want to
@@ -1754,6 +1748,8 @@ void Record::setName(const std::string &Name) {
 /// references.
 void Record::resolveReferencesTo(const RecordVal *RV) {
   for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+    if (RV == &Values[i]) // Skip resolve the same field as the given one
+      continue;
     if (Init *V = Values[i].getValue())
       Values[i].setValue(V->resolveReferences(*this, RV));
   }
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index acb57f7..2a1e8e4 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -18,9 +18,7 @@
 #include "MCTargetDesc/ARMBaseInfo.h"
 #include "MCTargetDesc/ARMMCTargetDesc.h"
 #include "llvm/Support/DataTypes.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetMachine.h"
-#include <cassert>
 
 namespace llvm {
 
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 4ec19cc..ca30716 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -13,8 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "asm-printer"
-#include "ARM.h"
 #include "ARMAsmPrinter.h"
+#include "ARM.h"
 #include "ARMBuildAttrs.h"
 #include "ARMBaseRegisterInfo.h"
 #include "ARMConstantPoolValue.h"
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 75b796e..366e2fa 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -935,6 +935,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
         MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
         MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
         MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+          MIB.addReg(DestReg, RegState::ImplicitDefine);
       }
     } else
       llvm_unreachable("Unknown reg class!");
@@ -953,6 +955,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
       MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
       MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
       MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
+      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+        MIB.addReg(DestReg, RegState::ImplicitDefine);
     } else
       llvm_unreachable("Unknown reg class!");
     break;
@@ -2756,24 +2760,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     case ARM::VLD4q8oddPseudo_UPD:
     case ARM::VLD4q16oddPseudo_UPD:
     case ARM::VLD4q32oddPseudo_UPD:
-    case ARM::VLD1DUPq8Pseudo:
-    case ARM::VLD1DUPq16Pseudo:
-    case ARM::VLD1DUPq32Pseudo:
-    case ARM::VLD1DUPq8PseudoWB_fixed:
-    case ARM::VLD1DUPq16PseudoWB_fixed:
-    case ARM::VLD1DUPq32PseudoWB_fixed:
-    case ARM::VLD1DUPq8PseudoWB_register:
-    case ARM::VLD1DUPq16PseudoWB_register:
-    case ARM::VLD1DUPq32PseudoWB_register:
-    case ARM::VLD2DUPd8Pseudo:
-    case ARM::VLD2DUPd16Pseudo:
-    case ARM::VLD2DUPd32Pseudo:
-    case ARM::VLD2DUPd8PseudoWB_fixed:
-    case ARM::VLD2DUPd16PseudoWB_fixed:
-    case ARM::VLD2DUPd32PseudoWB_fixed:
-    case ARM::VLD2DUPd8PseudoWB_register:
-    case ARM::VLD2DUPd16PseudoWB_register:
-    case ARM::VLD2DUPd32PseudoWB_register:
+    case ARM::VLD1DUPq8:
+    case ARM::VLD1DUPq16:
+    case ARM::VLD1DUPq32:
+    case ARM::VLD1DUPq8wb_fixed:
+    case ARM::VLD1DUPq16wb_fixed:
+    case ARM::VLD1DUPq32wb_fixed:
+    case ARM::VLD1DUPq8wb_register:
+    case ARM::VLD1DUPq16wb_register:
+    case ARM::VLD1DUPq32wb_register:
+    case ARM::VLD2DUPd8:
+    case ARM::VLD2DUPd16:
+    case ARM::VLD2DUPd32:
+    case ARM::VLD2DUPd8wb_fixed:
+    case ARM::VLD2DUPd16wb_fixed:
+    case ARM::VLD2DUPd32wb_fixed:
+    case ARM::VLD2DUPd8wb_register:
+    case ARM::VLD2DUPd16wb_register:
+    case ARM::VLD2DUPd32wb_register:
     case ARM::VLD4DUPd8Pseudo:
     case ARM::VLD4DUPd16Pseudo:
     case ARM::VLD4DUPd32Pseudo:
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index d2aff9a..291369f 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "ARMBaseRegisterInfo.h"
 #include "ARM.h"
 #include "ARMBaseInstrInfo.h"
-#include "ARMBaseRegisterInfo.h"
 #include "ARMFrameLowering.h"
 #include "ARMInstrInfo.h"
 #include "ARMMachineFunctionInfo.h"
@@ -79,6 +79,7 @@ getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   Reserved.set(ARM::SP);
   Reserved.set(ARM::PC);
+  Reserved.set(ARM::FPSCR);
   if (TFI->hasFP(MF))
     Reserved.set(FramePtr);
   if (hasBasePointer(MF))
@@ -492,8 +493,7 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
   // When outgoing call frames are so large that we adjust the stack pointer
   // around the call, we can no longer use the stack pointer to reach the
   // emergency spill slot.
-  if (needsStackRealignment(MF) && (MFI->hasVarSizedObjects() ||
-                                    !TFI->hasReservedCallFrame(MF)))
+  if (needsStackRealignment(MF) && !TFI->hasReservedCallFrame(MF))
     return true;
 
   // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
@@ -517,7 +517,6 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
 }
 
 bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
   const MachineRegisterInfo *MRI = &MF.getRegInfo();
   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   // We can't realign the stack if:
@@ -532,8 +531,9 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
   // register allocation with frame pointer elimination, it is too late now.
   if (!MRI->canReserveReg(FramePtr))
     return false;
-  // We may also need a base pointer if there are dynamic allocas.
-  if (!MFI->hasVarSizedObjects())
+  // We may also need a base pointer if there are dynamic allocas or stack
+  // pointer adjustments around calls.
+  if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF))
     return true;
   if (!EnableBasePointer)
     return false;
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index 437b4c7..2b9c55d 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -15,13 +15,13 @@
 #ifndef ARMCALLINGCONV_H
 #define ARMCALLINGCONV_H
 
-#include "llvm/CallingConv.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMRegisterInfo.h"
 #include "ARMSubtarget.h"
-#include "ARM.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 namespace llvm {
 
@@ -29,7 +29,7 @@ namespace llvm {
 static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                           CCValAssign::LocInfo &LocInfo,
                           CCState &State, bool CanFail) {
-  static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+  static const uint16_t RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
 
   // Try to get the first register.
   if (unsigned Reg = State.AllocateReg(RegList, 4))
@@ -72,9 +72,9 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
 static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                            CCValAssign::LocInfo &LocInfo,
                            CCState &State, bool CanFail) {
-  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
-  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
-  static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
+  static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
+  static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
+  static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 };
 
   unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
   if (Reg == 0) {
@@ -118,8 +118,8 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
 
 static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                          CCValAssign::LocInfo &LocInfo, CCState &State) {
-  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
-  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+  static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
+  static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
 
   unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
   if (Reg == 0)
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index c4ab99d..c2b7816 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -99,8 +99,8 @@ namespace {
   // Entries for NEON load/store information table.  The table is sorted by
   // PseudoOpc for fast binary-search lookups.
   struct NEONLdStTableEntry {
-    unsigned PseudoOpc;
-    unsigned RealOpc;
+    uint16_t PseudoOpc;
+    uint16_t RealOpc;
     bool IsLoad;
     bool isUpdating;
     bool hasWritebackOperand;
@@ -129,16 +129,6 @@ namespace {
 }
 
 static const NEONLdStTableEntry NEONLdStTable[] = {
-{ ARM::VLD1DUPq16Pseudo,     ARM::VLD1DUPq16,     true, false, false, SingleSpc, 2, 4,false},
-{ ARM::VLD1DUPq16PseudoWB_fixed, ARM::VLD1DUPq16wb_fixed, true, true, true,  SingleSpc, 2, 4,false},
-{ ARM::VLD1DUPq16PseudoWB_register, ARM::VLD1DUPq16wb_register, true, true, true,  SingleSpc, 2, 4,false},
-{ ARM::VLD1DUPq32Pseudo,     ARM::VLD1DUPq32,     true, false, false, SingleSpc, 2, 2,false},
-{ ARM::VLD1DUPq32PseudoWB_fixed, ARM::VLD1DUPq32wb_fixed, true, true, false,  SingleSpc, 2, 2,false},
-{ ARM::VLD1DUPq32PseudoWB_register, ARM::VLD1DUPq32wb_register, true, true, true,  SingleSpc, 2, 2,false},
-{ ARM::VLD1DUPq8Pseudo,      ARM::VLD1DUPq8,      true, false, false, SingleSpc, 2, 8,false},
-{ ARM::VLD1DUPq8PseudoWB_fixed,  ARM::VLD1DUPq8wb_fixed, true, true, false,  SingleSpc, 2, 8,false},
-{ ARM::VLD1DUPq8PseudoWB_register,  ARM::VLD1DUPq8wb_register, true, true, true,  SingleSpc, 2, 8,false},
-
 { ARM::VLD1LNq16Pseudo,     ARM::VLD1LNd16,     true, false, false, EvenDblSpc, 1, 4 ,true},
 { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true,  EvenDblSpc, 1, 4 ,true},
 { ARM::VLD1LNq32Pseudo,     ARM::VLD1LNd32,     true, false, false, EvenDblSpc, 1, 2 ,true},
@@ -149,16 +139,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
 { ARM::VLD1d64QPseudo,      ARM::VLD1d64Q,     true,  false, false, SingleSpc,  4, 1 ,false},
 { ARM::VLD1d64TPseudo,      ARM::VLD1d64T,     true,  false, false, SingleSpc,  3, 1 ,false},
 
-{ ARM::VLD2DUPd16Pseudo,     ARM::VLD2DUPd16,     true, false, false, SingleSpc, 2, 4,false},
-{ ARM::VLD2DUPd16PseudoWB_fixed, ARM::VLD2DUPd16wb_fixed, true, true, false,  SingleSpc, 2, 4,false},
-{ ARM::VLD2DUPd16PseudoWB_register, ARM::VLD2DUPd16wb_register, true, true, true,  SingleSpc, 2, 4,false},
-{ ARM::VLD2DUPd32Pseudo,     ARM::VLD2DUPd32,     true, false, false, SingleSpc, 2, 2,false},
-{ ARM::VLD2DUPd32PseudoWB_fixed, ARM::VLD2DUPd32wb_fixed, true, true, false,  SingleSpc, 2, 2,false},
-{ ARM::VLD2DUPd32PseudoWB_register, ARM::VLD2DUPd32wb_register, true, true, true,  SingleSpc, 2, 2,false},
-{ ARM::VLD2DUPd8Pseudo,      ARM::VLD2DUPd8,      true, false, false, SingleSpc, 2, 8,false},
-{ ARM::VLD2DUPd8PseudoWB_fixed,  ARM::VLD2DUPd8wb_fixed, true, true, false,  SingleSpc, 2, 8,false},
-{ ARM::VLD2DUPd8PseudoWB_register,  ARM::VLD2DUPd8wb_register, true, true, true,  SingleSpc, 2, 8,false},
-
 { ARM::VLD2LNd16Pseudo,     ARM::VLD2LNd16,     true, false, false, SingleSpc,  2, 4 ,true},
 { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true,  SingleSpc,  2, 4 ,true},
 { ARM::VLD2LNd32Pseudo,     ARM::VLD2LNd32,     true, false, false, SingleSpc,  2, 2 ,true},
@@ -345,7 +325,7 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
 /// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
 /// load or store pseudo instruction.
 static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
-  unsigned NumEntries = array_lengthof(NEONLdStTable);
+  const unsigned NumEntries = array_lengthof(NEONLdStTable);
 
 #ifndef NDEBUG
   // Make sure the table is sorted.
@@ -1090,24 +1070,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     case ARM::VLD4q8oddPseudo_UPD:
     case ARM::VLD4q16oddPseudo_UPD:
     case ARM::VLD4q32oddPseudo_UPD:
-    case ARM::VLD1DUPq8Pseudo:
-    case ARM::VLD1DUPq16Pseudo:
-    case ARM::VLD1DUPq32Pseudo:
-    case ARM::VLD1DUPq8PseudoWB_fixed:
-    case ARM::VLD1DUPq16PseudoWB_fixed:
-    case ARM::VLD1DUPq32PseudoWB_fixed:
-    case ARM::VLD1DUPq8PseudoWB_register:
-    case ARM::VLD1DUPq16PseudoWB_register:
-    case ARM::VLD1DUPq32PseudoWB_register:
-    case ARM::VLD2DUPd8Pseudo:
-    case ARM::VLD2DUPd16Pseudo:
-    case ARM::VLD2DUPd32Pseudo:
-    case ARM::VLD2DUPd8PseudoWB_fixed:
-    case ARM::VLD2DUPd16PseudoWB_fixed:
-    case ARM::VLD2DUPd32PseudoWB_fixed:
-    case ARM::VLD2DUPd8PseudoWB_register:
-    case ARM::VLD2DUPd16PseudoWB_register:
-    case ARM::VLD2DUPd32PseudoWB_register:
     case ARM::VLD3DUPd8Pseudo:
     case ARM::VLD3DUPd16Pseudo:
     case ARM::VLD3DUPd32Pseudo:
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 818b202..a24eab4 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -1384,7 +1384,10 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
         SrcVT == MVT::i1) {
       const APInt &CIVal = ConstInt->getValue();
       Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
-      if (Imm < 0) {
+      // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
+      // then a cmn, because there is no way to represent 2147483648 as a 
+      // signed 32-bit int.
+      if (Imm < 0 && Imm != (int)0x80000000) {
         isNegativeImm = true;
         Imm = -Imm;
       }
@@ -1475,7 +1478,6 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
 
 bool ARMFastISel::SelectCmp(const Instruction *I) {
   const CmpInst *CI = cast<CmpInst>(I);
-  Type *Ty = CI->getOperand(0)->getType();
 
   // Get the compare predicate.
   ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
@@ -1495,11 +1497,10 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
   unsigned DestReg = createResultReg(RC);
   Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
   unsigned ZeroReg = TargetMaterializeConstant(Zero);
-  bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
-  unsigned CondReg = isFloat ? ARM::FPSCR : ARM::CPSR;
+  // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
           .addReg(ZeroReg).addImm(1)
-          .addImm(ARMPred).addReg(CondReg);
+          .addImm(ARMPred).addReg(ARM::CPSR);
 
   UpdateValueMap(I, DestReg);
   return true;
@@ -1851,6 +1852,48 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
   CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
   CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
 
+  // Check that we can handle all of the arguments. If we can't, then bail out
+  // now before we add code to the MBB.
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    MVT ArgVT = ArgVTs[VA.getValNo()];
+
+    // We don't handle NEON/vector parameters yet.
+    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+      return false;
+
+    // Now copy/store arg to correct locations.
+    if (VA.isRegLoc() && !VA.needsCustom()) {
+      continue;
+    } else if (VA.needsCustom()) {
+      // TODO: We need custom lowering for vector (v2f64) args.
+      if (VA.getLocVT() != MVT::f64 ||
+          // TODO: Only handle register args for now.
+          !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
+        return false;
+    } else {
+      switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) {
+      default:
+        return false;
+      case MVT::i1:
+      case MVT::i8:
+      case MVT::i16:
+      case MVT::i32:
+        break;
+      case MVT::f32:
+        if (!Subtarget->hasVFP2())
+          return false;
+        break;
+      case MVT::f64:
+        if (!Subtarget->hasVFP2())
+          return false;
+        break;
+      }
+    }
+  }
+
+  // At the point, we are able to handle the call's arguments in fast isel.
+
   // Get a count of how many bytes are to be pushed on the stack.
   NumBytes = CCInfo.getNextStackOffset();
 
@@ -1866,9 +1909,8 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
     unsigned Arg = ArgRegs[VA.getValNo()];
     MVT ArgVT = ArgVTs[VA.getValNo()];
 
-    // We don't handle NEON/vector parameters yet.
-    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
-      return false;
+    assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
+           "We don't handle NEON/vector parameters yet.");
 
     // Handle arg promotion, etc.
     switch (VA.getLocInfo()) {
@@ -1908,12 +1950,13 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
       RegArgs.push_back(VA.getLocReg());
     } else if (VA.needsCustom()) {
       // TODO: We need custom lowering for vector (v2f64) args.
-      if (VA.getLocVT() != MVT::f64) return false;
+      assert(VA.getLocVT() == MVT::f64 &&
+             "Custom lowering for v2f64 args not available");
 
       CCValAssign &NextVA = ArgLocs[++i];
 
-      // TODO: Only handle register args for now.
-      if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
+      assert(VA.isRegLoc() && NextVA.isRegLoc() &&
+             "We only handle register args!");
 
       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                               TII.get(ARM::VMOVRRD), VA.getLocReg())
@@ -1929,9 +1972,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
       Addr.Base.Reg = ARM::SP;
       Addr.Offset = VA.getLocMemOffset();
 
-      if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
+      bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
+      assert(EmitRet && "Could not emit a store for argument!");
     }
   }
+
   return true;
 }
 
@@ -2136,7 +2181,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
   // TODO: Turn this into the table of arm call ops.
   MachineInstrBuilder MIB;
   unsigned CallOpc = ARMSelectCallOp(NULL);
-  if(isThumb2)
+  if (isThumb2)
     // Explicitly adding the predicate here.
     MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(CallOpc)))
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 0fd6025..bd4b2a9 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -501,7 +501,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
 
   // SP can move around if there are allocas.  We may also lose track of SP
   // when emergency spilling inside a non-reserved call frame setup.
-  bool hasMovingSP = MFI->hasVarSizedObjects() || !hasReservedCallFrame(MF);
+  bool hasMovingSP = !hasReservedCallFrame(MF);
 
   // When dynamically realigning the stack, use the frame pointer for
   // parameters, and the stack/base pointer for locals.
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c99db98..ffb9acb 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1589,9 +1589,9 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
 
-  case ARM::VLD2DUPd8PseudoWB_fixed: return ARM::VLD2DUPd8PseudoWB_register;
-  case ARM::VLD2DUPd16PseudoWB_fixed: return ARM::VLD2DUPd16PseudoWB_register;
-  case ARM::VLD2DUPd32PseudoWB_fixed: return ARM::VLD2DUPd32PseudoWB_register;
+  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
+  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
+  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
   }
   return Opc; // If not one we handle, return it unchanged.
 }
@@ -2891,8 +2891,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   }
 
   case ARMISD::VLD2DUP: {
-    unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo,
-                           ARM::VLD2DUPd32Pseudo };
+    unsigned Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
+                           ARM::VLD2DUPd32 };
     return SelectVLDDup(N, false, 2, Opcodes);
   }
 
@@ -2909,9 +2909,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   }
 
   case ARMISD::VLD2DUP_UPD: {
-    unsigned Opcodes[] = { ARM::VLD2DUPd8PseudoWB_fixed,
-                           ARM::VLD2DUPd16PseudoWB_fixed,
-                           ARM::VLD2DUPd32PseudoWB_fixed };
+    unsigned Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed,
+                           ARM::VLD2DUPd32wb_fixed };
     return SelectVLDDup(N, true, 2, Opcodes);
   }
 
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 477b5f4..e26dd22 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -13,10 +13,10 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "arm-isel"
+#include "ARMISelLowering.h"
 #include "ARM.h"
 #include "ARMCallingConv.h"
 #include "ARMConstantPoolValue.h"
-#include "ARMISelLowering.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMPerfectShuffle.h"
 #include "ARMRegisterInfo.h"
@@ -49,7 +49,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include <sstream>
 using namespace llvm;
 
 STATISTIC(NumTailCalls, "Number of tail calls");
@@ -87,7 +86,7 @@ namespace {
 }
 
 // The APCS parameter registers.
-static const unsigned GPRArgRegs[] = {
+static const uint16_t GPRArgRegs[] = {
   ARM::R0, ARM::R1, ARM::R2, ARM::R3
 };
 
@@ -456,6 +455,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
   }
 
+  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+
   if (Subtarget->hasNEON()) {
     addDRTypeForNEON(MVT::v2f32);
     addDRTypeForNEON(MVT::v8i8);
@@ -3673,6 +3674,27 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
   return Result;
 }
 
+SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+                                           const ARMSubtarget *ST) const {
+  if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
+    return SDValue();
+
+  ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
+  assert(Op.getValueType() == MVT::f32 &&
+         "ConstantFP custom lowering should only occur for f32.");
+
+  APFloat FPVal = CFP->getValueAPF();
+  int ImmVal = ARM_AM::getFP32Imm(FPVal);
+  if (ImmVal == -1)
+    return SDValue();
+
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
+  SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, NewVal);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
+                     DAG.getConstant(0, MVT::i32));
+}
+
 /// isNEONModifiedImm - Check if the specified splat value corresponds to a
 /// valid vector constant for a NEON instruction with a "modified immediate"
 /// operand (e.g., VMOV).  If so, return the encoded value.
@@ -5109,6 +5131,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
   case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
   case ISD::SETCC:         return LowerVSETCC(Op, DAG);
+  case ISD::ConstantFP:    return LowerConstantFP(Op, DAG, Subtarget);
   case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
   case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
@@ -6842,33 +6865,63 @@ static SDValue PerformMULCombine(SDNode *N,
   if (!C)
     return SDValue();
 
-  uint64_t MulAmt = C->getZExtValue();
+  int64_t MulAmt = C->getSExtValue();
   unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+
   ShiftAmt = ShiftAmt & (32 - 1);
   SDValue V = N->getOperand(0);
   DebugLoc DL = N->getDebugLoc();
 
   SDValue Res;
   MulAmt >>= ShiftAmt;
-  if (isPowerOf2_32(MulAmt - 1)) {
-    // (mul x, 2^N + 1) => (add (shl x, N), x)
-    Res = DAG.getNode(ISD::ADD, DL, VT,
-                      V, DAG.getNode(ISD::SHL, DL, VT,
-                                     V, DAG.getConstant(Log2_32(MulAmt-1),
-                                                        MVT::i32)));
-  } else if (isPowerOf2_32(MulAmt + 1)) {
-    // (mul x, 2^N - 1) => (sub (shl x, N), x)
-    Res = DAG.getNode(ISD::SUB, DL, VT,
-                      DAG.getNode(ISD::SHL, DL, VT,
-                                  V, DAG.getConstant(Log2_32(MulAmt+1),
-                                                     MVT::i32)),
-                                                     V);
-  } else
-    return SDValue();
+
+  if (MulAmt >= 0) {
+    if (isPowerOf2_32(MulAmt - 1)) {
+      // (mul x, 2^N + 1) => (add (shl x, N), x)
+      Res = DAG.getNode(ISD::ADD, DL, VT,
+                        V,
+                        DAG.getNode(ISD::SHL, DL, VT,
+                                    V,
+                                    DAG.getConstant(Log2_32(MulAmt - 1),
+                                                    MVT::i32)));
+    } else if (isPowerOf2_32(MulAmt + 1)) {
+      // (mul x, 2^N - 1) => (sub (shl x, N), x)
+      Res = DAG.getNode(ISD::SUB, DL, VT,
+                        DAG.getNode(ISD::SHL, DL, VT,
+                                    V,
+                                    DAG.getConstant(Log2_32(MulAmt + 1),
+                                                    MVT::i32)),
+                        V);
+    } else
+      return SDValue();
+  } else {
+    uint64_t MulAmtAbs = -MulAmt;
+    if (isPowerOf2_32(MulAmtAbs + 1)) {
+      // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+      Res = DAG.getNode(ISD::SUB, DL, VT,
+                        V,
+                        DAG.getNode(ISD::SHL, DL, VT,
+                                    V,
+                                    DAG.getConstant(Log2_32(MulAmtAbs + 1),
+                                                    MVT::i32)));
+    } else if (isPowerOf2_32(MulAmtAbs - 1)) {
+      // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+      Res = DAG.getNode(ISD::ADD, DL, VT,
+                        V,
+                        DAG.getNode(ISD::SHL, DL, VT,
+                                    V,
+                                    DAG.getConstant(Log2_32(MulAmtAbs-1),
+                                                    MVT::i32)));
+      Res = DAG.getNode(ISD::SUB, DL, VT,
+                        DAG.getConstant(0, MVT::i32),Res);
+
+    } else
+      return SDValue();
+  }
 
   if (ShiftAmt != 0)
-    Res = DAG.getNode(ISD::SHL, DL, VT, Res,
-                      DAG.getConstant(ShiftAmt, MVT::i32));
+    Res = DAG.getNode(ISD::SHL, DL, VT,
+                      Res, DAG.getConstant(ShiftAmt, MVT::i32));
 
   // Do not add new nodes to DAG combiner worklist.
   DCI.CombineTo(N, Res, false);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 7f12293..a71b74e 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -15,6 +15,7 @@
 #ifndef ARMISELLOWERING_H
 #define ARMISELLOWERING_H
 
+#include "ARM.h"
 #include "ARMSubtarget.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -434,6 +435,8 @@ namespace llvm {
     SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+                            const ARMSubtarget *ST) const;
     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
                               const ARMSubtarget *ST) const;
 
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 7bedf30..72af535 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -14,11 +14,11 @@
 #ifndef ARMINSTRUCTIONINFO_H
 #define ARMINSTRUCTIONINFO_H
 
-#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMRegisterInfo.h"
 #include "ARMSubtarget.h"
-#include "ARM.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 namespace llvm {
   class ARMSubtarget;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 0b1406e..8196582 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -637,6 +637,7 @@ def BitfieldAsmOperand : AsmOperandClass {
   let Name = "Bitfield";
   let ParserMethod = "parseBitfield";
 }
+
 def bf_inv_mask_imm : Operand<i32>,
                       PatLeaf<(imm), [{
   return ARM::isBitFieldInvertedMask(N->getZExtValue());
@@ -4084,74 +4085,43 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
  [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
                 RegConstraint<"$false = $Rd">;
 
-let isCodeGenOnly = 1 in {
 // Conditional instructions
-multiclass AsI1_bincc_irs<bits<4> opcod, string opc,
-                   InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
-  def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
-               iii, opc, "\t$Rd, $Rn, $imm", []>,
-               RegConstraint<"$Rn = $Rd"> {
-    bits<4> Rd;
-    bits<4> Rn;
-    bits<12> imm;
-    let Inst{25} = 1;
-    let Inst{19-16} = Rn;
-    let Inst{15-12} = Rd;
-    let Inst{11-0} = imm;
-  }
-  def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
-               iir, opc, "\t$Rd, $Rn, $Rm", []>,
-               RegConstraint<"$Rn = $Rd"> {
-    bits<4> Rd;
-    bits<4> Rn;
-    bits<4> Rm;
-    let Inst{25} = 0;
-    let Inst{19-16} = Rn;
-    let Inst{15-12} = Rd;
-    let Inst{11-4} = 0b00000000;
-    let Inst{3-0} = Rm;
-  }
-
-  def rsi : AsI1<opcod, (outs GPR:$Rd),
-               (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
-               iis, opc, "\t$Rd, $Rn, $shift", []>,
-               RegConstraint<"$Rn = $Rd"> {
-    bits<4> Rd;
-    bits<4> Rn;
-    bits<12> shift;
-    let Inst{25} = 0;
-    let Inst{19-16} = Rn;
-    let Inst{15-12} = Rd;
-    let Inst{11-5} = shift{11-5};
-    let Inst{4} = 0;
-    let Inst{3-0} = shift{3-0};
-  }
-
-  def rsr : AsI1<opcod, (outs GPR:$Rd),
-               (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
-               iis, opc, "\t$Rd, $Rn, $shift", []>,
-               RegConstraint<"$Rn = $Rd"> {
-    bits<4> Rd;
-    bits<4> Rn;
-    bits<12> shift;
-    let Inst{25} = 0;
-    let Inst{19-16} = Rn;
-    let Inst{15-12} = Rd;
-    let Inst{11-8} = shift{11-8};
-    let Inst{7} = 0;
-    let Inst{6-5} = shift{6-5};
-    let Inst{4} = 1;
-    let Inst{3-0} = shift{3-0};
-  }
-} // AsI1_bincc_irs
-
-defm ANDCC : AsI1_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-defm ORRCC : AsI1_bincc_irs<0b1100, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
-defm EORCC : AsI1_bincc_irs<0b0001, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi,
+                          Instruction irsr,
+                          InstrItinClass iii, InstrItinClass iir,
+                          InstrItinClass iis> {
+  def ri  : ARMPseudoExpand<(outs GPR:$Rd),
+                            (ins GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s),
+                            4, iii, [],
+                       (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>,
+                            RegConstraint<"$Rn = $Rd">;
+  def rr  : ARMPseudoExpand<(outs GPR:$Rd),
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+                            4, iir, [],
+                           (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+                            RegConstraint<"$Rn = $Rd">;
+  def rsi : ARMPseudoExpand<(outs GPR:$Rd),
+                           (ins GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s),
+                            4, iis, [],
+                (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>,
+                            RegConstraint<"$Rn = $Rd">;
+  def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd),
+                       (ins GPRnopc:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s),
+                            4, iis, [],
+                (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>,
+                            RegConstraint<"$Rn = $Rd">;
+}
+
+defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr,
+                            IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr,
+                            IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr,
+                            IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
 
-} // isCodeGenOnly
 } // neverHasSideEffects
 
+
 //===----------------------------------------------------------------------===//
 // Atomic operations intrinsics
 //
@@ -4605,10 +4575,16 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
                          c_imm:$CRm, imm0_7:$opc2),
                     [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
                                   imm:$CRm, imm:$opc2)]>;
+def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
+                   (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+                        c_imm:$CRm, 0, pred:$p)>;
 def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
                     (outs GPR:$Rt),
                     (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
                          imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
+                   (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+                        c_imm:$CRm, 0, pred:$p)>;
 
 def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
              (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
@@ -4642,10 +4618,16 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
                            c_imm:$CRm, imm0_7:$opc2),
                       [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
                                      imm:$CRm, imm:$opc2)]>;
+def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm",
+                   (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+                         c_imm:$CRm, 0)>;
 def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
                       (outs GPR:$Rt),
                       (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
                            imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm",
+                   (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+                         c_imm:$CRm, 0)>;
 
 def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
                               imm:$CRm, imm:$opc2),
@@ -5252,6 +5234,20 @@ def : ARMInstAlias<"mul${s}${p} $Rn, $Rm",
 def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
                    (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>;
 
+// Pre-v6, 'mov r0, r0' was used as a NOP encoding.
+def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>,
+         Requires<[IsARM, NoV6]>;
+
+// UMULL/SMULL are available on all arches, but the instruction definitions
+// need difference constraints pre-v6. Use these aliases for the assembly
+// parsing on pre-v6.
+def : InstAlias<"smull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+            (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+         Requires<[IsARM, NoV6]>;
+def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+            (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+         Requires<[IsARM, NoV6]>;
+
 // 'it' blocks in ARM mode just validate the predicates. The IT itself
 // is discarded.
 def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 8684ce1..f61eb2b 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -94,7 +94,7 @@ def VecListDPairAsmOperand : AsmOperandClass {
   let ParserMethod = "parseVectorList";
   let RenderMethod = "addVecListOperands";
 }
-def VecListDPair : RegisterOperand<DPair, "printVectorListDPair"> {
+def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
   let ParserMatchClass = VecListDPairAsmOperand;
 }
 // Register list of three sequential D registers.
@@ -121,7 +121,7 @@ def VecListDPairSpacedAsmOperand : AsmOperandClass {
   let ParserMethod = "parseVectorList";
   let RenderMethod = "addVecListOperands";
 }
-def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListDPairSpaced"> {
+def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
   let ParserMatchClass = VecListDPairSpacedAsmOperand;
 }
 // Register list of three D registers spaced by 2 (three Q registers).
@@ -153,23 +153,24 @@ def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
   let ParserMatchClass = VecListOneDAllLanesAsmOperand;
 }
 // Register list of two D registers, with "all lanes" subscripting.
-def VecListTwoDAllLanesAsmOperand : AsmOperandClass {
-  let Name = "VecListTwoDAllLanes";
+def VecListDPairAllLanesAsmOperand : AsmOperandClass {
+  let Name = "VecListDPairAllLanes";
   let ParserMethod = "parseVectorList";
   let RenderMethod = "addVecListOperands";
 }
-def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> {
-  let ParserMatchClass = VecListTwoDAllLanesAsmOperand;
+def VecListDPairAllLanes : RegisterOperand<DPair,
+                                           "printVectorListTwoAllLanes"> {
+  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
 }
 // Register list of two D registers spaced by 2 (two sequential Q registers).
-def VecListTwoQAllLanesAsmOperand : AsmOperandClass {
-  let Name = "VecListTwoQAllLanes";
+def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
+  let Name = "VecListDPairSpacedAllLanes";
   let ParserMethod = "parseVectorList";
   let RenderMethod = "addVecListOperands";
 }
-def VecListTwoQAllLanes : RegisterOperand<DPR,
+def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
                                          "printVectorListTwoSpacedAllLanes"> {
-  let ParserMatchClass = VecListTwoQAllLanesAsmOperand;
+  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
 }
 // Register list of three D registers, with "all lanes" subscripting.
 def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
@@ -1276,39 +1277,32 @@ class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
   let Inst{4} = Rn{4};
   let DecoderMethod = "DecodeVLD1DupInstruction";
 }
-class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
-  let Pattern = [(set QPR:$dst,
-                      (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
-}
-
 def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
 def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
 def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
 
-def VLD1DUPq8Pseudo  : VLD1QDUPPseudo<v16i8, extloadi8>;
-def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
-def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
-
 def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
           (VLD1DUPd32 addrmode6:$addr)>;
-def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
-          (VLD1DUPq32Pseudo addrmode6:$addr)>;
 
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-
-class VLD1QDUP<bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd),
+class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
           (ins addrmode6dup:$Rn), IIC_VLD1dup,
-          "vld1", Dt, "$Vd, $Rn", "", []> {
+          "vld1", Dt, "$Vd, $Rn", "",
+          [(set VecListDPairAllLanes:$Vd,
+                (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
   let Rm = 0b1111;
   let Inst{4} = Rn{4};
   let DecoderMethod = "DecodeVLD1DupInstruction";
 }
 
-def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8">;
-def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
-def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
+def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;
 
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+          (VLD1DUPq32 addrmode6:$addr)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
 // ...with address register writeback:
 multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
   def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
@@ -1333,7 +1327,7 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
 }
 multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
   def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
-                     (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
+                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                      (ins addrmode6dup:$Rn), IIC_VLD1dupu,
                      "vld1", Dt, "$Vd, $Rn!",
                      "$Rn.addr = $wb", []> {
@@ -1343,7 +1337,7 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
     let AsmMatchConverter = "cvtVLDwbFixed";
   }
   def _register : NLdSt<1, 0b10, 0b1100, op7_4,
-                        (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
+                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
                         (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
                         "vld1", Dt, "$Vd, $Rn, $Rm",
                         "$Rn.addr = $wb", []> {
@@ -1361,13 +1355,6 @@ defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8">;
 defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
 defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
 
-def VLD1DUPq8PseudoWB_fixed     : VLDQWBfixedPseudo<IIC_VLD1dupu>;
-def VLD1DUPq16PseudoWB_fixed    : VLDQWBfixedPseudo<IIC_VLD1dupu>;
-def VLD1DUPq32PseudoWB_fixed    : VLDQWBfixedPseudo<IIC_VLD1dupu>;
-def VLD1DUPq8PseudoWB_register  : VLDQWBregisterPseudo<IIC_VLD1dupu>;
-def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
-def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
-
 //   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
 class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
   : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
@@ -1378,18 +1365,14 @@ class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
   let DecoderMethod = "DecodeVLD2DupInstruction";
 }
 
-def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListTwoDAllLanes>;
-def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListTwoDAllLanes>;
-def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListTwoDAllLanes>;
+def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes>;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;
 
-def VLD2DUPd8Pseudo  : VLDQPseudo<IIC_VLD2dup>;
-def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
-def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
-
-// ...with double-spaced registers (not used for codegen):
-def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListTwoQAllLanes>;
-def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListTwoQAllLanes>;
-def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListTwoQAllLanes>;
+// ...with double-spaced registers
+def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes>;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
 
 // ...with address register writeback:
 multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
@@ -1414,20 +1397,13 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
   }
 }
 
-defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8",  VecListTwoDAllLanes>;
-defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListTwoDAllLanes>;
-defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListTwoDAllLanes>;
-
-defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListTwoQAllLanes>;
-defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListTwoQAllLanes>;
-defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListTwoQAllLanes>;
+defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes>;
+defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
+defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;
 
-def VLD2DUPd8PseudoWB_fixed     : VLDQWBfixedPseudo   <IIC_VLD2dupu>;
-def VLD2DUPd8PseudoWB_register  : VLDQWBregisterPseudo<IIC_VLD2dupu>;
-def VLD2DUPd16PseudoWB_fixed    : VLDQWBfixedPseudo   <IIC_VLD2dupu>;
-def VLD2DUPd16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>;
-def VLD2DUPd32PseudoWB_fixed    : VLDQWBfixedPseudo   <IIC_VLD2dupu>;
-def VLD2DUPd32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>;
+defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes>;
+defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
+defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
 
 //   VLD3DUP  : Vector Load (single 3-element structure to all lanes)
 class VLD3DUP<bits<4> op7_4, string Dt>
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index e8984e1..1f7edc1 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -574,7 +574,7 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
                                                     cc_out:$s)>;
 
   // and with the optional destination operand, too.
-  def : t2InstAlias<!strconcat(opc, "${s}${p}.ri", " $Rdn, $imm"),
+  def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"),
      (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
                                                     t2_so_imm:$imm, pred:$p,
                                                     cc_out:$s)>;
@@ -2952,45 +2952,36 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
                              (ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
                              IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
                  RegConstraint<"$false = $Rd">;
+} // isCodeGenOnly = 1
 
-multiclass T2I_bincc_irs<bits<4> opcod, string opc,
+multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs,
                    InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
    // shifted imm
-   def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
-                         iii, opc, ".w\t$Rd, $Rn, $imm", []>,
-                         RegConstraint<"$Rn = $Rd"> {
-     let Inst{31-27} = 0b11110;
-     let Inst{25} = 0;
-     let Inst{24-21} = opcod;
-     let Inst{15} = 0;
-   }
+   def ri : t2PseudoExpand<(outs rGPR:$Rd),
+                           (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s),
+                           4, iii, [],
+                  (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
+                           RegConstraint<"$Rn = $Rd">;
    // register
-   def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
-                        iir, opc, ".w\t$Rd, $Rn, $Rm", []>,
-                        RegConstraint<"$Rn = $Rd"> {
-     let Inst{31-27} = 0b11101;
-     let Inst{26-25} = 0b01;
-     let Inst{24-21} = opcod;
-     let Inst{14-12} = 0b000; // imm3
-     let Inst{7-6} = 0b00; // imm2
-     let Inst{5-4} = 0b00; // type
-   }
+   def rr : t2PseudoExpand<(outs rGPR:$Rd),
+                           (ins rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s),
+                           4, iir, [],
+                        (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
+                           RegConstraint<"$Rn = $Rd">;
    // shifted register
-   def rs : T2sTwoRegShiftedReg<(outs rGPR:$Rd),
-                                (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
-                                iis, opc, ".w\t$Rd, $Rn, $ShiftedRm", []>,
-                                RegConstraint<"$Rn = $Rd"> {
-     let Inst{31-27} = 0b11101;
-     let Inst{26-25} = 0b01;
-     let Inst{24-21} = opcod;
-   }
+   def rs : t2PseudoExpand<(outs rGPR:$Rd),
+                       (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s),
+                           4, iis, [],
+            (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>,
+                           RegConstraint<"$Rn = $Rd">;
 } // T2I_bincc_irs
 
-defm t2ANDCC : T2I_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
-defm t2ORRCC : T2I_bincc_irs<0b0010, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
-defm t2EORCC : T2I_bincc_irs<0b0100, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
-
-} // isCodeGenOnly = 1
+defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs,
+                             IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
+defm t2ORRCC : T2I_bincc_irs<t2ORRri, t2ORRrr, t2ORRrs,
+                             IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
+defm t2EORCC : T2I_bincc_irs<t2EORri, t2EORrr, t2EORrs,
+                             IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
 } // neverHasSideEffects
 
 //===----------------------------------------------------------------------===//
@@ -3768,20 +3759,32 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
                 c_imm:$CRm, imm0_7:$opc2),
            [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
                          imm:$CRm, imm:$opc2)]>;
+def : t2InstAlias<"mcr $cop, $opc1, $Rt, $CRn, $CRm",
+                  (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+                         c_imm:$CRm, 0)>;
 def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
              (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
                           c_imm:$CRm, imm0_7:$opc2),
              [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
                             imm:$CRm, imm:$opc2)]>;
+def : t2InstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
+                  (t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+                          c_imm:$CRm, 0)>;
 
 /* from coprocessor to ARM core register */
 def t2MRC : t2MovRCopro<0b1110, "mrc", 1,
              (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
                                   c_imm:$CRm, imm0_7:$opc2), []>;
+def : t2InstAlias<"mrc $cop, $opc1, $Rt, $CRn, $CRm",
+                  (t2MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+                         c_imm:$CRm, 0)>;
 
 def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
              (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
                                   c_imm:$CRm, imm0_7:$opc2), []>;
+def : t2InstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
+                  (t2MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+                          c_imm:$CRm, 0)>;
 
 def : T2v6Pat<(int_arm_mrc  imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
               (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index aa10af7..e9d5720 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -206,6 +206,14 @@ def : InstAlias<"vpop${p} $r",  (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>,
                 Requires<[HasVFP2]>;
 def : InstAlias<"vpop${p} $r",  (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>,
                 Requires<[HasVFP2]>;
+defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
+                         (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
+                         (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
+                         (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
+                         (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>;
 
 // FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
 
@@ -286,7 +294,7 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b),
           (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
 
 // These are encoded as unary instructions.
-let Defs = [FPSCR] in {
+let Defs = [FPSCR_NZCV] in {
 def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
                   (outs), (ins DPR:$Dd, DPR:$Dm),
                   IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
@@ -315,7 +323,7 @@ def VCMPS  : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
   // VFP pipelines on A8.
   let D = VFPNeonA8Domain;
 }
-} // Defs = [FPSCR]
+} // Defs = [FPSCR_NZCV]
 
 //===----------------------------------------------------------------------===//
 // FP Unary Operations.
@@ -335,7 +343,7 @@ def VABSS  : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
   let D = VFPNeonA8Domain;
 }
 
-let Defs = [FPSCR] in {
+let Defs = [FPSCR_NZCV] in {
 def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
                    (outs), (ins DPR:$Dd),
                    IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
@@ -376,7 +384,7 @@ def VCMPZS  : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
   // VFP pipelines on A8.
   let D = VFPNeonA8Domain;
 }
-} // Defs = [FPSCR]
+} // Defs = [FPSCR_NZCV]
 
 def VCVTDS  : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
                    (outs DPR:$Dd), (ins SPR:$Sm),
@@ -810,7 +818,29 @@ let Constraints = "$a = $dst" in {
 
 // FP to Fixed-Point:
 
-def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
+// Single Precision register
+class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, 
+                dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+                list<dag> pattern>
+  : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+  bits<5> dst;
+  // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+  let Inst{22} = dst{0};
+  let Inst{15-12} = dst{4-1};
+}
+
+// Double Precision register
+class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, 
+                dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+                list<dag> pattern>
+  : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+  bits<5> dst;
+  // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+  let Inst{22} = dst{4};
+  let Inst{15-12} = dst{3-0};
+}
+
+def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                  IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> {
   // Some single precision VFP instructions may be executed on both NEON and
@@ -818,7 +848,7 @@ def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
   let D = VFPNeonA8Domain;
 }
 
-def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
+def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                  IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> {
   // Some single precision VFP instructions may be executed on both NEON and
@@ -826,7 +856,7 @@ def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
   let D = VFPNeonA8Domain;
 }
 
-def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
+def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
                  IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> {
   // Some single precision VFP instructions may be executed on both NEON and
@@ -834,7 +864,7 @@ def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
   let D = VFPNeonA8Domain;
 }
 
-def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
+def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
                  IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> {
   // Some single precision VFP instructions may be executed on both NEON and
@@ -842,25 +872,25 @@ def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
   let D = VFPNeonA8Domain;
 }
 
-def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
+def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0,
                        (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
                  IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>;
 
-def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0,
+def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0,
                        (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
                  IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>;
 
-def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1,
+def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1,
                        (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
                  IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>;
 
-def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
+def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
                        (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
                  IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>;
 
 // Fixed-Point to FP:
 
-def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
+def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> {
   // Some single precision VFP instructions may be executed on both NEON and
@@ -868,7 +898,7 @@ def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
   let D = VFPNeonA8Domain;
 }
 
-def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
+def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
                        (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> {
   // Some single precision VFP instructions may be executed on both NEON and
@@ -876,7 +906,7 @@ def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
   let D = VFPNeonA8Domain;
 }
 
-def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
+def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> {
   // Some single precision VFP instructions may be executed on both NEON and
@@ -884,7 +914,7 @@ def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
   let D = VFPNeonA8Domain;
 }
 
-def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
+def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
                        (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
                  IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> {
   // Some single precision VFP instructions may be executed on both NEON and
@@ -892,19 +922,19 @@ def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
   let D = VFPNeonA8Domain;
 }
 
-def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
+def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0,
                        (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
                  IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>;
 
-def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0,
+def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0,
                        (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
                  IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>;
 
-def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1,
+def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1,
                        (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
                  IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>;
 
-def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
+def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
                        (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
                  IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>;
 
@@ -1166,9 +1196,9 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
 
 // APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags
 // to APSR.
-let Defs = [CPSR], Uses = [FPSCR], Rt = 0b1111 /* apsr_nzcv */ in
+let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in
 def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
-                        "vmrs", "\tapsr_nzcv, fpscr", [(arm_fmstat)]>;
+                        "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
 
 // Application level FPSCR -> GPR
 let hasSideEffects = 1, Uses = [FPSCR] in
@@ -1182,6 +1212,10 @@ let Uses = [FPSCR] in {
                               "vmrs", "\t$Rt, fpexc", []>;
   def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins),
                               "vmrs", "\t$Rt, fpsid", []>;
+  def VMRS_MVFR0 : MovFromVFP<0b0111 /* mvfr0 */, (outs GPR:$Rt), (ins),
+                              "vmrs", "\t$Rt, mvfr0", []>;
+  def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins),
+                              "vmrs", "\t$Rt, mvfr1", []>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1304,6 +1338,13 @@ def : VFP2MnemonicAlias<"fcmps", "vcmp.f32">;
 def : VFP2MnemonicAlias<"fcmpd", "vcmp.f64">;
 def : VFP2MnemonicAlias<"fdivs", "vdiv.f32">;
 def : VFP2MnemonicAlias<"fdivd", "vdiv.f64">;
+def : VFP2MnemonicAlias<"fmrx", "vmrs">;
+def : VFP2MnemonicAlias<"fmxr", "vmsr">;
+
+// Be friendly and accept the old form of zero-compare
+def : VFP2InstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
+def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
+
 
 def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
 def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index afbe0e4..753e578 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -62,7 +62,7 @@ extern "C" {
     // concerned, so we can't just preserve the callee saved regs.
     "stmdb sp!, {r0, r1, r2, r3, lr}\n"
 #if (defined(__VFP_FP__) && !defined(__SOFTFP__))
-    "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+    "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
 #endif
     // The LR contains the address of the stub function on entry.
     // pass it as the argument to the C part of the callback
@@ -86,7 +86,7 @@ extern "C" {
     //
 #if (defined(__VFP_FP__) && !defined(__SOFTFP__))
     // Restore VFP caller-saved registers.
-    "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+    "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
 #endif
     //
     //      We need to exchange the values in slots 0 and 1 so we can
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index 1f83762..6f3819a 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "ARMRegisterInfo.h"
 #include "ARM.h"
 #include "ARMBaseInstrInfo.h"
-#include "ARMRegisterInfo.h"
 using namespace llvm;
 
 void ARMRegisterInfo::anchor() { }
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index 65ed95d..8a24842 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -15,13 +15,12 @@
 #define ARMREGISTERINFO_H
 
 #include "ARM.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "ARMBaseRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 
 namespace llvm {
   class ARMSubtarget;
   class ARMBaseInstrInfo;
-  class Type;
 
 struct ARMRegisterInfo : public ARMBaseRegisterInfo {
   virtual void anchor();
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index b16a12c..1327fb8 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -153,14 +153,21 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>;
 }
 
 // Current Program Status Register.
-def CPSR    : ARMReg<0, "cpsr">;
-def APSR    : ARMReg<1, "apsr">;
-def SPSR    : ARMReg<2, "spsr">;
-def FPSCR   : ARMReg<3, "fpscr">;
-def ITSTATE : ARMReg<4, "itstate">;
+// We model fpscr with two registers: FPSCR models the control bits and will be
+// reserved. FPSCR_NZCV models the flag bits and will be unreserved. 
+def CPSR       : ARMReg<0, "cpsr">;
+def APSR       : ARMReg<1, "apsr">;
+def SPSR       : ARMReg<2, "spsr">;
+def FPSCR      : ARMReg<3, "fpscr">;
+def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> {
+  let Aliases = [FPSCR];
+}
+def ITSTATE    : ARMReg<4, "itstate">;
 
 // Special Registers - only available in privileged mode.
 def FPSID   : ARMReg<0, "fpsid">;
+def MVFR1   : ARMReg<6, "mvfr1">;
+def MVFR0   : ARMReg<7, "mvfr0">;
 def FPEXC   : ARMReg<8, "fpexc">;
 
 // Register classes.
@@ -304,7 +311,8 @@ def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1],
 
 // Register class representing a pair of consecutive D registers.
 // Use the Q registers for the even-odd pairs.
-def DPair : RegisterClass<"ARM", [v2i64], 128, (interleave QPR, TuplesOE2D)> {
+def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+                          128, (interleave QPR, TuplesOE2D)> {
   // Allocate starting at non-VFP2 registers D16-D31 first.
   let AltOrders = [(rotl DPair, 16)];
   let AltOrderSelect = [{ return 1; }];
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 2045482..911eb13 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -1101,13 +1101,8 @@ public:
     return VectorList.Count == 4;
   }
 
-  bool isVecListTwoQ() const {
-    if (!isDoubleSpacedVectorList()) return false;
-    return VectorList.Count == 2;
-  }
-
   bool isVecListDPairSpaced() const {
-    if (!isSingleSpacedVectorList()) return false;
+    if (isSingleSpacedVectorList()) return false;
     return (ARMMCRegisterClasses[ARM::DPairSpcRegClassID]
               .contains(VectorList.RegNum));
   }
@@ -1133,12 +1128,13 @@ public:
     return VectorList.Count == 1;
   }
 
-  bool isVecListTwoDAllLanes() const {
+  bool isVecListDPairAllLanes() const {
     if (!isSingleSpacedVectorAllLanes()) return false;
-    return VectorList.Count == 2;
+    return (ARMMCRegisterClasses[ARM::DPairRegClassID]
+              .contains(VectorList.RegNum));
   }
 
-  bool isVecListTwoQAllLanes() const {
+  bool isVecListDPairSpacedAllLanes() const {
     if (!isDoubleSpacedVectorAllLanes()) return false;
     return VectorList.Count == 2;
   }
@@ -2858,8 +2854,12 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
     if (!RC->contains(Reg))
       return Error(RegLoc, "invalid register in register list");
     // List must be monotonically increasing.
-    if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg))
-      return Error(RegLoc, "register list not in ascending order");
+    if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) {
+      if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+        Warning(RegLoc, "register list not in ascending order");
+      else
+        return Error(RegLoc, "register list not in ascending order");
+    }
     if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) {
       Warning(RegLoc, "duplicated register (" + RegTok.getString() +
               ") in register list");
@@ -2905,6 +2905,12 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
       Parser.Lex(); // Eat the ']'.
       return MatchOperand_Success;
     }
+
+    // There's an optional '#' token here. Normally there wouldn't be, but
+    // inline assemble puts one in, and it's friendly to accept that.
+    if (Parser.getTok().is(AsmToken::Hash))
+      Parser.Lex(); // Eat the '#'
+
     const MCExpr *LaneIndex;
     SMLoc Loc = Parser.getTok().getLoc();
     if (getParser().ParseExpression(LaneIndex)) {
@@ -2981,12 +2987,13 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
       case NoLanes:
         E = Parser.getTok().getLoc();
         Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
-                                      &ARMMCRegisterClasses[ARM::DPairRegClassID]);
-
+                                   &ARMMCRegisterClasses[ARM::DPairRegClassID]);
         Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
         break;
       case AllLanes:
         E = Parser.getTok().getLoc();
+        Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
+                                   &ARMMCRegisterClasses[ARM::DPairRegClassID]);
         Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false,
                                                                 S, E));
         break;
@@ -3152,7 +3159,7 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 
   switch (LaneKind) {
   case NoLanes:
-    // Non-lane two-register operands have been converted to the
+    // Two-register operands have been converted to the
     // composite register classes.
     if (Count == 2) {
       const MCRegisterClass *RC = (Spacing == 1) ?
@@ -3165,6 +3172,14 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
                                                     (Spacing == 2), S, E));
     break;
   case AllLanes:
+    // Two-register operands have been converted to the
+    // composite register classes.
+    if (Count == 2) {
+      const MCRegisterClass *RC = (Spacing == 1) ?
+        &ARMMCRegisterClasses[ARM::DPairRegClassID] :
+        &ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
+      FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
+    }
     Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
                                                             (Spacing == 2),
                                                             S, E));
@@ -3253,7 +3268,8 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 
   if (isMClass()) {
     // See ARMv6-M 10.1.1
-    unsigned FlagsVal = StringSwitch<unsigned>(Mask)
+    std::string Name = Mask.lower();
+    unsigned FlagsVal = StringSwitch<unsigned>(Name)
       .Case("apsr", 0)
       .Case("iapsr", 1)
       .Case("eapsr", 2)
@@ -4427,10 +4443,11 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
     else if (Res == -1) // irrecoverable error
       return true;
     // If this is VMRS, check for the apsr_nzcv operand.
-    if (Mnemonic == "vmrs" && Parser.getTok().getString() == "apsr_nzcv") {
+    if (Mnemonic == "vmrs" &&
+        Parser.getTok().getString().equals_lower("apsr_nzcv")) {
       S = Parser.getTok().getLoc();
       Parser.Lex();
-      Operands.push_back(ARMOperand::CreateToken("apsr_nzcv", S));
+      Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
       return false;
     }
 
@@ -4598,7 +4615,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
         Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" ||
         Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" ||
         Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" ||
-        Mnemonic == "fmuls" || Mnemonic == "fcmps" ||
+        Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" ||
         (Mnemonic == "movs" && isThumb()))) {
     Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
     CarrySetting = true;
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 4101f59..ce4587b 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -849,7 +849,7 @@ extern "C" void LLVMInitializeARMDisassembler() {
                                          createThumbDisassembler);
 }
 
-static const unsigned GPRDecoderTable[] = {
+static const uint16_t GPRDecoderTable[] = {
   ARM::R0, ARM::R1, ARM::R2, ARM::R3,
   ARM::R4, ARM::R5, ARM::R6, ARM::R7,
   ARM::R8, ARM::R9, ARM::R10, ARM::R11,
@@ -869,8 +869,14 @@ static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
 static DecodeStatus
 DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                            uint64_t Address, const void *Decoder) {
-  if (RegNo == 15) return MCDisassembler::Fail;
-  return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+  DecodeStatus S = MCDisassembler::Success;
+  
+  if (RegNo == 15) 
+    S = MCDisassembler::SoftFail;
+
+  Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+
+  return S;
 }
 
 static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
@@ -916,7 +922,7 @@ static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
   return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
 }
 
-static const unsigned SPRDecoderTable[] = {
+static const uint16_t SPRDecoderTable[] = {
      ARM::S0,  ARM::S1,  ARM::S2,  ARM::S3,
      ARM::S4,  ARM::S5,  ARM::S6,  ARM::S7,
      ARM::S8,  ARM::S9, ARM::S10, ARM::S11,
@@ -937,7 +943,7 @@ static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
   return MCDisassembler::Success;
 }
 
-static const unsigned DPRDecoderTable[] = {
+static const uint16_t DPRDecoderTable[] = {
      ARM::D0,  ARM::D1,  ARM::D2,  ARM::D3,
      ARM::D4,  ARM::D5,  ARM::D6,  ARM::D7,
      ARM::D8,  ARM::D9, ARM::D10, ARM::D11,
@@ -973,7 +979,7 @@ DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
   return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
 }
 
-static const unsigned QPRDecoderTable[] = {
+static const uint16_t QPRDecoderTable[] = {
      ARM::Q0,  ARM::Q1,  ARM::Q2,  ARM::Q3,
      ARM::Q4,  ARM::Q5,  ARM::Q6,  ARM::Q7,
      ARM::Q8,  ARM::Q9, ARM::Q10, ARM::Q11,
@@ -992,7 +998,7 @@ static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
   return MCDisassembler::Success;
 }
 
-static const unsigned DPairDecoderTable[] = {
+static const uint16_t DPairDecoderTable[] = {
   ARM::Q0,  ARM::D1_D2,   ARM::Q1,  ARM::D3_D4,   ARM::Q2,  ARM::D5_D6,
   ARM::Q3,  ARM::D7_D8,   ARM::Q4,  ARM::D9_D10,  ARM::Q5,  ARM::D11_D12,
   ARM::Q6,  ARM::D13_D14, ARM::Q7,  ARM::D15_D16, ARM::Q8,  ARM::D17_D18,
@@ -1011,7 +1017,7 @@ static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
   return MCDisassembler::Success;
 }
 
-static const unsigned DPairSpacedDecoderTable[] = {
+static const uint16_t DPairSpacedDecoderTable[] = {
   ARM::D0_D2,   ARM::D1_D3,   ARM::D2_D4,   ARM::D3_D5,
   ARM::D4_D6,   ARM::D5_D7,   ARM::D6_D8,   ARM::D7_D9,
   ARM::D8_D10,  ARM::D9_D11,  ARM::D10_D12, ARM::D11_D13,
@@ -2001,27 +2007,15 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
 
   // First output register
   switch (Inst.getOpcode()) {
-  case ARM::VLD1q16:
-  case ARM::VLD1q32:
-  case ARM::VLD1q64:
-  case ARM::VLD1q8:
-  case ARM::VLD1q16wb_fixed:
-  case ARM::VLD1q16wb_register:
-  case ARM::VLD1q32wb_fixed:
-  case ARM::VLD1q32wb_register:
-  case ARM::VLD1q64wb_fixed:
-  case ARM::VLD1q64wb_register:
-  case ARM::VLD1q8wb_fixed:
-  case ARM::VLD1q8wb_register:
-  case ARM::VLD2d16:
-  case ARM::VLD2d32:
-  case ARM::VLD2d8:
-  case ARM::VLD2d16wb_fixed:
-  case ARM::VLD2d16wb_register:
-  case ARM::VLD2d32wb_fixed:
-  case ARM::VLD2d32wb_register:
-  case ARM::VLD2d8wb_fixed:
-  case ARM::VLD2d8wb_register:
+  case ARM::VLD1q16: case ARM::VLD1q32: case ARM::VLD1q64: case ARM::VLD1q8:
+  case ARM::VLD1q16wb_fixed: case ARM::VLD1q16wb_register:
+  case ARM::VLD1q32wb_fixed: case ARM::VLD1q32wb_register:
+  case ARM::VLD1q64wb_fixed: case ARM::VLD1q64wb_register:
+  case ARM::VLD1q8wb_fixed: case ARM::VLD1q8wb_register:
+  case ARM::VLD2d16: case ARM::VLD2d32: case ARM::VLD2d8:
+  case ARM::VLD2d16wb_fixed: case ARM::VLD2d16wb_register:
+  case ARM::VLD2d32wb_fixed: case ARM::VLD2d32wb_register:
+  case ARM::VLD2d8wb_fixed: case ARM::VLD2d8wb_register:
     if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
       return MCDisassembler::Fail;
     break;
@@ -2325,6 +2319,8 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
     case ARM::VST2b8wb_register:
     case ARM::VST2b16wb_register:
     case ARM::VST2b32wb_register:
+      Inst.addOperand(MCOperand::CreateImm(0));
+      break;
     case ARM::VST3d8_UPD:
     case ARM::VST3d16_UPD:
     case ARM::VST3d32_UPD:
@@ -2366,6 +2362,23 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
     case ARM::VST1q16wb_fixed:
     case ARM::VST1q32wb_fixed:
     case ARM::VST1q64wb_fixed:
+    case ARM::VST1d8Twb_fixed:
+    case ARM::VST1d16Twb_fixed:
+    case ARM::VST1d32Twb_fixed:
+    case ARM::VST1d64Twb_fixed:
+    case ARM::VST1d8Qwb_fixed:
+    case ARM::VST1d16Qwb_fixed:
+    case ARM::VST1d32Qwb_fixed:
+    case ARM::VST1d64Qwb_fixed:
+    case ARM::VST2d8wb_fixed:
+    case ARM::VST2d16wb_fixed:
+    case ARM::VST2d32wb_fixed:
+    case ARM::VST2q8wb_fixed:
+    case ARM::VST2q16wb_fixed:
+    case ARM::VST2q32wb_fixed:
+    case ARM::VST2b8wb_fixed:
+    case ARM::VST2b16wb_fixed:
+    case ARM::VST2b32wb_fixed:
       break;
   }
 
@@ -2525,8 +2538,19 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
 
   align *= (1 << size);
 
-  if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
-    return MCDisassembler::Fail;
+  switch (Inst.getOpcode()) {
+  case ARM::VLD1DUPq16: case ARM::VLD1DUPq32: case ARM::VLD1DUPq8:
+  case ARM::VLD1DUPq16wb_fixed: case ARM::VLD1DUPq16wb_register:
+  case ARM::VLD1DUPq32wb_fixed: case ARM::VLD1DUPq32wb_register:
+  case ARM::VLD1DUPq8wb_fixed: case ARM::VLD1DUPq8wb_register:
+    if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+      return MCDisassembler::Fail;
+    break;
+  default:
+    if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+      return MCDisassembler::Fail;
+    break;
+  }
   if (Rm != 0xF) {
     if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
       return MCDisassembler::Fail;
@@ -2556,18 +2580,33 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
   unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
   unsigned align = fieldFromInstruction32(Insn, 4, 1);
   unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2);
-  unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+  unsigned pred = fieldFromInstruction32(Insn, 22, 4);
   align *= 2*size;
 
-  if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
-    return MCDisassembler::Fail;
-  if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
-    return MCDisassembler::Fail;
-  if (Rm != 0xF) {
-    if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+  switch (Inst.getOpcode()) {
+  case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: case ARM::VLD2DUPd8:
+  case ARM::VLD2DUPd16wb_fixed: case ARM::VLD2DUPd16wb_register:
+  case ARM::VLD2DUPd32wb_fixed: case ARM::VLD2DUPd32wb_register:
+  case ARM::VLD2DUPd8wb_fixed: case ARM::VLD2DUPd8wb_register:
+    if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+      return MCDisassembler::Fail;
+    break;
+  case ARM::VLD2DUPd16x2: case ARM::VLD2DUPd32x2: case ARM::VLD2DUPd8x2:
+  case ARM::VLD2DUPd16x2wb_fixed: case ARM::VLD2DUPd16x2wb_register:
+  case ARM::VLD2DUPd32x2wb_fixed: case ARM::VLD2DUPd32x2wb_register:
+  case ARM::VLD2DUPd8x2wb_fixed: case ARM::VLD2DUPd8x2wb_register:
+    if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+      return MCDisassembler::Fail;
+    break;
+  default:
+    if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
       return MCDisassembler::Fail;
+    break;
   }
 
+  if (Rm != 0xF)
+    Inst.addOperand(MCOperand::CreateImm(0));
+
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
   Inst.addOperand(MCOperand::CreateImm(align));
@@ -2579,6 +2618,9 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
       return MCDisassembler::Fail;
   }
 
+  if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+    return MCDisassembler::Fail;
+
   return S;
 }
 
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index bae4e78..2b994df 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -1026,15 +1026,6 @@ void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
 }
 
 void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
-                                        raw_ostream &O) {
-  // Normally, it's not safe to use register enum values directly with
-  // addition to get the next register, but for VFP registers, the
-  // sort order is guaranteed because they're all of the form D<n>.
-  O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
-    << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "}";
-}
-
-void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum,
                                           raw_ostream &O) {
   unsigned Reg = MI->getOperand(OpNum).getReg();
   unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
@@ -1042,9 +1033,9 @@ void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum,
   O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}";
 }
 
-void ARMInstPrinter::printVectorListDPairSpaced(const MCInst *MI,
-                                                unsigned OpNum,
-                                                raw_ostream &O) {
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
+                                              unsigned OpNum,
+                                              raw_ostream &O) {
   unsigned Reg = MI->getOperand(OpNum).getReg();
   unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
   unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
@@ -1081,11 +1072,10 @@ void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
 void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
                                                 unsigned OpNum,
                                                 raw_ostream &O) {
-  // Normally, it's not safe to use register enum values directly with
-  // addition to get the next register, but for VFP registers, the
-  // sort order is guaranteed because they're all of the form D<n>.
-  O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
-    << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}";
+  unsigned Reg = MI->getOperand(OpNum).getReg();
+  unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+  unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
+  O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
 }
 
 void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
@@ -1111,23 +1101,13 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
     << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}";
 }
 
-void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
-                                              raw_ostream &O) {
-  // Normally, it's not safe to use register enum values directly with
-  // addition to get the next register, but for VFP registers, the
-  // sort order is guaranteed because they're all of the form D<n>.
-  O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
-    << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}";
-}
-
 void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
                                                       unsigned OpNum,
                                                       raw_ostream &O) {
-  // Normally, it's not safe to use register enum values directly with
-  // addition to get the next register, but for VFP registers, the
-  // sort order is guaranteed because they're all of the form D<n>.
-  O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
-    << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}";
+  unsigned Reg = MI->getOperand(OpNum).getReg();
+  unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+  unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
+  O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
 }
 
 void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI,
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 1037161..e9cd407 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -134,9 +134,8 @@ public:
   void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printVectorListDPair(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-  void printVectorListDPairSpaced(const MCInst *MI, unsigned OpNum,
-                                  raw_ostream &O);
+  void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+                               raw_ostream &O);
   void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
   void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
@@ -147,8 +146,6 @@ public:
                                     raw_ostream &O);
   void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum,
                                    raw_ostream &O);
-  void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
-                                raw_ostream &O);
   void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
                                         raw_ostream &O);
   void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index d3a3d3a..25849ee 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -167,6 +167,7 @@ static unsigned getRelaxedOpcode(unsigned Op) {
   case ARM::tBcc:       return ARM::t2Bcc;
   case ARM::tLDRpciASM: return ARM::t2LDRpci;
   case ARM::tADR:       return ARM::t2ADR;
+  case ARM::tB:         return ARM::t2B;
   }
 }
 
@@ -181,6 +182,16 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                          const MCInstFragment *DF,
                                          const MCAsmLayout &Layout) const {
   switch ((unsigned)Fixup.getKind()) {
+  case ARM::fixup_arm_thumb_br: {
+    // Relaxing tB to t2B. tB has a signed 12-bit displacement with the
+    // low bit being an implied zero. There's an implied +4 offset for the
+    // branch, so we adjust the other way here to determine what's
+    // encodable.
+    //
+    // Relax if the value is too big for a (signed) i8.
+    int64_t Offset = int64_t(Value) - 4;
+    return Offset > 2046 || Offset < -2048;
+  }
   case ARM::fixup_arm_thumb_bcc: {
     // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the
     // low bit being an implied zero. There's an implied +4 offset for the
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 06eb4e5..ae11be8 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -187,21 +187,37 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) {
   case S31: case D31: return 31;
 
   // Composite registers use the regnum of the first register in the list.
-  case D1_D2:   return 1;
-  case D3_D5:   return 3;
-  case D5_D7:   return 5;
-  case D7_D9:   return 7;
-  case D9_D10:  return 9;
-  case D11_D12: return 11;
-  case D13_D14: return 13;
-  case D15_D16: return 15;
-  case D17_D18: return 17;
-  case D19_D20: return 19;
-  case D21_D22: return 21;
-  case D23_D24: return 23;
-  case D25_D26: return 25;
-  case D27_D28: return 27;
-  case D29_D30: return 29;
+  /* Q0  */     case D0_D2:   return 0;
+  case D1_D2:   case D1_D3:   return 1;
+  /* Q1  */     case D2_D4:   return 2;
+  case D3_D4:   case D3_D5:   return 3;
+  /* Q2  */     case D4_D6:   return 4;
+  case D5_D6:   case D5_D7:   return 5;
+  /* Q3  */     case D6_D8:   return 6;
+  case D7_D8:   case D7_D9:   return 7;
+  /* Q4  */     case D8_D10:  return 8;
+  case D9_D10:  case D9_D11:  return 9;
+  /* Q5  */     case D10_D12: return 10;
+  case D11_D12: case D11_D13: return 11;
+  /* Q6  */     case D12_D14: return 12;
+  case D13_D14: case D13_D15: return 13;
+  /* Q7  */     case D14_D16: return 14;
+  case D15_D16: case D15_D17: return 15;
+  /* Q8  */     case D16_D18: return 16;
+  case D17_D18: case D17_D19: return 17;
+  /* Q9  */     case D18_D20: return 18;
+  case D19_D20: case D19_D21: return 19;
+  /* Q10 */     case D20_D22: return 20;
+  case D21_D22: case D21_D23: return 21;
+  /* Q11 */     case D22_D24: return 22;
+  case D23_D24: case D23_D25: return 23;
+  /* Q12 */     case D24_D26: return 24;
+  case D25_D26: case D25_D27: return 25;
+  /* Q13 */     case D26_D28: return 26;
+  case D27_D28: case D27_D29: return 27;
+  /* Q14 */     case D28_D30: return 28;
+  case D29_D30: case D29_D31: return 29;
+  /* Q15 */
   }
 }
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 1606b92..ed27f9f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -151,13 +151,13 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
   Triple TheTriple(TT);
 
   if (TheTriple.isOSDarwin())
-    return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+    return createMachOStreamer(Ctx, MAB, OS, Emitter, false);
 
   if (TheTriple.isOSWindows()) {
     llvm_unreachable("ARM does not support Windows COFF format");
   }
 
-  return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+  return createELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack);
 }
 
 static MCInstPrinter *createARMMCInstPrinter(const Target &T,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index faf73ac..9d3da14 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -34,12 +34,12 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter {
                                     MCValue Target,
                                     unsigned Log2Size,
                                     uint64_t &FixedValue);
-  void RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
-                                   const MCAssembler &Asm,
-                                   const MCAsmLayout &Layout,
-                                   const MCFragment *Fragment,
-                                   const MCFixup &Fixup, MCValue Target,
-                                   uint64_t &FixedValue);
+  void RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
+                                        const MCAssembler &Asm,
+                                        const MCAsmLayout &Layout,
+                                        const MCFragment *Fragment,
+                                        const MCFixup &Fixup, MCValue Target,
+                                        uint64_t &FixedValue);
 
 public:
   ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
@@ -102,34 +102,47 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
     Log2Size = llvm::Log2_32(4);
     return true;
 
+  // For movw/movt r_type relocations they always have a pair following them and
+  // the r_length bits are used differently.  The encoding of the r_length is as
+  // follows:
+  //   low bit of r_length:
+  //      0 - :lower16: for movw instructions
+  //      1 - :upper16: for movt instructions
+  //   high bit of r_length:
+  //      0 - arm instructions
+  //      1 - thumb instructions
   case ARM::fixup_arm_movt_hi16:
   case ARM::fixup_arm_movt_hi16_pcrel:
+    RelocType = unsigned(macho::RIT_ARM_Half);
+    Log2Size = 1;
+    return true;
   case ARM::fixup_t2_movt_hi16:
   case ARM::fixup_t2_movt_hi16_pcrel:
-    RelocType = unsigned(macho::RIT_ARM_HalfDifference);
-    // Report as 'long', even though that is not quite accurate.
-    Log2Size = llvm::Log2_32(4);
+    RelocType = unsigned(macho::RIT_ARM_Half);
+    Log2Size = 3;
     return true;
 
   case ARM::fixup_arm_movw_lo16:
   case ARM::fixup_arm_movw_lo16_pcrel:
+    RelocType = unsigned(macho::RIT_ARM_Half);
+    Log2Size = 0;
+    return true;
   case ARM::fixup_t2_movw_lo16:
   case ARM::fixup_t2_movw_lo16_pcrel:
     RelocType = unsigned(macho::RIT_ARM_Half);
-    // Report as 'long', even though that is not quite accurate.
-    Log2Size = llvm::Log2_32(4);
+    Log2Size = 2;
     return true;
   }
 }
 
 void ARMMachObjectWriter::
-RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
-                            const MCAssembler &Asm,
-                            const MCAsmLayout &Layout,
-                            const MCFragment *Fragment,
-                            const MCFixup &Fixup,
-                            MCValue Target,
-                            uint64_t &FixedValue) {
+RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
+                                 const MCAssembler &Asm,
+                                 const MCAsmLayout &Layout,
+                                 const MCFragment *Fragment,
+                                 const MCFixup &Fixup,
+                                 MCValue Target,
+                                 uint64_t &FixedValue) {
   uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
   unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
   unsigned Type = macho::RIT_ARM_Half;
@@ -313,10 +326,9 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
   // scattered relocation entry.  Differences always require scattered
   // relocations.
   if (Target.getSymB()) {
-    if (RelocType == macho::RIT_ARM_Half ||
-        RelocType == macho::RIT_ARM_HalfDifference)
-      return RecordARMMovwMovtRelocation(Writer, Asm, Layout, Fragment, Fixup,
-                                         Target, FixedValue);
+    if (RelocType == macho::RIT_ARM_Half)
+      return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment,
+                                              Fixup, Target, FixedValue);
     return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
                                         Target, Log2Size, FixedValue);
   }
@@ -391,6 +403,30 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
                (Log2Size  << 25) |
                (IsExtern  << 27) |
                (Type      << 28));
+
+  // Even when it's not a scattered relocation, movw/movt always uses
+  // a PAIR relocation.
+  if (Type == macho::RIT_ARM_Half) {
+    // The other-half value only gets populated for the movt relocation.
+    uint32_t Value = 0;;
+    switch ((unsigned)Fixup.getKind()) {
+    default: break;
+    case ARM::fixup_arm_movt_hi16:
+    case ARM::fixup_arm_movt_hi16_pcrel:
+    case ARM::fixup_t2_movt_hi16:
+    case ARM::fixup_t2_movt_hi16_pcrel:
+      Value = FixedValue;
+      break;
+    }
+    macho::RelocationEntry MREPair;
+    MREPair.Word0 = Value;
+    MREPair.Word1 = ((0xffffff) |
+                     (Log2Size << 25) |
+                     (macho::RIT_Pair << 28));
+
+    Writer->addRelocation(Fragment->getParent(), MREPair);
+  }
+
   Writer->addRelocation(Fragment->getParent(), MRE);
 }
 
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index a89a663..edd73c2 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "Thumb1FrameLowering.h"
-#include "ARMBaseInstrInfo.h"
 #include "ARMMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index adaccdd..8cf7cac 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -13,7 +13,6 @@
 
 #include "Thumb1InstrInfo.h"
 #include "ARM.h"
-#include "ARMMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 4d97626..27fce9b 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -14,10 +14,10 @@
 #ifndef THUMB1INSTRUCTIONINFO_H
 #define THUMB1INSTRUCTIONINFO_H
 
-#include "llvm/Target/TargetInstrInfo.h"
 #include "ARM.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
 #include "Thumb1RegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 namespace llvm {
   class ARMSubtarget;
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 6b8bf0e..ef77bbd 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -12,12 +12,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "Thumb1RegisterInfo.h"
 #include "ARM.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMSubtarget.h"
-#include "Thumb1InstrInfo.h"
-#include "Thumb1RegisterInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 9060e59..6971842 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -16,13 +16,12 @@
 #define THUMB1REGISTERINFO_H
 
 #include "ARM.h"
-#include "ARMRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 
 namespace llvm {
   class ARMSubtarget;
   class ARMBaseInstrInfo;
-  class Type;
 
 struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
 public:
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 6cb182a..2fe4b85 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -15,7 +15,6 @@
 #include "ARM.h"
 #include "ARMConstantPoolValue.h"
 #include "ARMMachineFunctionInfo.h"
-#include "Thumb2InstrInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index a754649..1ae2ef1 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -14,10 +14,10 @@
 #ifndef THUMB2INSTRUCTIONINFO_H
 #define THUMB2INSTRUCTIONINFO_H
 
-#include "llvm/Target/TargetInstrInfo.h"
 #include "ARM.h"
 #include "ARMInstrInfo.h"
 #include "Thumb2RegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 namespace llvm {
 class ARMSubtarget;
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 6d210fe..29a87d0 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -12,10 +12,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "Thumb2RegisterInfo.h"
 #include "ARM.h"
+#include "ARMBaseInstrInfo.h"
 #include "ARMSubtarget.h"
-#include "Thumb2InstrInfo.h"
-#include "Thumb2RegisterInfo.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index 824378a..6b397e8 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -16,13 +16,12 @@
 #define THUMB2REGISTERINFO_H
 
 #include "ARM.h"
-#include "ARMRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 
 namespace llvm {
   class ARMSubtarget;
   class ARMBaseInstrInfo;
-  class Type;
 
 struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
 public:
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 5ee5f42..fb9d93b 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -39,9 +39,9 @@ namespace {
   /// ReduceTable - A static table with information on mapping from wide
   /// opcodes to narrow
   struct ReduceEntry {
-    unsigned WideOpc;      // Wide opcode
-    unsigned NarrowOpc1;   // Narrow opcode to transform to
-    unsigned NarrowOpc2;   // Narrow opcode when it's two-address
+    uint16_t WideOpc;      // Wide opcode
+    uint16_t NarrowOpc1;   // Narrow opcode to transform to
+    uint16_t NarrowOpc2;   // Narrow opcode when it's two-address
     uint8_t  Imm1Limit;    // Limit of immediate field (bits)
     uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address
     unsigned LowRegs1 : 1; // Only possible if low-registers are used
@@ -189,7 +189,7 @@ Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
 }
 
 static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
-  for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs)
+  for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
     if (*Regs == ARM::CPSR)
       return true;
   return false;
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 80973b7..b6b209e 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -2392,17 +2392,17 @@ void CWriter::visitSwitchInst(SwitchInst &SI) {
   printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
   Out << ";\n";
 
-  unsigned NumCases = SI.getNumCases();
   // Skip the first item since that's the default case.
-  for (unsigned i = 0; i < NumCases; ++i) {
-    ConstantInt* CaseVal = SI.getCaseValue(i);
-    BasicBlock* Succ = SI.getCaseSuccessor(i);
+  for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
+    ConstantInt* CaseVal = i.getCaseValue();
+    BasicBlock* Succ = i.getCaseSuccessor();
     Out << "  case ";
     writeOperand(CaseVal);
     Out << ":\n";
     printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
     printBranchToBlock(SI.getParent(), Succ, 2);
-    if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent())))
+    if (Function::iterator(Succ) ==
+        llvm::next(Function::iterator(SI.getParent())))
       Out << "    break;\n";
   }
 
diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp
index 916f9ba..fac806e 100644
--- a/lib/Target/CellSPU/SPUFrameLowering.cpp
+++ b/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SPU.h"
 #include "SPUFrameLowering.h"
+#include "SPU.h"
 #include "SPUInstrBuilder.h"
 #include "SPUInstrInfo.h"
 #include "llvm/Function.h"
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 3d2b32d..55b3f72 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -31,14 +31,10 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include <map>
 
 using namespace llvm;
 
-// Used in getTargetNodeName() below
 namespace {
-  std::map<unsigned, const char *> node_names;
-
   // Byte offset of the preferred slot (counted from the MSB)
   int prefslotOffset(EVT VT) {
     int retval=0;
@@ -481,40 +477,34 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setSchedulingPreference(Sched::RegPressure);
 }
 
-const char *
-SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
-{
-  if (node_names.empty()) {
-    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
-    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
-    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
-    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
-    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
-    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
-    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
-    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
-    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
-    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
-    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
-    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
-    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
-    node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
-    node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
-    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
-    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
-    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
-    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
-            "SPUISD::ROTBYTES_LEFT_BITS";
-    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
-    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
-    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
-    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
-    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
-  }
-
-  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
-
-  return ((i != node_names.end()) ? i->second : 0);
+const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
+  switch (Opcode) {
+  default: return 0;
+  case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG";
+  case SPUISD::Hi: return "SPUISD::Hi";
+  case SPUISD::Lo: return "SPUISD::Lo";
+  case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr";
+  case SPUISD::AFormAddr: return "SPUISD::AFormAddr";
+  case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr";
+  case SPUISD::LDRESULT: return "SPUISD::LDRESULT";
+  case SPUISD::CALL: return "SPUISD::CALL";
+  case SPUISD::SHUFB: return "SPUISD::SHUFB";
+  case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK";
+  case SPUISD::CNTB: return "SPUISD::CNTB";
+  case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC";
+  case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT";
+  case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS";
+  case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES";
+  case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL";
+  case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR";
+  case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT";
+  case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS";
+  case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK";
+  case SPUISD::SELB: return "SPUISD::SELB";
+  case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER";
+  case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER";
+  case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER";
+  }
 }
 
 //===----------------------------------------------------------------------===//
@@ -1216,7 +1206,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
   if (isVarArg) {
     // FIXME: we should be able to query the argument registers from
     //        tablegen generated code.
-    static const unsigned ArgRegs[] = {
+    static const uint16_t ArgRegs[] = {
       SPU::R3,  SPU::R4,  SPU::R5,  SPU::R6,  SPU::R7,  SPU::R8,  SPU::R9,
       SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
       SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
@@ -1230,7 +1220,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
       SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
     };
     // size of ArgRegs array
-    unsigned NumArgRegs = 77;
+    const unsigned NumArgRegs = 77;
 
     // We will spill (79-3)+1 registers to the stack
     SmallVector<SDValue, 79-3+1> MemOps;
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index e28e2a4..25c5355 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -15,9 +15,9 @@
 #ifndef SPU_ISELLOWERING_H
 #define SPU_ISELLOWERING_H
 
+#include "SPU.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "SPU.h"
 
 namespace llvm {
   namespace SPUISD {
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index f0d21ad..85e5821 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -15,8 +15,8 @@
 #define SPU_INSTRUCTIONINFO_H
 
 #include "SPU.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "SPURegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 #define GET_INSTRINFO_HEADER
 #include "SPUGenInstrInfo.inc"
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index 92983e1..1b2da5f 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "reginfo"
-#include "SPU.h"
 #include "SPURegisterInfo.h"
+#include "SPU.h"
 #include "SPUInstrBuilder.h"
 #include "SPUSubtarget.h"
 #include "SPUMachineFunction.h"
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index e43f5ad..21f6b25 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SPU.h"
 #include "SPUTargetMachine.h"
+#include "SPU.h"
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/Support/DynamicLibrary.h"
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index c179292..3e5d38c 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -23,9 +23,6 @@
 #include "llvm/Target/TargetData.h"
 
 namespace llvm {
-class PassManager;
-class GlobalValue;
-class TargetFrameLowering;
 
 /// SPUTargetMachine
 ///
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 76b5e9c..107c6cc 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1090,10 +1090,10 @@ void CppWriter::printInstruction(const Instruction *I,
         << getOpName(SI->getDefaultDest()) << ", "
         << SI->getNumCases() << ", " << bbname << ");";
     nl(Out);
-    unsigned NumCases = SI->getNumCases();
-    for (unsigned i = 0; i < NumCases; ++i) {
-      const ConstantInt* CaseVal = SI->getCaseValue(i);
-      const BasicBlock *BB = SI->getCaseSuccessor(i);
+    for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end();
+         i != e; ++i) {
+      const ConstantInt* CaseVal = i.getCaseValue();
+      const BasicBlock *BB = i.getCaseSuccessor();
       Out << iName << "->addCase("
           << getOpName(CaseVal) << ", "
           << getOpName(BB) << ");";
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index bbefcaf..270c7a7 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -15,7 +15,6 @@
 #ifndef TARGET_Hexagon_H
 #define TARGET_Hexagon_H
 
-#include <cassert>
 #include "MCTargetDesc/HexagonMCTargetDesc.h"
 #include "llvm/Target/TargetLowering.h"
 
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 688b8e3..bf333b7 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -32,11 +32,11 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
@@ -46,8 +46,6 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
index 71787de..46c20e9 100644
--- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
@@ -14,13 +14,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "HexagonCallingConvLower.h"
+#include "Hexagon.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "Hexagon.h"
 using namespace llvm;
 
 Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 49c6cdf..e8a6924 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -8,13 +8,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "HexagonFrameLowering.h"
 #include "Hexagon.h"
 #include "HexagonInstrInfo.h"
 #include "HexagonRegisterInfo.h"
 #include "HexagonSubtarget.h"
 #include "HexagonTargetMachine.h"
 #include "HexagonMachineFunctionInfo.h"
-#include "HexagonFrameLowering.h"
 #include "llvm/Function.h"
 #include "llvm/Type.h"
 #include "llvm/ADT/BitVector.h"
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 04ea4ed..57772a5 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -27,6 +27,8 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "hwloops"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
 #include "llvm/Constants.h"
 #include "llvm/PassSupport.h"
 #include "llvm/ADT/DenseMap.h"
@@ -43,8 +45,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include <algorithm>
-#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index ed4b840..d6da0d0 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -28,17 +28,16 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "HexagonMachineFunctionInfo.h"
 #include "llvm/Support/CommandLine.h"
+using namespace llvm;
 
 const unsigned Hexagon_MAX_RET_SIZE = 64;
-using namespace llvm;
 
 static cl::opt<bool>
 EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
@@ -159,7 +158,7 @@ static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
                          MVT LocVT, CCValAssign::LocInfo LocInfo,
                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
 
-  static const unsigned RegList[] = {
+  static const uint16_t RegList[] = {
     Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
     Hexagon::R5
   };
@@ -182,10 +181,10 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
     return false;
   }
 
-  static const unsigned RegList1[] = {
+  static const uint16_t RegList1[] = {
     Hexagon::D1, Hexagon::D2
   };
-  static const unsigned RegList2[] = {
+  static const uint16_t RegList2[] = {
     Hexagon::R1, Hexagon::R3
   };
   if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) {
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 5396486..4208bcb 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -15,10 +15,10 @@
 #ifndef Hexagon_ISELLOWERING_H
 #define Hexagon_ISELLOWERING_H
 
+#include "Hexagon.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/CallingConv.h"
 #include "llvm/CodeGen/CallingConvLower.h"
-#include "Hexagon.h"
 
 namespace llvm {
   namespace HexagonISD {
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 07872d4..3d7ace5 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "HexagonRegisterInfo.h"
 #include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
 #include "HexagonSubtarget.h"
 #include "Hexagon.h"
 #include "llvm/ADT/STLExtras.h"
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index eb088c3..7306870 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -14,10 +14,10 @@
 #ifndef HexagonINSTRUCTIONINFO_H
 #define HexagonINSTRUCTIONINFO_H
 
+#include "HexagonRegisterInfo.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetFrameLowering.h"
-#include "HexagonRegisterInfo.h"
 
 
 #define GET_INSTRINFO_HEADER
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 06c732f..55cbc09 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -36,6 +36,8 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "hexagon-peephole"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
 #include "llvm/Constants.h"
 #include "llvm/PassSupport.h"
 #include "llvm/ADT/DenseMap.h"
@@ -45,16 +47,13 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include <algorithm>
-#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
-
-#include "llvm/Support/CommandLine.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index c481270..2a9de92 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Hexagon.h"
 #include "HexagonRegisterInfo.h"
+#include "Hexagon.h"
 #include "HexagonSubtarget.h"
 #include "HexagonTargetMachine.h"
 #include "HexagonMachineFunctionInfo.h"
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index fc65305..6cf727b 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -16,9 +16,10 @@
 #define HexagonREGISTERINFO_H
 
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/MC/MachineLocation.h"
+
 #define GET_REGINFO_HEADER
 #include "HexagonGenRegisterInfo.inc"
-#include "llvm/MC/MachineLocation.h"
 
 //
 //  We try not to hard code the reserved registers in our code,
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 319eab2..b9e6894 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -50,7 +50,7 @@ extern "C" void LLVMInitializeHexagonTarget() {
 ///
 HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
                                            StringRef CPU, StringRef FS,
-                                           TargetOptions Options,
+                                           const TargetOptions &Options,
                                            Reloc::Model RM,
                                            CodeModel::Model CM,
                                            CodeGenOpt::Level OL)
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index 70bea56..0336965 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -14,13 +14,13 @@
 #ifndef HexagonTARGETMACHINE_H
 #define HexagonTARGETMACHINE_H
 
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
 #include "HexagonInstrInfo.h"
 #include "HexagonSubtarget.h"
 #include "HexagonISelLowering.h"
 #include "HexagonSelectionDAGInfo.h"
 #include "HexagonFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
 
 namespace llvm {
 
@@ -37,8 +37,9 @@ class HexagonTargetMachine : public LLVMTargetMachine {
 
 public:
   HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
-                       StringRef FS, TargetOptions Options, Reloc::Model RM,
-                       CodeModel::Model CM, CodeGenOpt::Level OL);
+                       StringRef FS, const TargetOptions &Options,
+                       Reloc::Model RM, CodeModel::Model CM,
+                       CodeGenOpt::Level OL);
 
   virtual const HexagonInstrInfo *getInstrInfo() const {
     return &InstrInfo;
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index d3ce5a6..32cc709 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -11,6 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "HexagonTargetObjectFile.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/Target/TargetData.h"
@@ -18,9 +21,6 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/CommandLine.h"
-#include "HexagonSubtarget.h"
-#include "HexagonTargetObjectFile.h"
-#include "HexagonTargetMachine.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 74abc56..3cfa4fd 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- HexagonMCTargetDesc.cpp - Cell Hexagon Target Descriptions --------===//
+//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions -------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file provides Cell Hexagon specific target descriptions.
+// This file provides Hexagon specific target descriptions.
 //
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 364841f..b18d23a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -1,4 +1,4 @@
-//===-- SPUMCTargetDesc.h - Hexagon Target Descriptions ---------*- C++ -*-===//
+//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef SPUMCTARGETDESC_H
-#define SPUMCTARGETDESC_H
+#ifndef HEXAGONMCTARGETDESC_H
+#define HEXAGONMCTARGETDESC_H
 
 namespace llvm {
 class MCSubtargetInfo;
diff --git a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
index 1114d99..73c7e01 100644
--- a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;===- ./lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
 ;
 ;                     The LLVM Compiler Infrastructure
 ;
diff --git a/lib/Target/Hexagon/MCTargetDesc/Makefile b/lib/Target/Hexagon/MCTargetDesc/Makefile
index 67be2bc..885be2d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/Makefile
+++ b/lib/Target/Hexagon/MCTargetDesc/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/CellSPU/TargetDesc/Makefile --------------*- Makefile -*-===##
+##===- lib/Target/Hexagon/TargetDesc/Makefile --------------*- Makefile -*-===##
 #
 #                     The LLVM Compiler Infrastructure
 #
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index adedf93..6b958c8 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -34,9 +34,9 @@ extern const MCInstrDesc MBlazeInsts[];
 
 using namespace llvm;
 
-const unsigned UNSUPPORTED = -1;
+const uint16_t UNSUPPORTED = -1;
 
-static const unsigned mblazeBinary2Opcode[] = {
+static const uint16_t mblazeBinary2Opcode[] = {
   MBlaze::ADD,   MBlaze::RSUB,   MBlaze::ADDC,   MBlaze::RSUBC,   //00,01,02,03
   MBlaze::ADDK,  MBlaze::RSUBK,  MBlaze::ADDKC,  MBlaze::RSUBKC,  //04,05,06,07
   MBlaze::ADDI,  MBlaze::RSUBI,  MBlaze::ADDIC,  MBlaze::RSUBIC,  //08,09,0A,0B
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.h b/lib/Target/MBlaze/MBlazeFrameLowering.h
index 8be15bf..01e6578 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.h
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.h
@@ -15,11 +15,10 @@
 #define MBLAZE_FRAMEINFO_H
 
 #include "MBlaze.h"
-#include "MBlazeSubtarget.h"
 #include "llvm/Target/TargetFrameLowering.h"
 
 namespace llvm {
-  class MBlazeSubtarget;
+class MBlazeSubtarget;
 
 class MBlazeFrameLowering : public TargetFrameLowering {
 protected:
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 23c8e13..9ef6bb6 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -657,7 +657,7 @@ static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                 CCValAssign::LocInfo &LocInfo,
                                 ISD::ArgFlagsTy &ArgFlags,
                                 CCState &State) {
-  static const unsigned ArgRegs[] = {
+  static const uint16_t ArgRegs[] = {
     MBlaze::R5, MBlaze::R6, MBlaze::R7,
     MBlaze::R8, MBlaze::R9, MBlaze::R10
   };
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index 168694b..6a79fc1 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -15,11 +15,11 @@
 #ifndef MBlazeISELLOWERING_H
 #define MBlazeISELLOWERING_H
 
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetLowering.h"
-#include "MBlaze.h"
-#include "MBlazeSubtarget.h"
 
 namespace llvm {
   namespace MBlazeCC {
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index a309d2b..5252147 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -15,9 +15,9 @@
 #define MBLAZEINSTRUCTIONINFO_H
 
 #include "MBlaze.h"
+#include "MBlazeRegisterInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "MBlazeRegisterInfo.h"
 
 #define GET_INSTRINFO_HEADER
 #include "MBlazeGenInstrInfo.inc"
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.h b/lib/Target/MBlaze/MBlazeMCInstLower.h
index bb77ed4..7b97744 100644
--- a/lib/Target/MBlaze/MBlazeMCInstLower.h
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
   class AsmPrinter;
-  class MCAsmInfo;
   class MCContext;
   class MCInst;
   class MCOperand;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index 6801a1a..46f5207 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -14,9 +14,9 @@
 
 #define DEBUG_TYPE "mblaze-frame-info"
 
+#include "MBlazeRegisterInfo.h"
 #include "MBlaze.h"
 #include "MBlazeSubtarget.h"
-#include "MBlazeRegisterInfo.h"
 #include "MBlazeMachineFunction.h"
 #include "llvm/Constants.h"
 #include "llvm/Type.h"
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 5c07424..dd7de9b 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MBlaze.h"
 #include "MBlazeTargetMachine.h"
+#include "MBlaze.h"
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/FormattedStream.h"
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index fd5de34..c03ba47 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MSP430.h"
 #include "MSP430InstrInfo.h"
+#include "MSP430.h"
 #include "MSP430MachineFunctionInfo.h"
 #include "MSP430TargetMachine.h"
 #include "llvm/Function.h"
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index fe2a75c..04f339b 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_TARGET_MSP430INSTRINFO_H
 #define LLVM_TARGET_MSP430INSTRINFO_H
 
-#include "llvm/Target/TargetInstrInfo.h"
 #include "MSP430RegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 #define GET_INSTRINFO_HEADER
 #include "MSP430GenInstrInfo.inc"
diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h
index 297efd2..24151e2 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.h
+++ b/lib/Target/MSP430/MSP430MCInstLower.h
@@ -14,7 +14,6 @@
 
 namespace llvm {
   class AsmPrinter;
-  class MCAsmInfo;
   class MCContext;
   class MCInst;
   class MCOperand;
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index f9ddfb3..51ec71a 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -13,9 +13,9 @@
 
 #define DEBUG_TYPE "msp430-reg-info"
 
+#include "MSP430RegisterInfo.h"
 #include "MSP430.h"
 #include "MSP430MachineFunctionInfo.h"
-#include "MSP430RegisterInfo.h"
 #include "MSP430TargetMachine.h"
 #include "llvm/Function.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index e7bebbd..4d8792e 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -15,12 +15,11 @@
 #define LLVM_TARGET_MSP430_SUBTARGET_H
 
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
 
 #define GET_SUBTARGETINFO_HEADER
 #include "MSP430GenSubtargetInfo.inc"
 
-#include <string>
-
 namespace llvm {
 class StringRef;
 
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index af62e48..9f2eda1 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MSP430.h"
 #include "MSP430TargetMachine.h"
+#include "MSP430.h"
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/MC/MCAsmInfo.h"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index d69570b..9d5a2f1 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -92,25 +92,42 @@ public:
     if (!Value)
       return; // Doesn't change encoding.
 
+    // Where do we start in the object
     unsigned Offset = Fixup.getOffset();
-    // FIXME: The below code will not work across endian models
-    // How many bytes/bits are we fixing up?
-    unsigned NumBytes = ((getFixupKindInfo(Kind).TargetSize-1)/8)+1;
-    uint64_t Mask = ((uint64_t)1 << getFixupKindInfo(Kind).TargetSize) - 1;
+    // Number of bytes we need to fixup
+    unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
+    // Used to point to big endian bytes
+    unsigned FullSize;
+
+    switch ((unsigned)Kind) {
+    case Mips::fixup_Mips_16:
+      FullSize = 2;
+      break;
+    case Mips::fixup_Mips_64:
+      FullSize = 8;
+      break;
+    default:
+      FullSize = 4;
+      break;
+    }
 
     // Grab current value, if any, from bits.
     uint64_t CurVal = 0;
-    for (unsigned i = 0; i != NumBytes; ++i)
-      CurVal |= ((uint8_t)Data[Offset + i]) << (i * 8);
 
+    for (unsigned i = 0; i != NumBytes; ++i) {
+      unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
+      CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8);
+    }
+
+    uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize));
     CurVal = (CurVal & ~Mask) | ((CurVal + Value) & Mask);
 
-    // Write out the bytes back to the code/data bits.
-    // First the unaffected bits and then the fixup.
+    // Write out the fixed up bytes back to the code/data bits.
     for (unsigned i = 0; i != NumBytes; ++i) {
-      Data[Offset + i] = uint8_t((CurVal >> (i * 8)) & 0xff);
+      unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
+      Data[Offset + Idx] = (uint8_t)((CurVal >> (i*8)) & 0xff);
     }
-}
+  }
 
   unsigned getNumFixupKinds() const { return Mips::NumTargetFixupKinds; }
 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index b039678..9ebb6d2 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -49,9 +49,9 @@ public:
 
   void EmitInstruction(uint64_t Val, unsigned Size, raw_ostream &OS) const {
     // Output the instruction encoding in little endian byte order.
-    for (unsigned i = 0; i != Size; ++i) {
-      EmitByte(Val & 255, OS);
-      Val >>= 8;
+    for (unsigned i = 0; i < Size; ++i) {
+      unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8;
+      EmitByte((Val >> Shift) & 0xff, OS);
     }
   }
 
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index bacecf2..bafadc8 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -21,8 +21,6 @@
 namespace llvm {
   class MipsTargetMachine;
   class FunctionPass;
-  class MachineCodeEmitter;
-  class formatted_raw_ostream;
 
   FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
   FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
index 31b669a..dc8fbd0 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.cpp
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
@@ -26,28 +26,28 @@ void MipsAnalyzeImmediate::AddInstr(InstSeqLs &SeqLs, const Inst &I) {
     Iter->push_back(I);
 }
 
-void MipsAnalyzeImmediate::GetInstSeqLsADDiu(int64_t Imm, unsigned RemSize,
+void MipsAnalyzeImmediate::GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize,
                                              InstSeqLs &SeqLs) {
-  GetInstSeqLs((Imm + 0x8000) & ~0xffff, RemSize, SeqLs);
-  AddInstr(SeqLs, Inst(ADDiu, Imm & 0xffff));
+  GetInstSeqLs((Imm + 0x8000ULL) & 0xffffffffffff0000ULL, RemSize, SeqLs);
+  AddInstr(SeqLs, Inst(ADDiu, Imm & 0xffffULL));
 }
 
-void MipsAnalyzeImmediate::GetInstSeqLsORi(int64_t Imm, unsigned RemSize,
+void MipsAnalyzeImmediate::GetInstSeqLsORi(uint64_t Imm, unsigned RemSize,
                                            InstSeqLs &SeqLs) {
-  GetInstSeqLs(Imm & ~0xffff, RemSize, SeqLs);
-  AddInstr(SeqLs, Inst(ORi, Imm & 0xffff));
+  GetInstSeqLs(Imm & 0xffffffffffff0000ULL, RemSize, SeqLs);
+  AddInstr(SeqLs, Inst(ORi, Imm & 0xffffULL));
 }
 
-void MipsAnalyzeImmediate::GetInstSeqLsSLL(int64_t Imm, unsigned RemSize,
+void MipsAnalyzeImmediate::GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize,
                                            InstSeqLs &SeqLs) {
   unsigned Shamt = CountTrailingZeros_64(Imm);
   GetInstSeqLs(Imm >> Shamt, RemSize - Shamt, SeqLs);
   AddInstr(SeqLs, Inst(SLL, Shamt));
 }
 
-void MipsAnalyzeImmediate::GetInstSeqLs(int64_t Imm, unsigned RemSize,
+void MipsAnalyzeImmediate::GetInstSeqLs(uint64_t Imm, unsigned RemSize,
                                         InstSeqLs &SeqLs) {
-  int64_t MaskedImm = Imm & (((uint64_t)-1) >> (64 - Size));
+  uint64_t MaskedImm = Imm & (0xffffffffffffffffULL >> (64 - Size));
 
   // Do nothing if Imm is 0.
   if (!MaskedImm)
@@ -122,7 +122,7 @@ void MipsAnalyzeImmediate::GetShortestSeq(InstSeqLs &SeqLs, InstSeq &Insts) {
 }
 
 const MipsAnalyzeImmediate::InstSeq
-&MipsAnalyzeImmediate::Analyze(int64_t Imm, unsigned Size,
+&MipsAnalyzeImmediate::Analyze(uint64_t Imm, unsigned Size,
                                bool LastInstrIsADDiu) {
   this->Size = Size;
 
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h
index 24e6e5f..a094dda 100644
--- a/lib/Target/Mips/MipsAnalyzeImmediate.h
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.h
@@ -25,7 +25,7 @@ namespace llvm {
     /// Analyze - Get an instrucion sequence to load immediate Imm. The last
     /// instruction in the sequence must be an ADDiu if LastInstrIsADDiu is
     /// true;
-    const InstSeq &Analyze(int64_t Imm, unsigned Size, bool LastInstrIsADDiu);
+    const InstSeq &Analyze(uint64_t Imm, unsigned Size, bool LastInstrIsADDiu);
   private:
     typedef SmallVector<InstSeq, 5> InstSeqLs;
 
@@ -34,18 +34,18 @@ namespace llvm {
 
     /// GetInstSeqLsADDiu - Get instrucion sequences which end with an ADDiu to
     /// load immediate Imm
-    void GetInstSeqLsADDiu(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+    void GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
 
     /// GetInstSeqLsORi - Get instrucion sequences which end with an ORi to
     /// load immediate Imm
-    void GetInstSeqLsORi(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+    void GetInstSeqLsORi(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
 
     /// GetInstSeqLsSLL - Get instrucion sequences which end with a SLL to
     /// load immediate Imm
-    void GetInstSeqLsSLL(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+    void GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
 
     /// GetInstSeqLs - Get instrucion sequences to load immediate Imm.
-    void GetInstSeqLs(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+    void GetInstSeqLs(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
 
     /// ReplaceADDiuSLLWithLUi - Replace an ADDiu & SLL pair with a LUi.
     void ReplaceADDiuSLLWithLUi(InstSeq &Seq);
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index aeabc0f..f2b842a 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -13,8 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "mips-asm-printer"
-#include "Mips.h"
 #include "MipsAsmPrinter.h"
+#include "Mips.h"
 #include "MipsInstrInfo.h"
 #include "MipsMachineFunction.h"
 #include "MipsMCInstLower.h"
@@ -34,8 +34,6 @@
 #include "llvm/Instructions.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index 8502db2..473da7e 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -22,9 +22,9 @@
 namespace llvm {
 class MCStreamer;
 class MachineInstr;
-class raw_ostream;
 class MachineBasicBlock;
 class Module;
+class raw_ostream;
 
 class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
 
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index e83c64e..ebfbb4a 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MipsAnalyzeImmediate.h"
 #include "MipsFrameLowering.h"
+#include "MipsAnalyzeImmediate.h"
 #include "MipsInstrInfo.h"
 #include "MipsMachineFunction.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 782d203..536879e 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -99,6 +99,8 @@ private:
     return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
   }
 
+  void ProcessFunctionAfterISel(MachineFunction &MF);
+  bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
   void InitGlobalBaseReg(MachineFunction &MF);
 
   virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
@@ -181,10 +183,57 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
   }
 }
 
+bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
+                                              const MachineInstr& MI) {
+  unsigned DstReg = 0, ZeroReg = 0;
+
+  // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
+  if ((MI.getOpcode() == Mips::ADDiu) &&
+      (MI.getOperand(1).getReg() == Mips::ZERO) &&
+      (MI.getOperand(2).getImm() == 0)) {
+    DstReg = MI.getOperand(0).getReg();
+    ZeroReg = Mips::ZERO;
+  } else if ((MI.getOpcode() == Mips::DADDiu) &&
+             (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
+             (MI.getOperand(2).getImm() == 0)) {
+    DstReg = MI.getOperand(0).getReg();
+    ZeroReg = Mips::ZERO_64;
+  }
+
+  if (!DstReg)
+    return false;
+
+  // Replace uses with ZeroReg.
+  for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
+       E = MRI->use_end(); U != E; ++U) {
+    MachineOperand &MO = U.getOperand();
+    MachineInstr *MI = MO.getParent();
+
+    // Do not replace if it is a phi's operand or is tied to def operand.
+    if (MI->isPHI() || MI->isRegTiedToDefOperand(U.getOperandNo()))
+      continue;
+
+    MO.setReg(ZeroReg);
+  }
+
+  return true;
+}
+
+void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) {
+  InitGlobalBaseReg(MF);
+
+  MachineRegisterInfo *MRI = &MF.getRegInfo();
+
+  for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
+       ++MFI)
+    for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
+      ReplaceUsesWithZeroReg(MRI, *I);
+}
+
 bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
   bool Ret = SelectionDAGISel::runOnMachineFunction(MF);
 
-  InitGlobalBaseReg(MF);
+  ProcessFunctionAfterISel(MF);
 
   return Ret;
 }
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index dc894d9..ecde5b6 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -18,13 +18,13 @@
 #include "MipsTargetMachine.h"
 #include "MipsTargetObjectFile.h"
 #include "MipsSubtarget.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/CallingConv.h"
-#include "InstPrinter/MipsInstPrinter.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -130,22 +130,32 @@ MipsTargetLowering(MipsTargetMachine &TM)
 
   // Mips Custom Operations
   setOperationAction(ISD::GlobalAddress,      MVT::i32,   Custom);
-  setOperationAction(ISD::GlobalAddress,      MVT::i64,   Custom);
   setOperationAction(ISD::BlockAddress,       MVT::i32,   Custom);
-  setOperationAction(ISD::BlockAddress,       MVT::i64,   Custom);
   setOperationAction(ISD::GlobalTLSAddress,   MVT::i32,   Custom);
-  setOperationAction(ISD::GlobalTLSAddress,   MVT::i64,   Custom);
   setOperationAction(ISD::JumpTable,          MVT::i32,   Custom);
-  setOperationAction(ISD::JumpTable,          MVT::i64,   Custom);
   setOperationAction(ISD::ConstantPool,       MVT::i32,   Custom);
-  setOperationAction(ISD::ConstantPool,       MVT::i64,   Custom);
   setOperationAction(ISD::SELECT,             MVT::f32,   Custom);
   setOperationAction(ISD::SELECT,             MVT::f64,   Custom);
   setOperationAction(ISD::SELECT,             MVT::i32,   Custom);
+  setOperationAction(ISD::SETCC,              MVT::f32,   Custom);
+  setOperationAction(ISD::SETCC,              MVT::f64,   Custom);
   setOperationAction(ISD::BRCOND,             MVT::Other, Custom);
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Custom);
-  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64,   Custom);
   setOperationAction(ISD::VASTART,            MVT::Other, Custom);
+  setOperationAction(ISD::FCOPYSIGN,          MVT::f32,   Custom);
+  setOperationAction(ISD::FCOPYSIGN,          MVT::f64,   Custom);
+  setOperationAction(ISD::MEMBARRIER,         MVT::Other, Custom);
+  setOperationAction(ISD::ATOMIC_FENCE,       MVT::Other, Custom);
+
+  if (HasMips64) {
+    setOperationAction(ISD::GlobalAddress,      MVT::i64,   Custom);
+    setOperationAction(ISD::BlockAddress,       MVT::i64,   Custom);
+    setOperationAction(ISD::GlobalTLSAddress,   MVT::i64,   Custom);
+    setOperationAction(ISD::JumpTable,          MVT::i64,   Custom);
+    setOperationAction(ISD::ConstantPool,       MVT::i64,   Custom);
+    setOperationAction(ISD::SELECT,             MVT::i64,   Custom);
+    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64,   Custom);
+  }
 
   setOperationAction(ISD::SDIV, MVT::i32, Expand);
   setOperationAction(ISD::SREM, MVT::i32, Expand);
@@ -185,8 +195,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::SHL_PARTS,         MVT::i32,   Expand);
   setOperationAction(ISD::SRA_PARTS,         MVT::i32,   Expand);
   setOperationAction(ISD::SRL_PARTS,         MVT::i32,   Expand);
-  setOperationAction(ISD::FCOPYSIGN,         MVT::f32,   Custom);
-  setOperationAction(ISD::FCOPYSIGN,         MVT::f64,   Custom);
   setOperationAction(ISD::FSIN,              MVT::f32,   Expand);
   setOperationAction(ISD::FSIN,              MVT::f64,   Expand);
   setOperationAction(ISD::FCOS,              MVT::f32,   Expand);
@@ -214,9 +222,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::STACKSAVE,         MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE,      MVT::Other, Expand);
 
-  setOperationAction(ISD::MEMBARRIER,        MVT::Other, Custom);
-  setOperationAction(ISD::ATOMIC_FENCE,      MVT::Other, Custom);
-
   setOperationAction(ISD::ATOMIC_LOAD,       MVT::i32,    Expand);
   setOperationAction(ISD::ATOMIC_LOAD,       MVT::i64,    Expand);
   setOperationAction(ISD::ATOMIC_STORE,      MVT::i32,    Expand);
@@ -246,11 +251,11 @@ MipsTargetLowering(MipsTargetMachine &TM)
   setTargetDAGCombine(ISD::SUBE);
   setTargetDAGCombine(ISD::SDIVREM);
   setTargetDAGCombine(ISD::UDIVREM);
-  setTargetDAGCombine(ISD::SETCC);
+  setTargetDAGCombine(ISD::SELECT);
   setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::OR);
 
-  setMinFunctionAlignment(2);
+  setMinFunctionAlignment(HasMips64 ? 3 : 2);
 
   setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP);
   computeRegisterProperties();
@@ -559,21 +564,37 @@ static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True,
                      True.getValueType(), True, False, Cond);
 }
 
-static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG,
-                                   TargetLowering::DAGCombinerInfo &DCI,
-                                   const MipsSubtarget* Subtarget) {
+static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG,
+                                    TargetLowering::DAGCombinerInfo &DCI,
+                                    const MipsSubtarget* Subtarget) {
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
-  SDValue Cond = CreateFPCmp(DAG, SDValue(N, 0));
+  SDValue SetCC = N->getOperand(0);
 
-  if (Cond.getOpcode() != MipsISD::FPCmp)
+  if ((SetCC.getOpcode() != ISD::SETCC) ||
+      !SetCC.getOperand(0).getValueType().isInteger())
     return SDValue();
 
-  SDValue True  = DAG.getConstant(1, MVT::i32);
-  SDValue False = DAG.getConstant(0, MVT::i32);
+  SDValue False = N->getOperand(2);
+  EVT FalseTy = False.getValueType();
 
-  return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc());
+  if (!FalseTy.isInteger())
+    return SDValue();
+
+  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(False);
+
+  if (!CN || CN->getZExtValue())
+    return SDValue();
+
+  const DebugLoc DL = N->getDebugLoc();
+  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
+  SDValue True = N->getOperand(1);
+  
+  SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0),
+                       SetCC.getOperand(1), ISD::getSetCCInverse(CC, true));
+  
+  return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True);
 }
 
 static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
@@ -684,8 +705,8 @@ SDValue  MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
   case ISD::SDIVREM:
   case ISD::UDIVREM:
     return PerformDivRemCombine(N, DAG, DCI, Subtarget);
-  case ISD::SETCC:
-    return PerformSETCCCombine(N, DAG, DCI, Subtarget);
+  case ISD::SELECT:
+    return PerformSELECTCombine(N, DAG, DCI, Subtarget);  
   case ISD::AND:
     return PerformANDCombine(N, DAG, DCI, Subtarget);
   case ISD::OR:
@@ -708,6 +729,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
     case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
     case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
     case ISD::SELECT:             return LowerSELECT(Op, DAG);
+    case ISD::SETCC:              return LowerSETCC(Op, DAG);
     case ISD::VASTART:            return LowerVASTART(Op, DAG);
     case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
     case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
@@ -1475,6 +1497,18 @@ LowerSELECT(SDValue Op, SelectionDAG &DAG) const
                       Op.getDebugLoc());
 }
 
+SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Cond = CreateFPCmp(DAG, Op);
+
+  assert(Cond.getOpcode() == MipsISD::FPCmp &&
+         "Floating point operand expected.");
+
+  SDValue True  = DAG.getConstant(1, MVT::i32);
+  SDValue False = DAG.getConstant(0, MVT::i32);
+
+  return CreateCMovFP(DAG, Cond, True, False, Op.getDebugLoc());
+}
+
 SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
   // FIXME there isn't actually debug info here
@@ -1841,13 +1875,13 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
 
   static const unsigned IntRegsSize=4, FloatRegsSize=2;
 
-  static const unsigned IntRegs[] = {
+  static const uint16_t IntRegs[] = {
       Mips::A0, Mips::A1, Mips::A2, Mips::A3
   };
-  static const unsigned F32Regs[] = {
+  static const uint16_t F32Regs[] = {
       Mips::F12, Mips::F14
   };
-  static const unsigned F64Regs[] = {
+  static const uint16_t F64Regs[] = {
       Mips::D6, Mips::D7
   };
 
@@ -1926,10 +1960,10 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
   return false; // CC must always match
 }
 
-static const unsigned Mips64IntRegs[8] =
+static const uint16_t Mips64IntRegs[8] =
   {Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64,
    Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64};
-static const unsigned Mips64DPRegs[8] =
+static const uint16_t Mips64DPRegs[8] =
   {Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
    Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64};
 
@@ -1996,7 +2030,7 @@ AnalyzeMips64CallOperands(CCState &CCInfo,
 
 static const unsigned O32IntRegsSize = 4;
 
-static const unsigned O32IntRegs[] = {
+static const uint16_t O32IntRegs[] = {
   Mips::A0, Mips::A1, Mips::A2, Mips::A3
 };
 
@@ -2115,9 +2149,9 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
   if (!IsRegLoc)
     LocMemOffset = VA.getLocMemOffset();
   else {
-    const unsigned *Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8,
+    const uint16_t *Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8,
                                     VA.getLocReg());
-    const unsigned *RegEnd = Mips64IntRegs + 8;
+    const uint16_t *RegEnd = Mips64IntRegs + 8;
 
     // Copy double words to registers.
     for (; (Reg != RegEnd) && (ByValSize >= Offset + 8); ++Reg, Offset += 8) {
@@ -2540,7 +2574,7 @@ CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl,
                     MachineFrameInfo *MFI, bool IsRegLoc,
                     SmallVectorImpl<SDValue> &InVals, MipsFunctionInfo *MipsFI,
                     EVT PtrTy) {
-  const unsigned *Reg = Mips64IntRegs + 8;
+  const uint16_t *Reg = Mips64IntRegs + 8;
   int FOOffset; // Frame object offset from virtual frame pointer.
 
   if (IsRegLoc) {
@@ -2709,7 +2743,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
 
   if (isVarArg) {
     unsigned NumOfRegs = IsO32 ? 4 : 8;
-    const unsigned *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs;
+    const uint16_t *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs;
     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumOfRegs);
     int FirstRegSlotOffset = IsO32 ? 0 : -64 ; // offset of $a0's slot.
     const TargetRegisterClass *RC
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 621bbec..66f45cd 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -15,10 +15,10 @@
 #ifndef MipsISELLOWERING_H
 #define MipsISELLOWERING_H
 
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
 #include "Mips.h"
 #include "MipsSubtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
   namespace MipsISD {
@@ -128,6 +128,7 @@ namespace llvm {
     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 10caf30..4be727d 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -15,9 +15,9 @@
 #define MIPSINSTRUCTIONINFO_H
 
 #include "Mips.h"
+#include "MipsRegisterInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "MipsRegisterInfo.h"
 
 #define GET_INSTRINFO_HEADER
 #include "MipsGenInstrInfo.inc"
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index be65298..0d51298 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -12,9 +12,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MipsMCInstLower.h"
 #include "MipsAsmPrinter.h"
 #include "MipsInstrInfo.h"
-#include "MipsMCInstLower.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index cbd5264..20bb338 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -14,11 +14,9 @@
 #include "llvm/Support/Compiler.h"
 
 namespace llvm {
-  class MCAsmInfo;
   class MCContext;
   class MCInst;
   class MCOperand;
-  class MCSymbol;
   class MachineInstr;
   class MachineFunction;
   class Mangler;
@@ -38,7 +36,7 @@ public:
   void LowerCPLOAD(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
   void LowerCPRESTORE(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
   void LowerUnalignedLoadStore(const MachineInstr *MI,
-		                           SmallVector<MCInst, 4>& MCInsts);
+                               SmallVector<MCInst, 4>& MCInsts);
   void LowerSETGP01(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
 private:
   MCOperand LowerSymbolOperand(const MachineOperand &MO,
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 57ff069..abb5404 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -14,10 +14,10 @@
 #ifndef MIPS_MACHINE_FUNCTION_INFO_H
 #define MIPS_MACHINE_FUNCTION_INFO_H
 
-#include <utility>
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include <utility>
 
 namespace llvm {
 
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index e0ecba2..5cfda34 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -13,10 +13,10 @@
 
 #define DEBUG_TYPE "mips-reg-info"
 
+#include "MipsRegisterInfo.h"
 #include "Mips.h"
 #include "MipsAnalyzeImmediate.h"
 #include "MipsSubtarget.h"
-#include "MipsRegisterInfo.h"
 #include "MipsMachineFunction.h"
 #include "llvm/Constants.h"
 #include "llvm/Type.h"
@@ -83,12 +83,12 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const
 
 BitVector MipsRegisterInfo::
 getReservedRegs(const MachineFunction &MF) const {
-  static const unsigned ReservedCPURegs[] = {
+  static const uint16_t ReservedCPURegs[] = {
     Mips::ZERO, Mips::AT, Mips::K0, Mips::K1,
     Mips::SP, Mips::FP, Mips::RA
   };
 
-  static const unsigned ReservedCPU64Regs[] = {
+  static const uint16_t ReservedCPU64Regs[] = {
     Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64,
     Mips::SP_64, Mips::FP_64, Mips::RA_64
   };
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 8806aaf..ad02231 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Mips.h"
 #include "MipsTargetMachine.h"
+#include "Mips.h"
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 19ae142..80c00e8 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -14,15 +14,15 @@
 #ifndef MIPSTARGETMACHINE_H
 #define MIPSTARGETMACHINE_H
 
-#include "MipsSubtarget.h"
+#include "MipsFrameLowering.h"
 #include "MipsInstrInfo.h"
 #include "MipsISelLowering.h"
-#include "MipsFrameLowering.h"
+#include "MipsJITInfo.h"
 #include "MipsSelectionDAGInfo.h"
+#include "MipsSubtarget.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetFrameLowering.h"
-#include "MipsJITInfo.h"
 
 namespace llvm {
   class formatted_raw_ostream;
diff --git a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
index 77a298d..a3e0f32 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
+++ b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
@@ -17,9 +17,9 @@
 #ifndef PTXBASEINFO_H
 #define PTXBASEINFO_H
 
+#include "PTXMCTargetDesc.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "PTXMCTargetDesc.h"
 
 namespace llvm {
   namespace PTXStateSpace {
diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h
index 7d46cce..ffb92cb 100644
--- a/lib/Target/PTX/PTX.h
+++ b/lib/Target/PTX/PTX.h
@@ -1,4 +1,3 @@
-//===-- PTX.h - Top-level interface for PTX representation ------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index 58ac5f2..0b6ac7b 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -14,8 +14,8 @@
 
 #define DEBUG_TYPE "ptx-asm-printer"
 
-#include "PTX.h"
 #include "PTXAsmPrinter.h"
+#include "PTX.h"
 #include "PTXMachineFunctionInfo.h"
 #include "PTXParamManager.h"
 #include "PTXRegisterInfo.h"
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
index e5d4edc..db1c953 100644
--- a/lib/Target/PTX/PTXISelLowering.cpp
+++ b/lib/Target/PTX/PTXISelLowering.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PTX.h"
 #include "PTXISelLowering.h"
+#include "PTX.h"
 #include "PTXMachineFunctionInfo.h"
 #include "PTXRegisterInfo.h"
 #include "PTXSubtarget.h"
diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h
index fd20982..33220f4 100644
--- a/lib/Target/PTX/PTXISelLowering.h
+++ b/lib/Target/PTX/PTXISelLowering.h
@@ -18,8 +18,6 @@
 #include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
-class PTXSubtarget;
-class PTXTargetMachine;
 
 namespace PTXISD {
   enum NodeType {
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index 9d6cbf1..443cd54 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -13,8 +13,8 @@
 
 #define DEBUG_TYPE "ptx-instrinfo"
 
-#include "PTX.h"
 #include "PTXInstrInfo.h"
+#include "PTX.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
diff --git a/lib/Target/PTX/PTXParamManager.cpp b/lib/Target/PTX/PTXParamManager.cpp
index 74538e6..cc1cc71 100644
--- a/lib/Target/PTX/PTXParamManager.cpp
+++ b/lib/Target/PTX/PTXParamManager.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PTX.h"
 #include "PTXParamManager.h"
+#include "PTX.h"
 #include "llvm/ADT/StringExtras.h"
 
 using namespace llvm;
diff --git a/lib/Target/PTX/PTXParamManager.h b/lib/Target/PTX/PTXParamManager.h
index 32342f7..92e7728 100644
--- a/lib/Target/PTX/PTXParamManager.h
+++ b/lib/Target/PTX/PTXParamManager.h
@@ -17,6 +17,7 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
+#include <string>
 
 namespace llvm {
 
diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp
index 3f087cd..b6ffd38 100644
--- a/lib/Target/PTX/PTXRegisterInfo.cpp
+++ b/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PTX.h"
 #include "PTXRegisterInfo.h"
+#include "PTX.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
index 9305377..40835d0 100644
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PTX.h"
 #include "PTXTargetMachine.h"
+#include "PTX.h"
 #include "llvm/PassManager.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/Verifier.h"
@@ -26,6 +26,7 @@
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetData.h"
@@ -37,8 +38,6 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
 
 
 using namespace llvm;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 02dad45..9c6eefe 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/MC/MCAsmBackend.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCMachObjectWriter.h"
 #include "llvm/MC/MCSectionMachO.h"
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 5dc1863..24a7178 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -25,14 +25,11 @@
 namespace llvm {
   class PPCTargetMachine;
   class FunctionPass;
-  class formatted_raw_ostream;
   class JITCodeEmitter;
-  class Target;
   class MachineInstr;
   class AsmPrinter;
   class MCInst;
-  class TargetMachine;
-  
+
   FunctionPass *createPPCBranchSelectionPass();
   FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
   FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 591ae02..4abb469 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -20,6 +20,7 @@
 #include "PPC.h"
 #include "PPCTargetMachine.h"
 #include "PPCSubtarget.h"
+#include "InstPrinter/PPCInstPrinter.h"
 #include "MCTargetDesc/PPCPredicates.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Constants.h"
@@ -53,7 +54,6 @@
 #include "llvm/Support/ELF.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/SmallString.h"
-#include "InstPrinter/PPCInstPrinter.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 8efc9c1..9883c2e 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -130,3 +130,34 @@ def CC_PPC_SVR4_ByVal : CallingConv<[
   CCCustom<"CC_PPC_SVR4_Custom_Dummy">
 ]>;
 
+def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
+                                        R21, R22, R23, R24, R25, R26, R27, R28,
+                                        R29, R30, R31, F14, F15, F16, F17, F18,
+                                        F19, F20, F21, F22, F23, F24, F25, F26,
+                                        F27, F28, F29, F30, F31, CR2, CR3, CR4,
+                                        V20, V21, V22, V23, V24, V25, V26, V27,
+                                        V28, V29, V30, V31)>;
+
+def CSR_SVR432   : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20, VRSAVE,
+                                        R21, R22, R23, R24, R25, R26, R27, R28,
+                                        R29, R30, R31, F14, F15, F16, F17, F18,
+                                        F19, F20, F21, F22, F23, F24, F25, F26,
+                                        F27, F28, F29, F30, F31, CR2, CR3, CR4,
+                                        V20, V21, V22, V23, V24, V25, V26, V27,
+                                        V28, V29, V30, V31)>;
+
+def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
+                                        X21, X22, X23, X24, X25, X26, X27, X28,
+                                        X29, X30, X31, F14, F15, F16, F17, F18,
+                                        F19, F20, F21, F22, F23, F24, F25, F26,
+                                        F27, F28, F29, F30, F31, CR2, CR3, CR4,
+                                        V20, V21, V22, V23, V24, V25, V26, V27,
+                                        V28, V29, V30, V31)>;
+
+def CSR_SVR464   : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAVE,
+                                        X21, X22, X23, X24, X25, X26, X27, X28,
+                                        X29, X30, X31, F14, F15, F16, F17, F18,
+                                        F19, F20, F21, F22, F23, F24, F25, F26,
+                                        F27, F28, F29, F30, F31, CR2, CR3, CR4,
+                                        V20, V21, V22, V23, V24, V25, V26, V27,
+                                        V28, V29, V30, V31)>;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 6d612f7..b77a80b 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
 
 /// VRRegNo - Map from a numbered VR register to its enum value.
 ///
-static const unsigned short VRRegNo[] = {
+static const uint16_t VRRegNo[] = {
  PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
  PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
  PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 95d0d64..d80a385 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -14,10 +14,10 @@
 #ifndef PPCHAZRECS_H
 #define PPCHAZRECS_H
 
+#include "PPCInstrInfo.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "PPCInstrInfo.h"
 
 namespace llvm {
 
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index bfed7ba..85b5bc1 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -16,6 +16,11 @@
 #include "PPCPerfectShuffle.h"
 #include "PPCTargetMachine.h"
 #include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -24,16 +29,11 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
 static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
@@ -1547,7 +1547,7 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
                                             CCValAssign::LocInfo &LocInfo,
                                             ISD::ArgFlagsTy &ArgFlags,
                                             CCState &State) {
-  static const unsigned ArgRegs[] = {
+  static const uint16_t ArgRegs[] = {
     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
   };
@@ -1574,7 +1574,7 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
                                               CCValAssign::LocInfo &LocInfo,
                                               ISD::ArgFlagsTy &ArgFlags,
                                               CCState &State) {
-  static const unsigned ArgRegs[] = {
+  static const uint16_t ArgRegs[] = {
     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
     PPC::F8
   };
@@ -1598,8 +1598,8 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
 
 /// GetFPR - Get the set of FP registers that should be allocated for arguments,
 /// on Darwin.
-static const unsigned *GetFPR() {
-  static const unsigned FPR[] = {
+static const uint16_t *GetFPR() {
+  static const uint16_t FPR[] = {
     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
     PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
   };
@@ -1780,13 +1780,13 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
   // If the function takes variable number of arguments, make a frame index for
   // the start of the first vararg value... for expansion of llvm.va_start.
   if (isVarArg) {
-    static const unsigned GPArgRegs[] = {
+    static const uint16_t GPArgRegs[] = {
       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
     };
     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
 
-    static const unsigned FPArgRegs[] = {
+    static const uint16_t FPArgRegs[] = {
       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
       PPC::F8
     };
@@ -1879,18 +1879,18 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
   // Area that is at least reserved in caller of this function.
   unsigned MinReservedArea = ArgOffset;
 
-  static const unsigned GPR_32[] = {           // 32-bit registers.
+  static const uint16_t GPR_32[] = {           // 32-bit registers.
     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
   };
-  static const unsigned GPR_64[] = {           // 64-bit registers.
+  static const uint16_t GPR_64[] = {           // 64-bit registers.
     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
   };
 
-  static const unsigned *FPR = GetFPR();
+  static const uint16_t *FPR = GetFPR();
 
-  static const unsigned VR[] = {
+  static const uint16_t VR[] = {
     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
   };
@@ -1901,7 +1901,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
 
   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
 
-  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+  const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
 
   // In 32-bit non-varargs functions, the stack space for vectors is after the
   // stack space for non-vectors.  We do not use this space unless we have
@@ -2769,6 +2769,12 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
     (CallConv == CallingConv::Fast &&
      getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
 
+  // Add a register mask operand representing the call-preserved registers.
+  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+  assert(Mask && "Missing call preserved mask for calling convention");
+  Ops.push_back(DAG.getRegisterMask(Mask));
+
   if (InFlag.getNode())
     Ops.push_back(InFlag);
 
@@ -3141,17 +3147,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
   unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
 
-  static const unsigned GPR_32[] = {           // 32-bit registers.
+  static const uint16_t GPR_32[] = {           // 32-bit registers.
     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
   };
-  static const unsigned GPR_64[] = {           // 64-bit registers.
+  static const uint16_t GPR_64[] = {           // 64-bit registers.
     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
   };
-  static const unsigned *FPR = GetFPR();
+  static const uint16_t *FPR = GetFPR();
 
-  static const unsigned VR[] = {
+  static const uint16_t VR[] = {
     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
   };
@@ -3159,7 +3165,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
   const unsigned NumFPRs = 13;
   const unsigned NumVRs  = array_lengthof(VR);
 
-  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+  const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 3534e9c..2e046c4 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -15,10 +15,10 @@
 #ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
 #define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
 
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/CodeGen/SelectionDAG.h"
 #include "PPC.h"
 #include "PPCSubtarget.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
 
 namespace llvm {
   namespace PPCISD {
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 02bffed..78f3596 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -64,13 +64,7 @@ let Defs = [LR8] in
                     PPC970_Unit_BRU;
 
 // Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7, 
-  // All calls clobber the PPC64 non-callee saved registers.
-  Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
-          F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
-          V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
-          LR8,CTR8,
-          CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
   // Convenient aliases for call instructions
   let Uses = [RM] in {
     def BL8_Darwin  : IForm<18, 0, 1,
@@ -90,13 +84,7 @@ let isCall = 1, PPC970_Unit = 7,
 
 // ELF 64 ABI Calls = Darwin ABI Calls
 // Used to define BL8_ELF and BLA8_ELF
-let isCall = 1, PPC970_Unit = 7, 
-  // All calls clobber the PPC64 non-callee saved registers.
-  Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
-          F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
-          V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
-          LR8,CTR8,
-          CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
   // Convenient aliases for call instructions
   let Uses = [RM] in {
     def BL8_ELF  : IForm<18, 0, 1,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index e5f171d..7d49aa1 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -15,8 +15,8 @@
 #define POWERPC_INSTRUCTIONINFO_H
 
 #include "PPC.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "PPCRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 #define GET_INSTRINFO_HEADER
 #include "PPCGenInstrInfo.inc"
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index e234012..939b71a 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -438,13 +438,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
 }
 
 // Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7, 
-  // All calls clobber the non-callee saved registers...
-  Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
-          F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
-          V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
-          LR,CTR,
-          CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
   // Convenient aliases for call instructions
   let Uses = [RM] in {
     def BL_Darwin  : IForm<18, 0, 1,
@@ -463,13 +457,7 @@ let isCall = 1, PPC970_Unit = 7,
 }
 
 // SVR4 ABI Calls.
-let isCall = 1, PPC970_Unit = 7, 
-  // All calls clobber the non-callee saved registers...
-  Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
-          F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
-          V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
-          LR,CTR,
-          CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
   // Convenient aliases for call instructions
   let Uses = [RM] in {
     def BL_SVR4  : IForm<18, 0, 1,
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 306cc1f..2976f01 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -13,10 +13,10 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "reginfo"
+#include "PPCRegisterInfo.h"
 #include "PPC.h"
 #include "PPCInstrBuilder.h"
 #include "PPCMachineFunctionInfo.h"
-#include "PPCRegisterInfo.h"
 #include "PPCFrameLowering.h"
 #include "PPCSubtarget.h"
 #include "llvm/CallingConv.h"
@@ -100,104 +100,20 @@ PPCRegisterInfo::getPointerRegClass(unsigned Kind) const {
 
 const uint16_t*
 PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-  // 32-bit Darwin calling convention. 
-  static const uint16_t Darwin32_CalleeSavedRegs[] = {
-              PPC::R13, PPC::R14, PPC::R15,
-    PPC::R16, PPC::R17, PPC::R18, PPC::R19,
-    PPC::R20, PPC::R21, PPC::R22, PPC::R23,
-    PPC::R24, PPC::R25, PPC::R26, PPC::R27,
-    PPC::R28, PPC::R29, PPC::R30, PPC::R31,
-
-    PPC::F14, PPC::F15, PPC::F16, PPC::F17,
-    PPC::F18, PPC::F19, PPC::F20, PPC::F21,
-    PPC::F22, PPC::F23, PPC::F24, PPC::F25,
-    PPC::F26, PPC::F27, PPC::F28, PPC::F29,
-    PPC::F30, PPC::F31,
-    
-    PPC::CR2, PPC::CR3, PPC::CR4,
-    PPC::V20, PPC::V21, PPC::V22, PPC::V23,
-    PPC::V24, PPC::V25, PPC::V26, PPC::V27,
-    PPC::V28, PPC::V29, PPC::V30, PPC::V31,
-    
-    PPC::LR,  0
-  };
-
-  // 32-bit SVR4 calling convention.
-  static const uint16_t SVR4_CalleeSavedRegs[] = {
-                        PPC::R14, PPC::R15,
-    PPC::R16, PPC::R17, PPC::R18, PPC::R19,
-    PPC::R20, PPC::R21, PPC::R22, PPC::R23,
-    PPC::R24, PPC::R25, PPC::R26, PPC::R27,
-    PPC::R28, PPC::R29, PPC::R30, PPC::R31,
-
-    PPC::F14, PPC::F15, PPC::F16, PPC::F17,
-    PPC::F18, PPC::F19, PPC::F20, PPC::F21,
-    PPC::F22, PPC::F23, PPC::F24, PPC::F25,
-    PPC::F26, PPC::F27, PPC::F28, PPC::F29,
-    PPC::F30, PPC::F31,
-    
-    PPC::CR2, PPC::CR3, PPC::CR4,
-    
-    PPC::VRSAVE,
-    
-    PPC::V20, PPC::V21, PPC::V22, PPC::V23,
-    PPC::V24, PPC::V25, PPC::V26, PPC::V27,
-    PPC::V28, PPC::V29, PPC::V30, PPC::V31,
-    
-    0
-  };
-  // 64-bit Darwin calling convention. 
-  static const uint16_t Darwin64_CalleeSavedRegs[] = {
-    PPC::X14, PPC::X15,
-    PPC::X16, PPC::X17, PPC::X18, PPC::X19,
-    PPC::X20, PPC::X21, PPC::X22, PPC::X23,
-    PPC::X24, PPC::X25, PPC::X26, PPC::X27,
-    PPC::X28, PPC::X29, PPC::X30, PPC::X31,
-    
-    PPC::F14, PPC::F15, PPC::F16, PPC::F17,
-    PPC::F18, PPC::F19, PPC::F20, PPC::F21,
-    PPC::F22, PPC::F23, PPC::F24, PPC::F25,
-    PPC::F26, PPC::F27, PPC::F28, PPC::F29,
-    PPC::F30, PPC::F31,
-    
-    PPC::CR2, PPC::CR3, PPC::CR4,
-    PPC::V20, PPC::V21, PPC::V22, PPC::V23,
-    PPC::V24, PPC::V25, PPC::V26, PPC::V27,
-    PPC::V28, PPC::V29, PPC::V30, PPC::V31,
-    
-    PPC::LR8,  0
-  };
-
-  // 64-bit SVR4 calling convention.
-  static const uint16_t SVR4_64_CalleeSavedRegs[] = {
-    PPC::X14, PPC::X15,
-    PPC::X16, PPC::X17, PPC::X18, PPC::X19,
-    PPC::X20, PPC::X21, PPC::X22, PPC::X23,
-    PPC::X24, PPC::X25, PPC::X26, PPC::X27,
-    PPC::X28, PPC::X29, PPC::X30, PPC::X31,
-
-    PPC::F14, PPC::F15, PPC::F16, PPC::F17,
-    PPC::F18, PPC::F19, PPC::F20, PPC::F21,
-    PPC::F22, PPC::F23, PPC::F24, PPC::F25,
-    PPC::F26, PPC::F27, PPC::F28, PPC::F29,
-    PPC::F30, PPC::F31,
-
-    PPC::CR2, PPC::CR3, PPC::CR4,
-
-    PPC::VRSAVE,
+  if (Subtarget.isDarwinABI())
+    return Subtarget.isPPC64() ? CSR_Darwin64_SaveList :
+                                 CSR_Darwin32_SaveList;
 
-    PPC::V20, PPC::V21, PPC::V22, PPC::V23,
-    PPC::V24, PPC::V25, PPC::V26, PPC::V27,
-    PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+  return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList;
+}
 
-    0
-  };
-  
+const unsigned*
+PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
   if (Subtarget.isDarwinABI())
-    return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs :
-                                 Darwin32_CalleeSavedRegs;
+    return Subtarget.isPPC64() ? CSR_Darwin64_RegMask :
+                                 CSR_Darwin32_RegMask;
 
-  return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs;
+  return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask;
 }
 
 BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 6ce90bc..b1e6a72 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -42,6 +42,7 @@ public:
 
   /// Code Generation virtual methods...
   const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+  const unsigned *getCallPreservedMask(CallingConv::ID CC) const;
 
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index da20274..ba9c779 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPC.h"
 #include "PPCTargetMachine.h"
+#include "PPC.h"
 #include "llvm/PassManager.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/CodeGen/Passes.h"
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 6dd11c9..7da2b0c 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -24,8 +24,6 @@
 #include "llvm/Target/TargetData.h"
 
 namespace llvm {
-class PassManager;
-class GlobalValue;
 
 /// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
 ///
diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp
index 1423b1e..9a729bd 100644
--- a/lib/Target/Sparc/FPMover.cpp
+++ b/lib/Target/Sparc/FPMover.cpp
@@ -59,19 +59,19 @@ FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) {
 /// registers that correspond to it.
 static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg,
                              unsigned &OddReg) {
-  static const unsigned EvenHalvesOfPairs[] = {
+  static const uint16_t EvenHalvesOfPairs[] = {
     SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14,
     SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30
   };
-  static const unsigned OddHalvesOfPairs[] = {
+  static const uint16_t OddHalvesOfPairs[] = {
     SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15,
     SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31
   };
-  static const unsigned DoubleRegsInOrder[] = {
+  static const uint16_t DoubleRegsInOrder[] = {
     SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8,
     SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15
   };
-  for (unsigned i = 0; i < sizeof(DoubleRegsInOrder)/sizeof(unsigned); ++i)
+  for (unsigned i = 0; i < array_lengthof(DoubleRegsInOrder); ++i)
     if (DoubleRegsInOrder[i] == DoubleReg) {
       EvenReg = EvenHalvesOfPairs[i];
       OddReg = OddHalvesOfPairs[i];
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index a6b63fb..ee12633 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -50,7 +50,7 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
                                 MVT &LocVT, CCValAssign::LocInfo &LocInfo,
                                 ISD::ArgFlagsTy &ArgFlags, CCState &State)
 {
-  static const unsigned RegList[] = {
+  static const uint16_t RegList[] = {
     SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
   };
   //Try to get first reg
@@ -301,11 +301,11 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
 
   // Store remaining ArgRegs to the stack if this is a varargs function.
   if (isVarArg) {
-    static const unsigned ArgRegs[] = {
+    static const uint16_t ArgRegs[] = {
       SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
     };
     unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6);
-    const unsigned *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
+    const uint16_t *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
     unsigned ArgOffset = CCInfo.getNextStackOffset();
     if (NumAllocated == 6)
       ArgOffset += StackOffset;
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 4a7c479..f483c96 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -15,8 +15,8 @@
 #ifndef SPARC_ISELLOWERING_H
 #define SPARC_ISELLOWERING_H
 
-#include "llvm/Target/TargetLowering.h"
 #include "Sparc.h"
+#include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
   namespace SPISD {
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index 4932531..204f698 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -14,8 +14,8 @@
 #ifndef SPARCINSTRUCTIONINFO_H
 #define SPARCINSTRUCTIONINFO_H
 
-#include "llvm/Target/TargetInstrInfo.h"
 #include "SparcRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 #define GET_INSTRINFO_HEADER
 #include "SparcGenInstrInfo.inc"
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index c392fcc..6357468 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -11,15 +11,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Sparc.h"
 #include "SparcRegisterInfo.h"
+#include "Sparc.h"
 #include "SparcSubtarget.h"
+#include "llvm/Type.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Type.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 80a3be6..6f31356 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -10,8 +10,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "Sparc.h"
 #include "SparcTargetMachine.h"
+#include "Sparc.h"
 #include "llvm/PassManager.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index d91830f..9e88472 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -67,11 +67,11 @@ private:
                                MCStreamer &Out);
 
   /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
-  /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
+  /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
   bool isSrcOp(X86Operand &Op);
 
-  /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode
-  /// or %es:(%edi) in 32bit mode.
+  /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
+  /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
   bool isDstOp(X86Operand &Op);
 
   bool is64BitMode() const {
@@ -468,7 +468,8 @@ bool X86AsmParser::isSrcOp(X86Operand &Op) {
 bool X86AsmParser::isDstOp(X86Operand &Op) {
   unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
 
-  return Op.isMem() && Op.Mem.SegReg == X86::ES &&
+  return Op.isMem() && 
+    (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
     isa<MCConstantExpr>(Op.Mem.Disp) &&
     cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
     Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
@@ -838,6 +839,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
   // If we reached here, then we just ate the ( of the memory operand.  Process
   // the rest of the memory operand.
   unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
+  SMLoc IndexLoc;
 
   if (getLexer().is(AsmToken::Percent)) {
     SMLoc StartLoc, EndLoc;
@@ -851,6 +853,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
 
   if (getLexer().is(AsmToken::Comma)) {
     Parser.Lex(); // Eat the comma.
+    IndexLoc = Parser.getTok().getLoc();
 
     // Following the comma we should have either an index register, or a scale
     // value. We don't support the later form, but we want to parse it
@@ -876,8 +879,10 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
           SMLoc Loc = Parser.getTok().getLoc();
 
           int64_t ScaleVal;
-          if (getParser().ParseAbsoluteExpression(ScaleVal))
+          if (getParser().ParseAbsoluteExpression(ScaleVal)){
+            Error(Loc, "expected scale expression");
             return 0;
+	  }
 
           // Validate the scale amount.
           if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
@@ -910,6 +915,23 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
   SMLoc MemEnd = Parser.getTok().getLoc();
   Parser.Lex(); // Eat the ')'.
 
+  // If we have both a base register and an index register make sure they are
+  // both 64-bit or 32-bit registers.
+  if (BaseReg != 0 && IndexReg != 0) {
+    if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
+        !X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) &&
+        IndexReg != X86::RIZ) {
+      Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
+      return 0;
+    }
+    if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
+        !X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) &&
+        IndexReg != X86::EIZ){
+      Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
+      return 0;
+    }
+  }
+
   return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
                                MemStart, MemEnd);
 }
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index b0e66f0..fbd81d2 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -312,6 +312,15 @@ static int readPrefixes(struct InternalInstruction* insn) {
     
     if (consumeByte(insn, &byte))
       return -1;
+
+    /*
+     * If the first byte is a LOCK prefix break and let it be disassembled
+     * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>.
+     * FIXME there is currently no way to get the disassembler to print the
+     * lock prefix if it is not the first byte.
+     */
+    if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
+      break;
     
     switch (byte) {
     case 0xf0:  /* LOCK */
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 30a847f..f532019 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
 void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
                                   const char *(*getRegName)(unsigned)) {
   // If this is a shuffle operation, the switch should fill in this state.
-  SmallVector<unsigned, 8> ShuffleMask;
+  SmallVector<int, 8> ShuffleMask;
   const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;
 
   switch (MI->getOpcode()) {
@@ -500,7 +500,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
     if (Src1Name == Src2Name) {
       for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
         if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
-            ShuffleMask[i] >= e)        // From second mask.
+            ShuffleMask[i] >= (int)e)        // From second mask.
           ShuffleMask[i] -= e;
       }
     }
@@ -518,13 +518,13 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
 
       // Otherwise, it must come from src1 or src2.  Print the span of elements
       // that comes from this src.
-      bool isSrc1 = ShuffleMask[i] < ShuffleMask.size();
+      bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size();
       const char *SrcName = isSrc1 ? Src1Name : Src2Name;
       OS << (SrcName ? SrcName : "mem") << '[';
       bool IsFirst = true;
       while (i != e &&
              (int)ShuffleMask[i] >= 0 &&
-             (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) {
+             (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
         if (!IsFirst)
           OS << ',';
         else
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 9ccbf1c..3f770f7 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/MC/MCAsmBackend.h"
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86FixupKinds.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 37727b6..80990e5 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -46,6 +46,11 @@ public:
     return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
   }
 
+  bool is32BitMode() const {
+    // FIXME: Can tablegen auto-generate this?
+    return (STI.getFeatureBits() & X86::Mode64Bit) == 0;
+  }
+
   static unsigned GetX86RegNum(const MCOperand &MO) {
     return X86_MC::getX86RegNum(MO.getReg());
   }
@@ -154,9 +159,8 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
   return MCFixup::getKindForSize(Size, isPCRel);
 }
 
-/// Is32BitMemOperand - Return true if the specified instruction with a memory
-/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit
-/// memory operand.  Op specifies the operand # of the memoperand.
+/// Is32BitMemOperand - Return true if the specified instruction has
+/// a 32-bit memory operand. Op specifies the operand # of the memoperand.
 static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
   const MCOperand &BaseReg  = MI.getOperand(Op+X86::AddrBaseReg);
   const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
@@ -169,6 +173,36 @@ static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
   return false;
 }
 
+/// Is64BitMemOperand - Return true if the specified instruction has
+/// a 64-bit memory operand. Op specifies the operand # of the memoperand.
+#ifndef NDEBUG
+static bool Is64BitMemOperand(const MCInst &MI, unsigned Op) {
+  const MCOperand &BaseReg  = MI.getOperand(Op+X86::AddrBaseReg);
+  const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+  if ((BaseReg.getReg() != 0 &&
+       X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) ||
+      (IndexReg.getReg() != 0 &&
+       X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg())))
+    return true;
+  return false;
+}
+#endif
+
+/// Is16BitMemOperand - Return true if the specified instruction has
+/// a 16-bit memory operand. Op specifies the operand # of the memoperand.
+static bool Is16BitMemOperand(const MCInst &MI, unsigned Op) {
+  const MCOperand &BaseReg  = MI.getOperand(Op+X86::AddrBaseReg);
+  const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+  if ((BaseReg.getReg() != 0 &&
+       X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) ||
+      (IndexReg.getReg() != 0 &&
+       X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg())))
+    return true;
+  return false;
+}
+
 /// StartsWithGlobalOffsetTable - Check if this expression starts with
 ///  _GLOBAL_OFFSET_TABLE_ and if it is of the form
 ///  _GLOBAL_OFFSET_TABLE_-symbol. This is needed to support PIC on ELF
@@ -817,8 +851,22 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
     EmitByte(0xF3, CurByte, OS);
 
   // Emit the address size opcode prefix as needed.
-  if ((TSFlags & X86II::AdSize) ||
-      (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand)))
+  bool need_address_override;
+  if (TSFlags & X86II::AdSize) {
+    need_address_override = true;
+  } else if (MemOperand == -1) {
+    need_address_override = false;
+  } else if (is64BitMode()) {
+    assert(!Is16BitMemOperand(MI, MemOperand));
+    need_address_override = Is32BitMemOperand(MI, MemOperand);
+  } else if (is32BitMode()) {
+    assert(!Is64BitMemOperand(MI, MemOperand));
+    need_address_override = Is16BitMemOperand(MI, MemOperand);
+  } else {
+    need_address_override = false;
+  }
+
+  if (need_address_override)
     EmitByte(0x67, CurByte, OS);
 
   // Emit the operand size opcode prefix as needed.
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index a581993..624e56f 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -922,3 +922,22 @@ _test2:                                 ## @test2
 The insertps's of $0 are pointless complex copies.
 
 //===---------------------------------------------------------------------===//
+
+[UNSAFE FP]
+
+void foo(double, double, double);
+void norm(double x, double y, double z) {
+  double scale = __builtin_sqrt(x*x + y*y + z*z);
+  foo(x/scale, y/scale, z/scale);
+}
+
+We currently generate an sqrtsd and 3 divsd instructions. This is bad, fp div is
+slow and not pipelined. In -ffast-math mode we could compute "1.0/scale" first
+and emit 3 mulsd in place of the divs. This can be done as a target-independent
+transform.
+
+If we're dealing with floats instead of doubles we could even replace the sqrtss
+and inversion with an rsqrtss instruction, which computes 1/sqrt faster at the
+cost of reduced accuracy.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index f4b85ae..32c722a 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -20,7 +20,7 @@
 
 namespace llvm {
 
-void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
   // Defaults the copying the dest value.
   ShuffleMask.push_back(0);
   ShuffleMask.push_back(1);
@@ -44,8 +44,7 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
 }
 
 // <3,1> or <6,7,2,3>
-void DecodeMOVHLPSMask(unsigned NElts,
-                       SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
   for (unsigned i = NElts/2; i != NElts; ++i)
     ShuffleMask.push_back(NElts+i);
 
@@ -54,8 +53,7 @@ void DecodeMOVHLPSMask(unsigned NElts,
 }
 
 // <0,2> or <0,1,4,5>
-void DecodeMOVLHPSMask(unsigned NElts,
-                       SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
   for (unsigned i = 0; i != NElts/2; ++i)
     ShuffleMask.push_back(i);
 
@@ -66,8 +64,7 @@ void DecodeMOVLHPSMask(unsigned NElts,
 /// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.
 /// VT indicates the type of the vector allowing it to handle different
 /// datatypes and vector widths.
-void DecodePSHUFMask(EVT VT, unsigned Imm,
-                     SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
   unsigned NumElts = VT.getVectorNumElements();
 
   unsigned NumLanes = VT.getSizeInBits() / 128;
@@ -83,8 +80,7 @@ void DecodePSHUFMask(EVT VT, unsigned Imm,
   }
 }
 
-void DecodePSHUFHWMask(unsigned Imm,
-                       SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
   ShuffleMask.push_back(0);
   ShuffleMask.push_back(1);
   ShuffleMask.push_back(2);
@@ -95,8 +91,7 @@ void DecodePSHUFHWMask(unsigned Imm,
   }
 }
 
-void DecodePSHUFLWMask(unsigned Imm,
-                       SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
   for (unsigned i = 0; i != 4; ++i) {
     ShuffleMask.push_back((Imm & 3));
     Imm >>= 2;
@@ -110,8 +105,7 @@ void DecodePSHUFLWMask(unsigned Imm,
 /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
 /// the type of the vector allowing it to handle different datatypes and vector
 /// widths.
-void DecodeSHUFPMask(EVT VT, unsigned Imm,
-                     SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
   unsigned NumElts = VT.getVectorNumElements();
 
   unsigned NumLanes = VT.getSizeInBits() / 128;
@@ -136,7 +130,7 @@ void DecodeSHUFPMask(EVT VT, unsigned Imm,
 /// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
 /// and punpckh*. VT indicates the type of the vector allowing it to handle
 /// different datatypes and vector widths.
-void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
   unsigned NumElts = VT.getVectorNumElements();
 
   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -156,7 +150,7 @@ void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
 /// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
 /// and punpckl*. VT indicates the type of the vector allowing it to handle
 /// different datatypes and vector widths.
-void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
   unsigned NumElts = VT.getVectorNumElements();
 
   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -174,7 +168,7 @@ void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
 }
 
 void DecodeVPERM2X128Mask(EVT VT, unsigned Imm,
-                          SmallVectorImpl<unsigned> &ShuffleMask) {
+                          SmallVectorImpl<int> &ShuffleMask) {
   unsigned HalfSize = VT.getVectorNumElements()/2;
   unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
   unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 877c9bd..5b8c6ef 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -24,47 +24,41 @@
 
 namespace llvm {
 enum {
-  SM_SentinelZero = ~0U
+  SM_SentinelZero = -1
 };
 
-void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
 
 // <3,1> or <6,7,2,3>
-void DecodeMOVHLPSMask(unsigned NElts,
-                       SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
 
 // <0,2> or <0,1,4,5>
-void DecodeMOVLHPSMask(unsigned NElts,
-                       SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
 
-void DecodePSHUFMask(EVT VT, unsigned Imm,
-                     SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
 
-void DecodePSHUFHWMask(unsigned Imm,
-                       SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
 
-void DecodePSHUFLWMask(unsigned Imm,
-                       SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
 
 /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
 /// the type of the vector allowing it to handle different datatypes and vector
 /// widths.
-void DecodeSHUFPMask(EVT VT, unsigned Imm,
-                     SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
 
 /// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
 /// and punpckh*. VT indicates the type of the vector allowing it to handle
 /// different datatypes and vector widths.
-void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask);
 
 /// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
 /// and punpckl*. VT indicates the type of the vector allowing it to handle
 /// different datatypes and vector widths.
-void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask);
 
 
 void DecodeVPERM2X128Mask(EVT VT, unsigned Imm,
-                          SmallVectorImpl<unsigned> &ShuffleMask);
+                          SmallVectorImpl<int> &ShuffleMask);
 
 } // llvm namespace
 
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 81e9422..ecc7b59 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -24,8 +24,6 @@ namespace llvm {
 
 class FunctionPass;
 class JITCodeEmitter;
-class MachineCodeEmitter;
-class Target;
 class X86TargetMachine;
 
 /// createX86ISelDag - This pass converts a legalized DAG into a 
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 268cbf4..f1cedf3 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -13,13 +13,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86AsmPrinter.h"
-#include "InstPrinter/X86ATTInstPrinter.h"
-#include "InstPrinter/X86IntelInstPrinter.h"
 #include "X86MCInstLower.h"
 #include "X86.h"
 #include "X86COFFMachineModuleInfo.h"
 #include "X86MachineFunctionInfo.h"
 #include "X86TargetMachine.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "InstPrinter/X86IntelInstPrinter.h"
 #include "llvm/CallingConv.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 1058df5..a6ed9ba 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -24,11 +24,7 @@
 
 namespace llvm {
 
-class MachineJumpTableInfo;
-class MCContext;
-class MCInst;
 class MCStreamer;
-class MCSymbol;
 
 class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
   const X86Subtarget *Subtarget;
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index 63c08f1..0cec95a 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -14,9 +14,9 @@
 #ifndef X86COFF_MACHINEMODULEINFO_H
 #define X86COFF_MACHINEMODULEINFO_H
 
+#include "X86MachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/ADT/DenseSet.h"
-#include "X86MachineFunctionInfo.h"
 
 namespace llvm {
   class X86MachineFunctionInfo;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index f90764e..3d63b7e 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1779,7 +1779,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
 
   if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
     // Count the number of XMM registers allocated.
-    static const unsigned XMMArgRegs[] = {
+    static const uint16_t XMMArgRegs[] = {
       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
     };
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 32de194..936df27 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -26,6 +26,7 @@
 #define DEBUG_TYPE "x86-codegen"
 #include "X86.h"
 #include "X86InstrInfo.h"
+#include "llvm/InlineAsm.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -37,7 +38,6 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/InlineAsm.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -570,8 +570,8 @@ void FPS::finishBlockStack() {
 
 namespace {
   struct TableEntry {
-    unsigned from;
-    unsigned to;
+    uint16_t from;
+    uint16_t to;
     bool operator<(const TableEntry &TE) const { return from < TE.from; }
     friend bool operator<(const TableEntry &TE, unsigned V) {
       return TE.from < V;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index aa508b8..9405c2f 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -21,7 +21,6 @@
 #include "X86TargetMachine.h"
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
-#include "llvm/Support/CFG.h"
 #include "llvm/Type.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -32,6 +31,7 @@
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
@@ -1654,7 +1654,7 @@ enum AtomicSz {
   AtomicSzEnd
 };
 
-static const unsigned int AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
+static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
   {
     X86::LOCK_OR8mi,
     X86::LOCK_OR8mr,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index cae9aad..88f3829 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1927,17 +1927,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
       unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
 
       // FIXME: We should really autogenerate these arrays
-      static const unsigned GPR64ArgRegsWin64[] = {
+      static const uint16_t GPR64ArgRegsWin64[] = {
         X86::RCX, X86::RDX, X86::R8,  X86::R9
       };
-      static const unsigned GPR64ArgRegs64Bit[] = {
+      static const uint16_t GPR64ArgRegs64Bit[] = {
         X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
       };
-      static const unsigned XMMArgRegs64Bit[] = {
+      static const uint16_t XMMArgRegs64Bit[] = {
         X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
         X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
       };
-      const unsigned *GPR64ArgRegs;
+      const uint16_t *GPR64ArgRegs;
       unsigned NumXMMRegs = 0;
 
       if (IsWin64) {
@@ -2326,7 +2326,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     // registers used and is in the range 0 - 8 inclusive.
 
     // Count the number of XMM registers allocated.
-    static const unsigned XMMArgRegs[] = {
+    static const uint16_t XMMArgRegs[] = {
       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
     };
@@ -2910,7 +2910,7 @@ static bool isTargetShuffle(unsigned Opcode) {
 }
 
 static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
-                                               SDValue V1, SelectionDAG &DAG) {
+                                    SDValue V1, SelectionDAG &DAG) {
   switch(Opc) {
   default: llvm_unreachable("Unknown x86 shuffle node");
   case X86ISD::MOVSHDUP:
@@ -2921,7 +2921,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
 }
 
 static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
-                          SDValue V1, unsigned TargetMask, SelectionDAG &DAG) {
+                                    SDValue V1, unsigned TargetMask,
+                                    SelectionDAG &DAG) {
   switch(Opc) {
   default: llvm_unreachable("Unknown x86 shuffle node");
   case X86ISD::PSHUFD:
@@ -2933,7 +2934,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
 }
 
 static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
-               SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
+                                    SDValue V1, SDValue V2, unsigned TargetMask,
+                                    SelectionDAG &DAG) {
   switch(Opc) {
   default: llvm_unreachable("Unknown x86 shuffle node");
   case X86ISD::PALIGN:
@@ -3712,6 +3714,8 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
 static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT,
                                bool V2IsSplat = false, bool V2IsUndef = false) {
   unsigned NumOps = VT.getVectorNumElements();
+  if (VT.getSizeInBits() == 256)
+    return false;
   if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
     return false;
 
@@ -4342,9 +4346,81 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
   return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
 }
 
+/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
+/// target specific opcode. Returns true if the Mask could be calculated.
+/// Sets IsUnary to true if only uses one source.
+static bool getTargetShuffleMask(SDNode *N, EVT VT,
+                                 SmallVectorImpl<int> &Mask, bool &IsUnary) {
+  unsigned NumElems = VT.getVectorNumElements();
+  SDValue ImmN;
+
+  IsUnary = false;
+  switch(N->getOpcode()) {
+  case X86ISD::SHUFP:
+    ImmN = N->getOperand(N->getNumOperands()-1);
+    DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+    break;
+  case X86ISD::UNPCKH:
+    DecodeUNPCKHMask(VT, Mask);
+    break;
+  case X86ISD::UNPCKL:
+    DecodeUNPCKLMask(VT, Mask);
+    break;
+  case X86ISD::MOVHLPS:
+    DecodeMOVHLPSMask(NumElems, Mask);
+    break;
+  case X86ISD::MOVLHPS:
+    DecodeMOVLHPSMask(NumElems, Mask);
+    break;
+  case X86ISD::PSHUFD:
+  case X86ISD::VPERMILP:
+    ImmN = N->getOperand(N->getNumOperands()-1);
+    DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+    IsUnary = true;
+    break;
+  case X86ISD::PSHUFHW:
+    ImmN = N->getOperand(N->getNumOperands()-1);
+    DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+    IsUnary = true;
+    break;
+  case X86ISD::PSHUFLW:
+    ImmN = N->getOperand(N->getNumOperands()-1);
+    DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+    IsUnary = true;
+    break;
+  case X86ISD::MOVSS:
+  case X86ISD::MOVSD: {
+    // The index 0 always comes from the first element of the second source,
+    // this is why MOVSS and MOVSD are used in the first place. The other
+    // elements come from the other positions of the first source vector
+    Mask.push_back(NumElems);
+    for (unsigned i = 1; i != NumElems; ++i) {
+      Mask.push_back(i);
+    }
+    break;
+  }
+  case X86ISD::VPERM2X128:
+    ImmN = N->getOperand(N->getNumOperands()-1);
+    DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+    break;
+  case X86ISD::MOVDDUP:
+  case X86ISD::MOVLHPD:
+  case X86ISD::MOVLPD:
+  case X86ISD::MOVLPS:
+  case X86ISD::MOVSHDUP:
+  case X86ISD::MOVSLDUP:
+  case X86ISD::PALIGN:
+    // Not yet implemented
+    return false;
+  default: llvm_unreachable("unknown target shuffle node");
+  }
+
+  return true;
+}
+
 /// getShuffleScalarElt - Returns the scalar element that will make up the ith
 /// element of the result of the vector shuffle.
-static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
+static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
                                    unsigned Depth) {
   if (Depth == 6)
     return SDValue();  // Limit search depth.
@@ -4355,89 +4431,34 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
 
   // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
   if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
-    Index = SV->getMaskElt(Index);
+    int Elt = SV->getMaskElt(Index);
 
-    if (Index < 0)
+    if (Elt < 0)
       return DAG.getUNDEF(VT.getVectorElementType());
 
     unsigned NumElems = VT.getVectorNumElements();
-    SDValue NewV = (Index < (int)NumElems) ? SV->getOperand(0)
-                                           : SV->getOperand(1);
-    return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1);
+    SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
+                                         : SV->getOperand(1);
+    return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
   }
 
   // Recurse into target specific vector shuffles to find scalars.
   if (isTargetShuffle(Opcode)) {
     unsigned NumElems = VT.getVectorNumElements();
-    SmallVector<unsigned, 16> ShuffleMask;
+    SmallVector<int, 16> ShuffleMask;
     SDValue ImmN;
+    bool IsUnary;
 
-    switch(Opcode) {
-    case X86ISD::SHUFP:
-      ImmN = N->getOperand(N->getNumOperands()-1);
-      DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
-                      ShuffleMask);
-      break;
-    case X86ISD::UNPCKH:
-      DecodeUNPCKHMask(VT, ShuffleMask);
-      break;
-    case X86ISD::UNPCKL:
-      DecodeUNPCKLMask(VT, ShuffleMask);
-      break;
-    case X86ISD::MOVHLPS:
-      DecodeMOVHLPSMask(NumElems, ShuffleMask);
-      break;
-    case X86ISD::MOVLHPS:
-      DecodeMOVLHPSMask(NumElems, ShuffleMask);
-      break;
-    case X86ISD::PSHUFD:
-    case X86ISD::VPERMILP:
-      ImmN = N->getOperand(N->getNumOperands()-1);
-      DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
-                      ShuffleMask);
-      break;
-    case X86ISD::PSHUFHW:
-      ImmN = N->getOperand(N->getNumOperands()-1);
-      DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
-                        ShuffleMask);
-      break;
-    case X86ISD::PSHUFLW:
-      ImmN = N->getOperand(N->getNumOperands()-1);
-      DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
-                        ShuffleMask);
-      break;
-    case X86ISD::MOVSS:
-    case X86ISD::MOVSD: {
-      // The index 0 always comes from the first element of the second source,
-      // this is why MOVSS and MOVSD are used in the first place. The other
-      // elements come from the other positions of the first source vector.
-      unsigned OpNum = (Index == 0) ? 1 : 0;
-      return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
-                                 Depth+1);
-    }
-    case X86ISD::VPERM2X128:
-      ImmN = N->getOperand(N->getNumOperands()-1);
-      DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
-                           ShuffleMask);
-      break;
-    case X86ISD::MOVDDUP:
-    case X86ISD::MOVLHPD:
-    case X86ISD::MOVLPD:
-    case X86ISD::MOVLPS:
-    case X86ISD::MOVSHDUP:
-    case X86ISD::MOVSLDUP:
-    case X86ISD::PALIGN:
-      return SDValue(); // Not yet implemented.
-    default: llvm_unreachable("unknown target shuffle node");
-    }
-
-    Index = ShuffleMask[Index];
-    if (Index < 0)
+    if (!getTargetShuffleMask(N, VT, ShuffleMask, IsUnary))
+      return SDValue();
+
+    int Elt = ShuffleMask[Index];
+    if (Elt < 0)
       return DAG.getUNDEF(VT.getVectorElementType());
 
-    SDValue NewV = (Index < (int)NumElems) ? N->getOperand(0)
+    SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
                                            : N->getOperand(1);
-    return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG,
+    return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
                                Depth+1);
   }
 
@@ -4453,7 +4474,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
 
   if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
     return (Index == 0) ? V.getOperand(0)
-                          : DAG.getUNDEF(VT.getVectorElementType());
+                        : DAG.getUNDEF(VT.getVectorElementType());
 
   if (V.getOpcode() == ISD::BUILD_VECTOR)
     return V.getOperand(Index);
@@ -4465,38 +4486,37 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
 /// shuffle operation which come from a consecutively from a zero. The
 /// search can start in two different directions, from left or right.
 static
-unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
+unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, unsigned NumElems,
                                   bool ZerosFromLeft, SelectionDAG &DAG) {
-  int i = 0;
-
-  while (i < NumElems) {
+  unsigned i;
+  for (i = 0; i != NumElems; ++i) {
     unsigned Index = ZerosFromLeft ? i : NumElems-i-1;
-    SDValue Elt = getShuffleScalarElt(N, Index, DAG, 0);
+    SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0);
     if (!(Elt.getNode() &&
          (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt))))
       break;
-    ++i;
   }
 
   return i;
 }
 
-/// isShuffleMaskConsecutive - Check if the shuffle mask indicies from MaskI to
-/// MaskE correspond consecutively to elements from one of the vector operands,
+/// isShuffleMaskConsecutive - Check if the shuffle mask indicies [MaskI, MaskE)
+/// correspond consecutively to elements from one of the vector operands,
 /// starting from its index OpIdx. Also tell OpNum which source vector operand.
 static
-bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, int MaskI, int MaskE,
-                              int OpIdx, int NumElems, unsigned &OpNum) {
+bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp,
+                              unsigned MaskI, unsigned MaskE, unsigned OpIdx,
+                              unsigned NumElems, unsigned &OpNum) {
   bool SeenV1 = false;
   bool SeenV2 = false;
 
-  for (int i = MaskI; i <= MaskE; ++i, ++OpIdx) {
+  for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) {
     int Idx = SVOp->getMaskElt(i);
     // Ignore undef indicies
     if (Idx < 0)
       continue;
 
-    if (Idx < NumElems)
+    if (Idx < (int)NumElems)
       SeenV1 = true;
     else
       SeenV2 = true;
@@ -4531,7 +4551,7 @@ static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
   //
   if (!isShuffleMaskConsecutive(SVOp,
             0,                   // Mask Start Index
-            NumElems-NumZeros-1, // Mask End Index
+            NumElems-NumZeros,   // Mask End Index(exclusive)
             NumZeros,            // Where to start looking in the src vector
             NumElems,            // Number of elements in vector
             OpSrc))              // Which source operand ?
@@ -4564,7 +4584,7 @@ static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
   //
   if (!isShuffleMaskConsecutive(SVOp,
             NumZeros,     // Mask Start Index
-            NumElems-1,   // Mask End Index
+            NumElems,     // Mask End Index(exclusive)
             0,            // Where to start looking in the src vector
             NumElems,     // Number of elements in vector
             OpSrc))       // Which source operand ?
@@ -6080,88 +6100,6 @@ static bool RelaxedMayFoldVectorLoad(SDValue V) {
   return false;
 }
 
-/// CanFoldShuffleIntoVExtract - Check if the current shuffle is used by
-/// a vector extract, and if both can be later optimized into a single load.
-/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked
-/// here because otherwise a target specific shuffle node is going to be
-/// emitted for this shuffle, and the optimization not done.
-/// FIXME: This is probably not the best approach, but fix the problem
-/// until the right path is decided.
-static
-bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
-                                         const TargetLowering &TLI) {
-  EVT VT = V.getValueType();
-  ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V);
-
-  // Be sure that the vector shuffle is present in a pattern like this:
-  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr)
-  if (!V.hasOneUse())
-    return false;
-
-  SDNode *N = *V.getNode()->use_begin();
-  if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
-    return false;
-
-  SDValue EltNo = N->getOperand(1);
-  if (!isa<ConstantSDNode>(EltNo))
-    return false;
-
-  // If the bit convert changed the number of elements, it is unsafe
-  // to examine the mask.
-  bool HasShuffleIntoBitcast = false;
-  if (V.getOpcode() == ISD::BITCAST) {
-    EVT SrcVT = V.getOperand(0).getValueType();
-    if (SrcVT.getVectorNumElements() != VT.getVectorNumElements())
-      return false;
-    V = V.getOperand(0);
-    HasShuffleIntoBitcast = true;
-  }
-
-  // Select the input vector, guarding against out of range extract vector.
-  unsigned NumElems = VT.getVectorNumElements();
-  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
-  int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt);
-  V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1);
-
-  // If we are accessing the upper part of a YMM register
-  // then the EXTRACT_VECTOR_ELT is likely to be legalized to a sequence of
-  // EXTRACT_SUBVECTOR + EXTRACT_VECTOR_ELT, which are not detected at this point
-  // because the legalization of N did not happen yet.
-  if (Idx >= (int)NumElems/2 && VT.getSizeInBits() == 256)
-    return false;
-
-  // Skip one more bit_convert if necessary
-  if (V.getOpcode() == ISD::BITCAST) {
-    if (!V.hasOneUse())
-      return false;
-    V = V.getOperand(0);
-  }
-
-  if (!ISD::isNormalLoad(V.getNode()))
-    return false;
-
-  // Is the original load suitable?
-  LoadSDNode *LN0 = cast<LoadSDNode>(V);
-
-  if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
-    return false;
-
-  if (!HasShuffleIntoBitcast)
-    return true;
-
-  // If there's a bitcast before the shuffle, check if the load type and
-  // alignment is valid.
-  unsigned Align = LN0->getAlignment();
-  unsigned NewAlign =
-    TLI.getTargetData()->getABITypeAlignment(
-                                  VT.getTypeForEVT(*DAG.getContext()));
-
-  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
-    return false;
-
-  return true;
-}
-
 static
 SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
   EVT VT = Op.getValueType();
@@ -6282,12 +6220,6 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
   if (SVOp->isSplat()) {
     unsigned NumElem = VT.getVectorNumElements();
     int Size = VT.getSizeInBits();
-    // Special case, this is the only place now where it's allowed to return
-    // a vector_shuffle operation without using a target specific node, because
-    // *hopefully* it will be optimized away by the dag combiner. FIXME: should
-    // this be moved to DAGCombine instead?
-    if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
-      return Op;
 
     // Use vbroadcast whenever the splat comes from a foldable load
     SDValue LD = isVectorBroadcast(Op, Subtarget);
@@ -13005,11 +12937,109 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target
+/// specific shuffle of a load can be folded into a single element load.
+/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
+/// shuffles have been customed lowered so we need to handle those here.
+static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
+                                         TargetLowering::DAGCombinerInfo &DCI) {
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  SDValue InVec = N->getOperand(0);
+  SDValue EltNo = N->getOperand(1);
+
+  if (!isa<ConstantSDNode>(EltNo))
+    return SDValue();
+
+  EVT VT = InVec.getValueType();
+
+  bool HasShuffleIntoBitcast = false;
+  if (InVec.getOpcode() == ISD::BITCAST) {
+    // Don't duplicate a load with other uses.
+    if (!InVec.hasOneUse())
+      return SDValue();
+    EVT BCVT = InVec.getOperand(0).getValueType();
+    if (BCVT.getVectorNumElements() != VT.getVectorNumElements())
+      return SDValue();
+    InVec = InVec.getOperand(0);
+    HasShuffleIntoBitcast = true;
+  }
+
+  if (!isTargetShuffle(InVec.getOpcode()))
+    return SDValue();
+
+  // Don't duplicate a load with other uses.
+  if (!InVec.hasOneUse())
+    return SDValue();
+
+  SmallVector<int, 16> ShuffleMask;
+  bool UnaryShuffle;
+  if (!getTargetShuffleMask(InVec.getNode(), VT, ShuffleMask, UnaryShuffle))
+    return SDValue();
+
+  // Select the input vector, guarding against out of range extract vector.
+  unsigned NumElems = VT.getVectorNumElements();
+  int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+  int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt];
+  SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
+                                         : InVec.getOperand(1);
+
+  // If inputs to shuffle are the same for both ops, then allow 2 uses
+  unsigned AllowedUses = InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1;
+
+  if (LdNode.getOpcode() == ISD::BITCAST) {
+    // Don't duplicate a load with other uses.
+    if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0))
+      return SDValue();
+
+    AllowedUses = 1; // only allow 1 load use if we have a bitcast
+    LdNode = LdNode.getOperand(0);
+  }
+
+  if (!ISD::isNormalLoad(LdNode.getNode()))
+    return SDValue();
+
+  LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
+
+  if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
+    return SDValue();
+
+  if (HasShuffleIntoBitcast) {
+    // If there's a bitcast before the shuffle, check if the load type and
+    // alignment is valid.
+    unsigned Align = LN0->getAlignment();
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+    unsigned NewAlign = TLI.getTargetData()->
+      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+
+    if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
+      return SDValue();
+  }
+
+  // All checks match so transform back to vector_shuffle so that DAG combiner
+  // can finish the job
+  DebugLoc dl = N->getDebugLoc();
+
+  // Create shuffle node taking into account the case that its a unary shuffle
+  SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(VT) : InVec.getOperand(1);
+  Shuffle = DAG.getVectorShuffle(InVec.getValueType(), dl,
+                                 InVec.getOperand(0), Shuffle,
+                                 &ShuffleMask[0]);
+  Shuffle = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
+                     EltNo);
+}
+
 /// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
 /// generation and convert it from being a bunch of shuffles and extracts
 /// to a simple store and scalar loads to extract the elements.
 static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
-                                                const TargetLowering &TLI) {
+                                         TargetLowering::DAGCombinerInfo &DCI) {
+  SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI);
+  if (NewOp.getNode())
+    return NewOp;
+
   SDValue InputVector = N->getOperand(0);
 
   // Only operate on vectors of 4 elements, where the alternative shuffling
@@ -13070,6 +13100,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
     unsigned EltSize =
         InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
     uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
     SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
 
     SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
@@ -13093,6 +13124,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
 static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const X86Subtarget *Subtarget) {
+
+
   DebugLoc DL = N->getDebugLoc();
   SDValue Cond = N->getOperand(0);
   // Get the LHS/RHS of the select.
@@ -14897,7 +14930,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   switch (N->getOpcode()) {
   default: break;
   case ISD::EXTRACT_VECTOR_ELT:
-    return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
+    return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
   case ISD::VSELECT:
   case ISD::SELECT:         return PerformSELECTCombine(N, DAG, DCI, Subtarget);
   case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI);
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index ac49232..42a5014 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -153,7 +153,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
     hasCtrlDep = 1, isCodeGenOnly = 1 in {
 def EH_RETURN   : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
                     "ret\t#eh_return, addr: $addr",
-                    [(X86ehret GR32:$addr)]>;
+                    [(X86ehret GR32:$addr)], IIC_RET>;
 
 }
 
@@ -161,7 +161,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
     hasCtrlDep = 1, isCodeGenOnly = 1 in {
 def EH_RETURN64   : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
                      "ret\t#eh_return, addr: $addr",
-                     [(X86ehret GR64:$addr)]>;
+                     [(X86ehret GR64:$addr)], IIC_RET>;
 
 }
 
@@ -193,7 +193,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
 let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
     isCodeGenOnly = 1 in {
 def MOV8r0   : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
-                 [(set GR8:$dst, 0)]>;
+                 [(set GR8:$dst, 0)], IIC_ALU_NONMEM>;
 
 // We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
 // encoding and avoids a partial-register update sometimes, but doing so
@@ -202,11 +202,11 @@ def MOV8r0   : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
 // to an MCInst.
 def MOV16r0   : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
                  "",
-                 [(set GR16:$dst, 0)]>, OpSize;
+                 [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize;
 
 // FIXME: Set encoding to pseudo.
 def MOV32r0  : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
-                 [(set GR32:$dst, 0)]>;
+                 [(set GR32:$dst, 0)], IIC_ALU_NONMEM>;
 }
 
 // We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
@@ -218,7 +218,7 @@ def MOV32r0  : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
 let Defs = [EFLAGS], isCodeGenOnly=1,
     AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
 def MOV64r0   : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
-                 [(set GR64:$dst, 0)]>;
+                 [(set GR64:$dst, 0)], IIC_ALU_NONMEM>;
 
 // Materialize i64 constant where top 32-bits are zero. This could theoretically
 // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
@@ -226,7 +226,8 @@ def MOV64r0   : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
 let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
     isCodeGenOnly = 1 in
 def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
-                        "", [(set GR64:$dst, i64immZExt32:$src)]>;
+                        "", [(set GR64:$dst, i64immZExt32:$src)],
+                        IIC_ALU_NONMEM>;
 
 // Use sbb to materialize carry bit.
 let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
@@ -236,14 +237,18 @@ let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
 // FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
 // X86CodeEmitter.
 def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
-                 [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+                 [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+                 IIC_ALU_NONMEM>;
 def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
-                 [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
+                 [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+                 IIC_ALU_NONMEM>,
                 OpSize;
 def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
-                 [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+                 [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+                 IIC_ALU_NONMEM>;
 def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
-                 [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+                 [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+                 IIC_ALU_NONMEM>;
 } // isCodeGenOnly
 
 
@@ -297,32 +302,32 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
 //
 let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
 def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
-                  [(X86rep_movs i8)]>, REP;
+                  [(X86rep_movs i8)], IIC_REP_MOVS>, REP;
 def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
-                  [(X86rep_movs i16)]>, REP, OpSize;
+                  [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize;
 def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
-                  [(X86rep_movs i32)]>, REP;
+                  [(X86rep_movs i32)], IIC_REP_MOVS>, REP;
 }
 
 let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
 def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
-                   [(X86rep_movs i64)]>, REP;
+                   [(X86rep_movs i64)], IIC_REP_MOVS>, REP;
 
 
 // FIXME: Should use "(X86rep_stos AL)" as the pattern.
 let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
 def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
-                  [(X86rep_stos i8)]>, REP;
+                  [(X86rep_stos i8)], IIC_REP_STOS>, REP;
 let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
 def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
-                  [(X86rep_stos i16)]>, REP, OpSize;
+                  [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize;
 let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
 def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
-                  [(X86rep_stos i32)]>, REP;
+                  [(X86rep_stos i32)], IIC_REP_STOS>, REP;
 
 let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
 def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
-                   [(X86rep_stos i64)]>, REP;
+                   [(X86rep_stos i64)], IIC_REP_STOS>, REP;
 
 
 //===----------------------------------------------------------------------===//
@@ -571,7 +576,7 @@ let isCodeGenOnly = 1, Defs = [EFLAGS] in
 def OR32mrLocked  : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
                       "lock\n\t"
                       "or{l}\t{$zero, $dst|$dst, $zero}",
-                      []>, Requires<[In32BitMode]>, LOCK;
+                      [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK;
 
 let hasSideEffects = 1 in
 def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
@@ -591,72 +596,72 @@ def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                    MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
                    !strconcat("lock\n\t", mnemonic, "{b}\t",
                               "{$src2, $dst|$dst, $src2}"),
-                   []>, LOCK;
+                   [], IIC_ALU_NONMEM>, LOCK;
 def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                     RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
                     MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
                     !strconcat("lock\n\t", mnemonic, "{w}\t",
                                "{$src2, $dst|$dst, $src2}"),
-                    []>, OpSize, LOCK;
+                    [], IIC_ALU_NONMEM>, OpSize, LOCK;
 def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                     RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
                     MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
                     !strconcat("lock\n\t", mnemonic, "{l}\t",
                                "{$src2, $dst|$dst, $src2}"),
-                    []>, LOCK;
+                    [], IIC_ALU_NONMEM>, LOCK;
 def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                      RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
                      MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
                      !strconcat("lock\n\t", mnemonic, "{q}\t",
                                 "{$src2, $dst|$dst, $src2}"),
-                     []>, LOCK;
+                     [], IIC_ALU_NONMEM>, LOCK;
 
 def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                      ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
                      ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
                      !strconcat("lock\n\t", mnemonic, "{b}\t",
                                 "{$src2, $dst|$dst, $src2}"),
-                     []>, LOCK;
+                     [], IIC_ALU_MEM>, LOCK;
 
 def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                        ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
                        ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
                        !strconcat("lock\n\t", mnemonic, "{w}\t",
                                   "{$src2, $dst|$dst, $src2}"),
-                       []>, LOCK;
+                       [], IIC_ALU_MEM>, LOCK;
 
 def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                        ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
                        ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
                        !strconcat("lock\n\t", mnemonic, "{l}\t",
                                   "{$src2, $dst|$dst, $src2}"),
-                       []>, LOCK;
+                       [], IIC_ALU_MEM>, LOCK;
 
 def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                           ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
                           ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
                           !strconcat("lock\n\t", mnemonic, "{q}\t",
                                      "{$src2, $dst|$dst, $src2}"),
-                          []>, LOCK;
+                          [], IIC_ALU_MEM>, LOCK;
 
 def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
                        ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
                        ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
                        !strconcat("lock\n\t", mnemonic, "{w}\t",
                                   "{$src2, $dst|$dst, $src2}"),
-                       []>, LOCK;
+                       [], IIC_ALU_MEM>, LOCK;
 def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
                        ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
                        ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
                        !strconcat("lock\n\t", mnemonic, "{l}\t",
                                   "{$src2, $dst|$dst, $src2}"),
-                       []>, LOCK;
+                       [], IIC_ALU_MEM>, LOCK;
 def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
                         ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
                         ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
                         !strconcat("lock\n\t", mnemonic, "{q}\t",
                                    "{$src2, $dst|$dst, $src2}"),
-                        []>, LOCK;
+                        [], IIC_ALU_MEM>, LOCK;
 
 }
 
@@ -673,29 +678,29 @@ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
 
 def LOCK_INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
                     "lock\n\t"
-                    "inc{b}\t$dst", []>, LOCK;
+                    "inc{b}\t$dst", [], IIC_UNARY_MEM>, LOCK;
 def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
                     "lock\n\t"
-                    "inc{w}\t$dst", []>, OpSize, LOCK;
+                    "inc{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK;
 def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
                     "lock\n\t"
-                    "inc{l}\t$dst", []>, LOCK;
+                    "inc{l}\t$dst", [], IIC_UNARY_MEM>, LOCK;
 def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
                      "lock\n\t"
-                     "inc{q}\t$dst", []>, LOCK;
+                     "inc{q}\t$dst", [], IIC_UNARY_MEM>, LOCK;
 
 def LOCK_DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
                     "lock\n\t"
-                    "dec{b}\t$dst", []>, LOCK;
+                    "dec{b}\t$dst", [], IIC_UNARY_MEM>, LOCK;
 def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
                     "lock\n\t"
-                    "dec{w}\t$dst", []>, OpSize, LOCK;
+                    "dec{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK;
 def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
                     "lock\n\t"
-                    "dec{l}\t$dst", []>, LOCK;
+                    "dec{l}\t$dst", [], IIC_UNARY_MEM>, LOCK;
 def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
                       "lock\n\t"
-                      "dec{q}\t$dst", []>, LOCK;
+                      "dec{q}\t$dst", [], IIC_UNARY_MEM>, LOCK;
 }
 
 // Atomic compare and swap.
@@ -704,42 +709,42 @@ let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
 def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
                "lock\n\t"
                "cmpxchg8b\t$ptr",
-               [(X86cas8 addr:$ptr)]>, TB, LOCK;
+               [(X86cas8 addr:$ptr)], IIC_CMPX_LOCK_8B>, TB, LOCK;
 
 let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
     isCodeGenOnly = 1 in
 def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
                     "lock\n\t"
                     "cmpxchg16b\t$ptr",
-                    [(X86cas16 addr:$ptr)]>, TB, LOCK,
+                    [(X86cas16 addr:$ptr)], IIC_CMPX_LOCK_16B>, TB, LOCK,
                     Requires<[HasCmpxchg16b]>;
 
 let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in {
 def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
                "lock\n\t"
                "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
-               [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
+               [(X86cas addr:$ptr, GR8:$swap, 1)], IIC_CMPX_LOCK_8>, TB, LOCK;
 }
 
 let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in {
 def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
                "lock\n\t"
                "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
-               [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
+               [(X86cas addr:$ptr, GR16:$swap, 2)], IIC_CMPX_LOCK>, TB, OpSize, LOCK;
 }
 
 let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in {
 def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
                "lock\n\t"
                "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
-               [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
+               [(X86cas addr:$ptr, GR32:$swap, 4)], IIC_CMPX_LOCK>, TB, LOCK;
 }
 
 let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in {
 def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
                "lock\n\t"
                "cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}",
-               [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
+               [(X86cas addr:$ptr, GR64:$swap, 8)], IIC_CMPX_LOCK>, TB, LOCK;
 }
 
 // Atomic exchange and add
@@ -747,22 +752,26 @@ let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
 def LXADD8  : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
                "lock\n\t"
                "xadd{b}\t{$val, $ptr|$ptr, $val}",
-               [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
+               [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))],
+                IIC_XADD_LOCK_MEM8>,
                 TB, LOCK;
 def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
                "lock\n\t"
                "xadd{w}\t{$val, $ptr|$ptr, $val}",
-               [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
+               [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))],
+                IIC_XADD_LOCK_MEM>,
                 TB, OpSize, LOCK;
 def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
                "lock\n\t"
                "xadd{l}\t{$val, $ptr|$ptr, $val}",
-               [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
+               [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))],
+                IIC_XADD_LOCK_MEM>,
                 TB, LOCK;
 def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
                "lock\n\t"
                "xadd{q}\t{$val, $ptr|$ptr, $val}",
-               [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
+               [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))],
+                IIC_XADD_LOCK_MEM>,
                 TB, LOCK;
 }
 
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 4f9f089..ae3ed1b 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -218,6 +218,11 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
   return cast<LoadSDNode>(N)->getAlignment() >= 16;
 }]>;
 
+// Like 'X86vzload', but always requires 128-bit vector alignment.
+def alignedX86vzload : PatFrag<(ops node:$ptr), (X86vzload node:$ptr), [{
+  return cast<MemSDNode>(N)->getAlignment() >= 16;
+}]>;
+
 // Like 'load', but always requires 256-bit vector alignment.
 def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
   return cast<LoadSDNode>(N)->getAlignment() >= 32;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 5a479f0..307c96b 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -25,13 +25,13 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/MC/MCAsmInfo.h"
 #include <limits>
 
 #define GET_INSTRINFO_CTOR
@@ -82,6 +82,12 @@ enum {
   TB_FOLDED_STORE = 1 << 19
 };
 
+struct X86OpTblEntry {
+  uint16_t RegOp;
+  uint16_t MemOp;
+  uint32_t Flags;
+};
+
 X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
                      ? X86::ADJCALLSTACKDOWN64
@@ -91,7 +97,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
                      : X86::ADJCALLSTACKUP32)),
     TM(tm), RI(tm, *this) {
 
-  static const unsigned OpTbl2Addr[][3] = {
+  static const X86OpTblEntry OpTbl2Addr[] = {
     { X86::ADC32ri,     X86::ADC32mi,    0 },
     { X86::ADC32ri8,    X86::ADC32mi8,   0 },
     { X86::ADC32rr,     X86::ADC32mr,    0 },
@@ -259,16 +265,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
-    unsigned RegOp = OpTbl2Addr[i][0];
-    unsigned MemOp = OpTbl2Addr[i][1];
-    unsigned Flags = OpTbl2Addr[i][2];
+    unsigned RegOp = OpTbl2Addr[i].RegOp;
+    unsigned MemOp = OpTbl2Addr[i].MemOp;
+    unsigned Flags = OpTbl2Addr[i].Flags;
     AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
                   RegOp, MemOp,
                   // Index 0, folded load and store, no alignment requirement.
                   Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
   }
 
-  static const unsigned OpTbl0[][3] = {
+  static const X86OpTblEntry OpTbl0[] = {
     { X86::BT16ri8,     X86::BT16mi8,       TB_FOLDED_LOAD },
     { X86::BT32ri8,     X86::BT32mi8,       TB_FOLDED_LOAD },
     { X86::BT64ri8,     X86::BT64mi8,       TB_FOLDED_LOAD },
@@ -370,14 +376,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
-    unsigned RegOp      = OpTbl0[i][0];
-    unsigned MemOp      = OpTbl0[i][1];
-    unsigned Flags      = OpTbl0[i][2];
+    unsigned RegOp      = OpTbl0[i].RegOp;
+    unsigned MemOp      = OpTbl0[i].MemOp;
+    unsigned Flags      = OpTbl0[i].Flags;
     AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
                   RegOp, MemOp, TB_INDEX_0 | Flags);
   }
 
-  static const unsigned OpTbl1[][3] = {
+  static const X86OpTblEntry OpTbl1[] = {
     { X86::CMP16rr,         X86::CMP16rm,             0 },
     { X86::CMP32rr,         X86::CMP32rm,             0 },
     { X86::CMP64rr,         X86::CMP64rm,             0 },
@@ -555,16 +561,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
-    unsigned RegOp = OpTbl1[i][0];
-    unsigned MemOp = OpTbl1[i][1];
-    unsigned Flags = OpTbl1[i][2];
+    unsigned RegOp = OpTbl1[i].RegOp;
+    unsigned MemOp = OpTbl1[i].MemOp;
+    unsigned Flags = OpTbl1[i].Flags;
     AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
                   RegOp, MemOp,
                   // Index 1, folded load
                   Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
   }
 
-  static const unsigned OpTbl2[][3] = {
+  static const X86OpTblEntry OpTbl2[] = {
     { X86::ADC32rr,         X86::ADC32rm,       0 },
     { X86::ADC64rr,         X86::ADC64rm,       0 },
     { X86::ADD16rr,         X86::ADD16rm,       0 },
@@ -1108,9 +1114,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
-    unsigned RegOp = OpTbl2[i][0];
-    unsigned MemOp = OpTbl2[i][1];
-    unsigned Flags = OpTbl2[i][2];
+    unsigned RegOp = OpTbl2[i].RegOp;
+    unsigned MemOp = OpTbl2[i].MemOp;
+    unsigned Flags = OpTbl2[i].Flags;
     AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
                   RegOp, MemOp,
                   // Index 2, folded load
@@ -3627,7 +3633,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
 // These are the replaceable SSE instructions. Some of these have Int variants
 // that we don't include here. We don't want to replace instructions selected
 // by intrinsics.
-static const unsigned ReplaceableInstrs[][3] = {
+static const uint16_t ReplaceableInstrs[][3] = {
   //PackedSingle     PackedDouble    PackedInt
   { X86::MOVAPSmr,   X86::MOVAPDmr,  X86::MOVDQAmr  },
   { X86::MOVAPSrm,   X86::MOVAPDrm,  X86::MOVDQArm  },
@@ -3667,7 +3673,7 @@ static const unsigned ReplaceableInstrs[][3] = {
   { X86::VMOVNTPSYmr,  X86::VMOVNTPDYmr,  X86::VMOVNTDQYmr }
 };
 
-static const unsigned ReplaceableInstrsAVX2[][3] = {
+static const uint16_t ReplaceableInstrsAVX2[][3] = {
   //PackedSingle       PackedDouble       PackedInt
   { X86::VANDNPSYrm,   X86::VANDNPDYrm,   X86::VPANDNYrm   },
   { X86::VANDNPSYrr,   X86::VANDNPDYrr,   X86::VPANDNYrr   },
@@ -3688,14 +3694,14 @@ static const unsigned ReplaceableInstrsAVX2[][3] = {
 // FIXME: Some shuffle and unpack instructions have equivalents in different
 // domains, but they require a bit more work than just switching opcodes.
 
-static const unsigned *lookup(unsigned opcode, unsigned domain) {
+static const uint16_t *lookup(unsigned opcode, unsigned domain) {
   for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
     if (ReplaceableInstrs[i][domain-1] == opcode)
       return ReplaceableInstrs[i];
   return 0;
 }
 
-static const unsigned *lookupAVX2(unsigned opcode, unsigned domain) {
+static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
   for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
     if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
       return ReplaceableInstrsAVX2[i];
@@ -3718,7 +3724,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
   assert(Domain>0 && Domain<4 && "Invalid execution domain");
   uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
   assert(dom && "Not an SSE instruction");
-  const unsigned *table = lookup(MI->getOpcode(), dom);
+  const uint16_t *table = lookup(MI->getOpcode(), dom);
   if (!table) { // try the other table
     assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) &&
            "256-bit vector operations only available in AVX2");
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index d065d2d..b23d756 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -14,10 +14,10 @@
 #ifndef X86INSTRUCTIONINFO_H
 #define X86INSTRUCTIONINFO_H
 
-#include "llvm/Target/TargetInstrInfo.h"
 #include "X86.h"
 #include "X86RegisterInfo.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 #define GET_INSTRINFO_HEADER
 #include "X86GenInstrInfo.inc"
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index f585b47..dd7cf50 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1856,19 +1856,19 @@ def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>;
 def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>;
 
 // shld/shrd op,op -> shld op, op, CL
-def : InstAlias<"shldw $r1, $r2", (SHLD16rrCL GR16:$r1, GR16:$r2)>;
-def : InstAlias<"shldl $r1, $r2", (SHLD32rrCL GR32:$r1, GR32:$r2)>;
-def : InstAlias<"shldq $r1, $r2", (SHLD64rrCL GR64:$r1, GR64:$r2)>;
-def : InstAlias<"shrdw $r1, $r2", (SHRD16rrCL GR16:$r1, GR16:$r2)>;
-def : InstAlias<"shrdl $r1, $r2", (SHRD32rrCL GR32:$r1, GR32:$r2)>;
-def : InstAlias<"shrdq $r1, $r2", (SHRD64rrCL GR64:$r1, GR64:$r2)>;
-
-def : InstAlias<"shldw $mem, $reg", (SHLD16mrCL i16mem:$mem, GR16:$reg)>;
-def : InstAlias<"shldl $mem, $reg", (SHLD32mrCL i32mem:$mem, GR32:$reg)>;
-def : InstAlias<"shldq $mem, $reg", (SHLD64mrCL i64mem:$mem, GR64:$reg)>;
-def : InstAlias<"shrdw $mem, $reg", (SHRD16mrCL i16mem:$mem, GR16:$reg)>;
-def : InstAlias<"shrdl $mem, $reg", (SHRD32mrCL i32mem:$mem, GR32:$reg)>;
-def : InstAlias<"shrdq $mem, $reg", (SHRD64mrCL i64mem:$mem, GR64:$reg)>;
+def : InstAlias<"shldw $r2, $r1", (SHLD16rrCL GR16:$r1, GR16:$r2)>;
+def : InstAlias<"shldl $r2, $r1", (SHLD32rrCL GR32:$r1, GR32:$r2)>;
+def : InstAlias<"shldq $r2, $r1", (SHLD64rrCL GR64:$r1, GR64:$r2)>;
+def : InstAlias<"shrdw $r2, $r1", (SHRD16rrCL GR16:$r1, GR16:$r2)>;
+def : InstAlias<"shrdl $r2, $r1", (SHRD32rrCL GR32:$r1, GR32:$r2)>;
+def : InstAlias<"shrdq $r2, $r1", (SHRD64rrCL GR64:$r1, GR64:$r2)>;
+
+def : InstAlias<"shldw $reg, $mem", (SHLD16mrCL i16mem:$mem, GR16:$reg)>;
+def : InstAlias<"shldl $reg, $mem", (SHLD32mrCL i32mem:$mem, GR32:$reg)>;
+def : InstAlias<"shldq $reg, $mem", (SHLD64mrCL i64mem:$mem, GR64:$reg)>;
+def : InstAlias<"shrdw $reg, $mem", (SHRD16mrCL i16mem:$mem, GR16:$reg)>;
+def : InstAlias<"shrdl $reg, $mem", (SHRD32mrCL i32mem:$mem, GR32:$reg)>;
+def : InstAlias<"shrdq $reg, $mem", (SHRD64mrCL i64mem:$mem, GR64:$reg)>;
 
 /*  FIXME: This is disabled because the asm matcher is currently incapable of
  *  matching a fixed immediate like $1.
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index c6d1d19..df42627 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -641,7 +641,7 @@ let Predicates = [HasAVX] in {
               (VMOVSDrr (v2i64 (V_SET0)),
                         (EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>;
 
-// Extract and store.
+  // Extract and store.
   def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
                    addr:$dst),
             (VMOVSSmr addr:$dst,
@@ -2306,7 +2306,7 @@ let Defs = [EFLAGS] in {
                                   "comisd", SSEPackedDouble>, TB, OpSize;
 } // Defs = [EFLAGS]
 
-// sse12_cmp_packed - sse 1 & 2 compared packed instructions
+// sse12_cmp_packed - sse 1 & 2 compare packed instructions
 multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
                             Intrinsic Int, string asm, string asm_alt,
                             Domain d> {
@@ -4820,8 +4820,10 @@ let Predicates = [HasSSE2], AddedComplexity = 20 in {
 }
 
 let Predicates = [HasAVX] in {
-def : Pat<(v4i64 (X86vzload addr:$src)),
+def : Pat<(v4i64 (alignedX86vzload addr:$src)),
           (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>;
+def : Pat<(v4i64 (X86vzload addr:$src)),
+          (SUBREG_TO_REG (i32 0), (VMOVUPSrm addr:$src), sub_xmm)>;
 }
 
 //===---------------------------------------------------------------------===//
@@ -7307,6 +7309,24 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
           []>, VEX;
 }
 
+// Extract and store.
+let Predicates = [HasAVX] in {
+  def : Pat<(alignedstore (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), addr:$dst),
+          (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+  def : Pat<(alignedstore (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), addr:$dst),
+          (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+  def : Pat<(alignedstore (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), addr:$dst),
+          (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+
+  def : Pat<(int_x86_sse_storeu_ps addr:$dst, (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2)),
+          (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+  def : Pat<(int_x86_sse2_storeu_pd addr:$dst, (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2)),
+          (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+  def : Pat<(int_x86_sse2_storeu_dq addr:$dst, (bc_v16i8 (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2))),
+          (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+}
+
+// AVX1 patterns
 let Predicates = [HasAVX] in {
 def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
           (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
@@ -7314,6 +7334,31 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
           (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
 def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
           (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+          (v4f32 (VEXTRACTF128rr
+                    (v8f32 VR256:$src1),
+                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+          (v2f64 (VEXTRACTF128rr
+                    (v4f64 VR256:$src1),
+                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+          (v2i64 (VEXTRACTF128rr
+                    (v4i64 VR256:$src1),
+                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+          (v4i32 (VEXTRACTF128rr
+                    (v8i32 VR256:$src1),
+                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+          (v8i16 (VEXTRACTF128rr
+                    (v16i16 VR256:$src1),
+                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+          (v16i8 (VEXTRACTF128rr
+                    (v32i8 VR256:$src1),
+                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -7711,7 +7756,7 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
             (int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2),
              imm:$src3))]>, VEX_4V;
 
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2], AddedComplexity = 1 in {
 def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
                                    (i32 imm)),
           (VINSERTI128rr VR256:$src1, VR128:$src2,
@@ -7756,6 +7801,19 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
                                    (i32 imm)),
           (VINSERTF128rr VR256:$src1, VR128:$src2,
                          (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rm VR256:$src1, addr:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rm VR256:$src1, addr:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rm VR256:$src1, addr:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -7791,34 +7849,6 @@ def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
                     (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
 }
 
-// AVX1 patterns
-let Predicates = [HasAVX] in {
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
-          (v4f32 (VEXTRACTF128rr
-                    (v8f32 VR256:$src1),
-                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
-          (v2f64 (VEXTRACTF128rr
-                    (v4f64 VR256:$src1),
-                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
-          (v2i64 (VEXTRACTF128rr
-                    (v4i64 VR256:$src1),
-                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
-          (v4i32 (VEXTRACTF128rr
-                    (v8i32 VR256:$src1),
-                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
-          (v8i16 (VEXTRACTF128rr
-                    (v16i16 VR256:$src1),
-                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
-          (v16i8 (VEXTRACTF128rr
-                    (v32i8 VR256:$src1),
-                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-}
-
 //===----------------------------------------------------------------------===//
 // VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
 //
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 8843848..bddba6c 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -45,17 +45,17 @@ def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
 
 
 def SYSCALL  : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
-def SYSRETL  : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB;
-def SYSRETQ  :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
+def SYSRET   : I<0x07, RawFrm, (outs), (ins), "sysret{l}", []>, TB;
+def SYSRET64 :RI<0x07, RawFrm, (outs), (ins), "sysret{q}", []>, TB,
                Requires<[In64BitMode]>;
 
 def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;
                  
-def SYSEXIT   : I<0x35, RawFrm, (outs), (ins), "sysexitl", []>, TB;
-def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexitq", []>, TB,
+def SYSEXIT   : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", []>, TB;
+def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit{q}", []>, TB,
                 Requires<[In64BitMode]>;
 
-def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iretw", []>, OpSize;
+def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
 def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>;
 def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>,
              Requires<[In64BitMode]>;
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index a7a5c56..b578e8d 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -12,10 +12,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "InstPrinter/X86ATTInstPrinter.h"
 #include "X86MCInstLower.h"
 #include "X86AsmPrinter.h"
 #include "X86COFFMachineModuleInfo.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "llvm/Type.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -26,7 +27,6 @@
 #include "llvm/Target/Mangler.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/Type.h"
 using namespace llvm;
 
 X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 93e2744..b56025f 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -13,8 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "X86.h"
 #include "X86RegisterInfo.h"
+#include "X86.h"
 #include "X86InstrBuilder.h"
 #include "X86MachineFunctionInfo.h"
 #include "X86Subtarget.h"
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index d6d0149..17f4efd 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -114,6 +114,9 @@ def IIC_MOVZX : InstrItinClass;
 def IIC_MOVZX_R16_R8 : InstrItinClass;
 def IIC_MOVZX_R16_M8 : InstrItinClass;
 
+def IIC_REP_MOVS : InstrItinClass;
+def IIC_REP_STOS : InstrItinClass;
+
 // SSE scalar/parallel binary operations
 def IIC_SSE_ALU_F32S_RR : InstrItinClass;
 def IIC_SSE_ALU_F32S_RM : InstrItinClass;
@@ -250,6 +253,14 @@ def IIC_SSE_CVT_SS2SI64_RR : InstrItinClass;
 def IIC_SSE_CVT_SD2SI_RM : InstrItinClass;
 def IIC_SSE_CVT_SD2SI_RR : InstrItinClass;
 
+def IIC_CMPX_LOCK : InstrItinClass;
+def IIC_CMPX_LOCK_8 : InstrItinClass;
+def IIC_CMPX_LOCK_8B : InstrItinClass;
+def IIC_CMPX_LOCK_16B : InstrItinClass;
+
+def IIC_XADD_LOCK_MEM : InstrItinClass;
+def IIC_XADD_LOCK_MEM8 : InstrItinClass;
+
 
 //===----------------------------------------------------------------------===//
 // Processor instruction itineraries.
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index e8cf72a..77d4e56 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -144,6 +144,9 @@ def AtomItineraries : ProcessorItineraries<
   InstrItinData<IIC_MOVZX_R16_R8, [InstrStage<2, [Port0, Port1]>] >,
   InstrItinData<IIC_MOVZX_R16_M8, [InstrStage<3, [Port0, Port1]>] >,
 
+  InstrItinData<IIC_REP_MOVS, [InstrStage<75, [Port0, Port1]>] >,
+  InstrItinData<IIC_REP_STOS, [InstrStage<74, [Port0, Port1]>] >,
+
   // SSE binary operations
   // arithmetic fp scalar
   InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<5, [Port1]>] >,
@@ -289,6 +292,14 @@ def AtomItineraries : ProcessorItineraries<
   InstrItinData<IIC_SSE_CVT_SS2SI64_RR, [InstrStage<9, [Port0, Port1]>] >,
   InstrItinData<IIC_SSE_CVT_SS2SI64_RM, [InstrStage<10, [Port0, Port1]>] >,
   InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<8, [Port0, Port1]>] >,
-  InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] >
-]>;
+  InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] >,
+
+  InstrItinData<IIC_CMPX_LOCK, [InstrStage<14, [Port0, Port1]>] >,
+  InstrItinData<IIC_CMPX_LOCK_8, [InstrStage<6, [Port0, Port1]>] >,
+  InstrItinData<IIC_CMPX_LOCK_8B, [InstrStage<18, [Port0, Port1]>] >,
+  InstrItinData<IIC_CMPX_LOCK_16B, [InstrStage<22, [Port0, Port1]>] >,
+
+  InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<2, [Port0, Port1]>] >,
+  InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<3, [Port0, Port1]>] >
+  ]>;
 
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index a36d0d8..7fd832b 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -14,9 +14,9 @@
 #ifndef X86SUBTARGET_H
 #define X86SUBTARGET_H
 
+#include "llvm/CallingConv.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/CallingConv.h"
 #include <string>
 
 #define GET_SUBTARGETINFO_HEADER
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 143caba..8e935af 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -28,7 +28,6 @@
 
 namespace llvm {
 
-class formatted_raw_ostream;
 class StringRef;
 
 class X86TargetMachine : public LLVMTargetMachine {
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index ceb7a4a..a02a368 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -15,7 +15,6 @@
 #include "llvm/Target/TargetLoweringObjectFile.h"
 
 namespace llvm {
-  class X86TargetMachine;
 
   /// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin
   /// x86-64.
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 4d8ef74..50fda58 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "XCore.h"
 #include "XCoreFrameLowering.h"
+#include "XCore.h"
 #include "XCoreInstrInfo.h"
 #include "XCoreMachineFunctionInfo.h"
 #include "llvm/Function.h"
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index c2d2a5d..593cebc 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1152,7 +1152,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
 
   if (isVarArg) {
     /* Argument registers */
-    static const unsigned ArgRegs[] = {
+    static const uint16_t ArgRegs[] = {
       XCore::R0, XCore::R1, XCore::R2, XCore::R3
     };
     XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index f5a6822..5cd3e67 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -15,9 +15,9 @@
 #ifndef XCOREISELLOWERING_H
 #define XCOREISELLOWERING_H
 
+#include "XCore.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetLowering.h"
-#include "XCore.h"
 
 namespace llvm {
 
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index f930623..0a3008d 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "XCoreMachineFunctionInfo.h"
 #include "XCoreInstrInfo.h"
+#include "XCoreMachineFunctionInfo.h"
 #include "XCore.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index e47d212..42eeed8 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -14,8 +14,8 @@
 #ifndef XCOREINSTRUCTIONINFO_H
 #define XCOREINSTRUCTIONINFO_H
 
-#include "llvm/Target/TargetInstrInfo.h"
 #include "XCoreRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 #define GET_INSTRINFO_HEADER
 #include "XCoreGenInstrInfo.inc"
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 8730282..f3b4b4c 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -14,6 +14,8 @@
 #include "XCoreRegisterInfo.h"
 #include "XCoreMachineFunctionInfo.h"
 #include "XCore.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -24,8 +26,6 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Type.h"
-#include "llvm/Function.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
@@ -54,20 +54,6 @@ static inline bool isImmU16(unsigned val) {
   return val < (1 << 16);
 }
 
-static const unsigned XCore_ArgRegs[] = {
-  XCore::R0, XCore::R1, XCore::R2, XCore::R3
-};
-
-const unsigned * XCoreRegisterInfo::getArgRegs(const MachineFunction *MF)
-{
-  return XCore_ArgRegs;
-}
-
-unsigned XCoreRegisterInfo::getNumArgRegs(const MachineFunction *MF)
-{
-  return array_lengthof(XCore_ArgRegs);
-}
-
 bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
   return MF.getMMI().hasDebugInfo() ||
     MF.getFunction()->needsUnwindTableEntry();
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index ab6ce56..7391cfd 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -62,15 +62,6 @@ public:
   // Debug information queries.
   unsigned getFrameRegister(const MachineFunction &MF) const;
 
-  //! Return the array of argument passing registers
-  /*!
-    \note The size of this array is returned by getArgRegsSize().
-    */
-  static const unsigned *getArgRegs(const MachineFunction *MF = 0);
-
-  //! Return the size of the argument passing register array
-  static unsigned getNumArgRegs(const MachineFunction *MF = 0);
-  
   //! Return whether to emit frame moves
   static bool needsFrameMoves(const MachineFunction &MF);
 };
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 2c174f4..2546681 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -14,13 +14,13 @@
 #ifndef XCORETARGETMACHINE_H
 #define XCORETARGETMACHINE_H
 
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
 #include "XCoreFrameLowering.h"
 #include "XCoreSubtarget.h"
 #include "XCoreInstrInfo.h"
 #include "XCoreISelLowering.h"
 #include "XCoreSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
 
 namespace llvm {
 
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 58ab567..a32e550 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2561,11 +2561,6 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
           return false;
         delete ValueStack.pop_back_val();
         InstResult = RetVal;
-
-        if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
-          NextBB = II->getNormalDest();
-          return true;
-        }
       }
     } else if (isa<TerminatorInst>(CurInst)) {
       if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
@@ -2582,8 +2577,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
         ConstantInt *Val =
           dyn_cast<ConstantInt>(getVal(SI->getCondition()));
         if (!Val) return false;  // Cannot determine.
-        unsigned ValTISucc = SI->resolveSuccessorIndex(SI->findCaseValue(Val));
-        NextBB = SI->getSuccessor(ValTISucc);
+        NextBB = SI->findCaseValue(Val).getCaseSuccessor();
       } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
         Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
         if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
@@ -2611,6 +2605,12 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
       setVal(CurInst, InstResult);
     }
 
+    // If we just processed an invoke, we finished evaluating the block.
+    if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
+      NextBB = II->getNormalDest();
+      return true;
+    }
+
     // Advance program counter.
     ++CurInst;
   }
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 749a3fa..3c7fac6 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -32,8 +32,6 @@ namespace {
 
   // AlwaysInliner only inlines functions that are mark as "always inline".
   class AlwaysInliner : public Inliner {
-    // Functions that are never inlined
-    SmallPtrSet<const Function*, 16> NeverInline;
     InlineCostAnalyzer CA;
   public:
     // Use extremely low threshold.
@@ -46,7 +44,22 @@ namespace {
     }
     static char ID; // Pass identification, replacement for typeid
     InlineCost getInlineCost(CallSite CS) {
-      return CA.getInlineCost(CS, NeverInline);
+      Function *Callee = CS.getCalledFunction();
+      // We assume indirect calls aren't calling an always-inline function.
+      if (!Callee) return InlineCost::getNever();
+
+      // We can't inline calls to external functions.
+      // FIXME: We shouldn't even get here.
+      if (Callee->isDeclaration()) return InlineCost::getNever();
+
+      // Return never for anything not marked as always inline.
+      if (!Callee->hasFnAttr(Attribute::AlwaysInline))
+        return InlineCost::getNever();
+
+      // We still have to check the inline cost in case there are reasons to
+      // not inline which trump the always-inline attribute such as setjmp and
+      // indirectbr.
+      return CA.getInlineCost(CS);
     }
     float getInlineFudgeFactor(CallSite CS) {
       return CA.getInlineFudgeFactor(CS);
@@ -58,7 +71,7 @@ namespace {
       CA.growCachedCostInfo(Caller, Callee);
     }
     virtual bool doFinalization(CallGraph &CG) {
-      return removeDeadFunctions(CG, &NeverInline);
+      return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true);
     }
     virtual bool doInitialization(CallGraph &CG);
     void releaseMemory() {
@@ -84,12 +97,5 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
 // been annotated with the "always inline" attribute.
 bool AlwaysInliner::doInitialization(CallGraph &CG) {
   CA.setTargetData(getAnalysisIfAvailable<TargetData>());
-
-  Module &M = CG.getModule();
-
-  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
-    if (!I->isDeclaration() && !I->hasFnAttr(Attribute::AlwaysInline))
-      NeverInline.insert(I);
-
   return false;
 }
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index b3421eb..03032e6 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -23,15 +23,12 @@
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
 #include "llvm/Target/TargetData.h"
-#include "llvm/ADT/SmallPtrSet.h"
 
 using namespace llvm;
 
 namespace {
 
   class SimpleInliner : public Inliner {
-    // Functions that are never inlined
-    SmallPtrSet<const Function*, 16> NeverInline;
     InlineCostAnalyzer CA;
   public:
     SimpleInliner() : Inliner(ID) {
@@ -43,7 +40,7 @@ namespace {
     }
     static char ID; // Pass identification, replacement for typeid
     InlineCost getInlineCost(CallSite CS) {
-      return CA.getInlineCost(CS, NeverInline);
+      return CA.getInlineCost(CS);
     }
     float getInlineFudgeFactor(CallSite CS) {
       return CA.getInlineFudgeFactor(CS);
@@ -78,44 +75,6 @@ Pass *llvm::createFunctionInliningPass(int Threshold) {
 // annotated with the noinline attribute.
 bool SimpleInliner::doInitialization(CallGraph &CG) {
   CA.setTargetData(getAnalysisIfAvailable<TargetData>());
-
-  Module &M = CG.getModule();
-
-  for (Module::iterator I = M.begin(), E = M.end();
-       I != E; ++I)
-    if (!I->isDeclaration() && I->hasFnAttr(Attribute::NoInline))
-      NeverInline.insert(I);
-
-  // Get llvm.noinline
-  GlobalVariable *GV = M.getNamedGlobal("llvm.noinline");
-
-  if (GV == 0)
-    return false;
-
-  // Don't crash on invalid code
-  if (!GV->hasDefinitiveInitializer())
-    return false;
-
-  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
-
-  if (InitList == 0)
-    return false;
-
-  // Iterate over each element and add to the NeverInline set
-  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
-
-    // Get Source
-    const Constant *Elt = InitList->getOperand(i);
-
-    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt))
-      if (CE->getOpcode() == Instruction::BitCast)
-        Elt = CE->getOperand(0);
-
-    // Insert into set of functions to never inline
-    if (const Function *F = dyn_cast<Function>(Elt))
-      NeverInline.insert(F);
-  }
-
   return false;
 }
 
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 1f7625d..9975333 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -19,6 +19,7 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
 #include "llvm/Transforms/Utils/Cloning.h"
@@ -243,13 +244,20 @@ bool Inliner::shouldInline(CallSite CS) {
     return false;
   }
   
-  // Try to detect the case where the current inlining candidate caller
-  // (call it B) is a static function and is an inlining candidate elsewhere,
-  // and the current candidate callee (call it C) is large enough that
-  // inlining it into B would make B too big to inline later.  In these
-  // circumstances it may be best not to inline C into B, but to inline B
-  // into its callers.
-  if (Caller->hasLocalLinkage()) {
+  // Try to detect the case where the current inlining candidate caller (call
+  // it B) is a static or linkonce-ODR function and is an inlining candidate
+  // elsewhere, and the current candidate callee (call it C) is large enough
+  // that inlining it into B would make B too big to inline later. In these
+  // circumstances it may be best not to inline C into B, but to inline B into
+  // its callers.
+  //
+  // This only applies to static and linkonce-ODR functions because those are
+  // expected to be available for inlining in the translation units where they
+  // are used. Thus we will always have the opportunity to make local inlining
+  // decisions. Importantly the linkonce-ODR linkage covers inline functions
+  // and templates in C++.
+  if (Caller->hasLocalLinkage() ||
+      Caller->getLinkage() == GlobalValue::LinkOnceODRLinkage) {
     int TotalSecondaryCost = 0;
     bool outerCallsFound = false;
     // This bool tracks what happens if we do NOT inline C into B.
@@ -327,6 +335,37 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
   return false;
 }
 
+/// \brief Simplify arguments going into a particular callsite.
+///
+/// This is important to do each time we add a callsite due to inlining so that
+/// constants and other entities which feed into inline cost estimation are
+/// properly recognized when analyzing the new callsite. Consider:
+///   void outer(int x) {
+///     if (x < 42)
+///       return inner(42 - x);
+///     ...
+///   }
+///   void inner(int x) {
+///     ...
+///   }
+///
+/// The inliner gives calls to 'outer' with a constant argument a bonus because
+/// it will delete one side of a branch. But the resulting call to 'inner'
+/// will, after inlining, also have a constant operand. We need to do just
+/// enough constant folding to expose this for callsite arguments. The rest
+/// will be taken care of after the inliner finishes running.
+static void simplifyCallSiteArguments(const TargetData *TD, CallSite CS) {
+  // FIXME: It would be nice to avoid this smallvector if RAUW doesn't
+  // invalidate operand iterators in any cases.
+  SmallVector<std::pair<Value *, Value*>, 4> SimplifiedArgs;
+  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+       I != E; ++I)
+    if (Instruction *Inst = dyn_cast<Instruction>(*I))
+      if (Value *SimpleArg = SimplifyInstruction(Inst, TD))
+        SimplifiedArgs.push_back(std::make_pair(Inst, SimpleArg));
+  for (unsigned Idx = 0, Size = SimplifiedArgs.size(); Idx != Size; ++Idx)
+    SimplifiedArgs[Idx].first->replaceAllUsesWith(SimplifiedArgs[Idx].second);
+}
 
 bool Inliner::runOnSCC(CallGraphSCC &SCC) {
   CallGraph &CG = getAnalysis<CallGraph>();
@@ -455,7 +494,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
           for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
                i != e; ++i) {
             Value *Ptr = InlineInfo.InlinedCalls[i];
-            CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+            CallSite NewCS = Ptr;
+            simplifyCallSiteArguments(TD, NewCS);
+            CallSites.push_back(std::make_pair(NewCS, NewHistoryID));
           }
         }
         
@@ -515,25 +556,27 @@ bool Inliner::doFinalization(CallGraph &CG) {
 
 /// removeDeadFunctions - Remove dead functions that are not included in
 /// DNR (Do Not Remove) list.
-bool Inliner::removeDeadFunctions(CallGraph &CG, 
-                                  SmallPtrSet<const Function *, 16> *DNR) {
-  SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove;
+bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
+  SmallVector<CallGraphNode*, 16> FunctionsToRemove;
 
   // Scan for all of the functions, looking for ones that should now be removed
   // from the program.  Insert the dead ones in the FunctionsToRemove set.
   for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
     CallGraphNode *CGN = I->second;
-    if (CGN->getFunction() == 0)
-      continue;
-    
     Function *F = CGN->getFunction();
-    
+    if (!F || F->isDeclaration())
+      continue;
+
+    // Handle the case when this function is called and we only want to care
+    // about always-inline functions. This is a bit of a hack to share code
+    // between here and the InlineAlways pass.
+    if (AlwaysInlineOnly && !F->hasFnAttr(Attribute::AlwaysInline))
+      continue;
+
     // If the only remaining users of the function are dead constants, remove
     // them.
     F->removeDeadConstantUsers();
 
-    if (DNR && DNR->count(F))
-      continue;
     if (!F->isDefTriviallyDead())
       continue;
     
@@ -546,24 +589,28 @@ bool Inliner::removeDeadFunctions(CallGraph &CG,
     CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
 
     // Removing the node for callee from the call graph and delete it.
-    FunctionsToRemove.insert(CGN);
+    FunctionsToRemove.push_back(CGN);
   }
+  if (FunctionsToRemove.empty())
+    return false;
 
   // Now that we know which functions to delete, do so.  We didn't want to do
   // this inline, because that would invalidate our CallGraph::iterator
   // objects. :(
   //
-  // Note that it doesn't matter that we are iterating over a non-stable set
+  // Note that it doesn't matter that we are iterating over a non-stable order
   // here to do this, it doesn't matter which order the functions are deleted
   // in.
-  bool Changed = false;
-  for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(),
-       E = FunctionsToRemove.end(); I != E; ++I) {
+  std::sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
+  FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(),
+                                      FunctionsToRemove.end()),
+                          FunctionsToRemove.end());
+  for (SmallVectorImpl<CallGraphNode *>::iterator I = FunctionsToRemove.begin(),
+                                                  E = FunctionsToRemove.end();
+       I != E; ++I) {
     resetCachedCostInfo((*I)->getFunction());
     delete CG.removeFunctionFromModule(*I);
     ++NumDeleted;
-    Changed = true;
   }
-
-  return Changed;
+  return true;
 }
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index cc8f5bf..1165660 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1929,8 +1929,11 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
       }
 
   // Canonicalize xor to the RHS.
-  if (match(Op0, m_Xor(m_Value(), m_Value())))
+  bool SwappedForXor = false;
+  if (match(Op0, m_Xor(m_Value(), m_Value()))) {
     std::swap(Op0, Op1);
+    SwappedForXor = true;
+  }
 
   // A | ( A ^ B) -> A |  B
   // A | (~A ^ B) -> A | ~B
@@ -1961,6 +1964,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
         return BinaryOperator::CreateOr(Not, Op0);
       }
 
+  if (SwappedForXor)
+    std::swap(Op0, Op1);
+
   if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
     if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
       if (Value *Res = FoldOrOfICmps(LHS, RHS))
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index 32009c3..99a02fc 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -55,9 +55,9 @@ public:
     Worklist.reserve(NumEntries+16);
     WorklistMap.resize(NumEntries);
     DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
-    for (; NumEntries; --NumEntries) {
+    for (unsigned Idx = 0; NumEntries; --NumEntries) {
       Instruction *I = List[NumEntries-1];
-      WorklistMap.insert(std::make_pair(I, Worklist.size()));
+      WorklistMap.insert(std::make_pair(I, Idx++));
       Worklist.push_back(I);
     }
   }
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 318256a..349ba83 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1245,15 +1245,15 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
     if (I->getOpcode() == Instruction::Add)
       if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
         // change 'switch (X+4) case 1:' into 'switch (X) case -3'
-        unsigned NumCases = SI.getNumCases();
         // Skip the first item since that's the default case.
-        for (unsigned i = 0; i < NumCases; ++i) {
-          ConstantInt* CaseVal = SI.getCaseValue(i);
+        for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
+             i != e; ++i) {
+          ConstantInt* CaseVal = i.getCaseValue();
           Constant* NewCaseVal = ConstantExpr::getSub(cast<Constant>(CaseVal),
                                                       AddRHS);
           assert(isa<ConstantInt>(NewCaseVal) &&
                  "Result of expression should be constant");
-          SI.setCaseValue(i, cast<ConstantInt>(NewCaseVal));
+          i.setValue(cast<ConstantInt>(NewCaseVal));
         }
         SI.setCondition(I->getOperand(0));
         Worklist.Add(I);
@@ -1873,9 +1873,10 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
       if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
         // See if this is an explicit destination.
-        for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i)
-          if (SI->getCaseValue(i) == Cond) {
-            BasicBlock *ReachableBB = SI->getCaseSuccessor(i);
+        for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+             i != e; ++i)
+          if (i.getCaseValue() == Cond) {
+            BasicBlock *ReachableBB = i.getCaseSuccessor();
             Worklist.push_back(ReachableBB);
             continue;
           }
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 123e399..b43b9e5 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -15,6 +15,7 @@
 
 #define DEBUG_TYPE "asan"
 
+#include "FunctionBlackList.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallSet.h"
@@ -29,8 +30,6 @@
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/IRBuilder.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Regex.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
 #include "llvm/Target/TargetData.h"
@@ -126,21 +125,6 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"),
 
 namespace {
 
-// Blacklisted functions are not instrumented.
-// The blacklist file contains one or more lines like this:
-// ---
-// fun:FunctionWildCard
-// ---
-// This is similar to the "ignore" feature of ThreadSanitizer.
-// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
-class BlackList {
- public:
-  BlackList(const std::string &Path);
-  bool isIn(const Function &F);
- private:
-  Regex *Functions;
-};
-
 /// AddressSanitizer: instrument the code in module to find memory bugs.
 struct AddressSanitizer : public ModulePass {
   AddressSanitizer();
@@ -195,7 +179,7 @@ struct AddressSanitizer : public ModulePass {
   Function *AsanCtorFunction;
   Function *AsanInitFunction;
   Instruction *CtorInsertBefore;
-  OwningPtr<BlackList> BL;
+  OwningPtr<FunctionBlackList> BL;
 };
 }  // namespace
 
@@ -470,7 +454,7 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
     GlobalVariable *G = GlobalsToChange[i];
     PointerType *PtrTy = cast<PointerType>(G->getType());
     Type *Ty = PtrTy->getElementType();
-    uint64_t SizeInBytes = TD->getTypeStoreSizeInBits(Ty) / 8;
+    uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
     uint64_t RightRedzoneSize = RedzoneSize +
         (RedzoneSize - (SizeInBytes % RedzoneSize));
     Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
@@ -551,7 +535,7 @@ bool AddressSanitizer::runOnModule(Module &M) {
   TD = getAnalysisIfAvailable<TargetData>();
   if (!TD)
     return false;
-  BL.reset(new BlackList(ClBlackListFile));
+  BL.reset(new FunctionBlackList(ClBlackListFile));
 
   CurrentModule = &M;
   C = &(M.getContext());
@@ -595,18 +579,23 @@ bool AddressSanitizer::runOnModule(Module &M) {
   if (ClGlobals)
     Res |= insertGlobalRedzones(M);
 
-  // Tell the run-time the current values of mapping offset and scale.
-  GlobalValue *asan_mapping_offset =
-      new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
-                     ConstantInt::get(IntptrTy, MappingOffset),
-                     kAsanMappingOffsetName);
-  GlobalValue *asan_mapping_scale =
-      new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
-                         ConstantInt::get(IntptrTy, MappingScale),
-                         kAsanMappingScaleName);
-  // Read these globals, otherwise they may be optimized away.
-  IRB.CreateLoad(asan_mapping_scale, true);
-  IRB.CreateLoad(asan_mapping_offset, true);
+  if (ClMappingOffsetLog >= 0) {
+    // Tell the run-time the current values of mapping offset and scale.
+    GlobalValue *asan_mapping_offset =
+        new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
+                       ConstantInt::get(IntptrTy, MappingOffset),
+                       kAsanMappingOffsetName);
+    // Read the global, otherwise it may be optimized away.
+    IRB.CreateLoad(asan_mapping_offset, true);
+  }
+  if (ClMappingScale) {
+    GlobalValue *asan_mapping_scale =
+        new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
+                           ConstantInt::get(IntptrTy, MappingScale),
+                           kAsanMappingScaleName);
+    // Read the global, otherwise it may be optimized away.
+    IRB.CreateLoad(asan_mapping_scale, true);
+  }
 
 
   for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
@@ -946,54 +935,3 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
 
   return true;
 }
-
-BlackList::BlackList(const std::string &Path) {
-  Functions = NULL;
-  const char *kFunPrefix = "fun:";
-  if (!ClBlackListFile.size()) return;
-  std::string Fun;
-
-  OwningPtr<MemoryBuffer> File;
-  if (error_code EC = MemoryBuffer::getFile(ClBlackListFile.c_str(), File)) {
-    report_fatal_error("Can't open blacklist file " + ClBlackListFile + ": " +
-                       EC.message());
-  }
-  MemoryBuffer *Buff = File.take();
-  const char *Data = Buff->getBufferStart();
-  size_t DataLen = Buff->getBufferSize();
-  SmallVector<StringRef, 16> Lines;
-  SplitString(StringRef(Data, DataLen), Lines, "\n\r");
-  for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) {
-    if (Lines[i].startswith(kFunPrefix)) {
-      std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix));
-      std::string ThisFuncRE;
-      // add ThisFunc replacing * with .*
-      for (size_t j = 0, n = ThisFunc.size(); j < n; j++) {
-        if (ThisFunc[j] == '*')
-          ThisFuncRE += '.';
-        ThisFuncRE += ThisFunc[j];
-      }
-      // Check that the regexp is valid.
-      Regex CheckRE(ThisFuncRE);
-      std::string Error;
-      if (!CheckRE.isValid(Error))
-        report_fatal_error("malformed blacklist regex: " + ThisFunc +
-                           ": " + Error);
-      // Append to the final regexp.
-      if (Fun.size())
-        Fun += "|";
-      Fun += ThisFuncRE;
-    }
-  }
-  if (Fun.size()) {
-    Functions = new Regex(Fun);
-  }
-}
-
-bool BlackList::isIn(const Function &F) {
-  if (Functions) {
-    bool Res = Functions->match(F.getName());
-    return Res;
-  }
-  return false;
-}
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index f8dbca3..e4c8cf1 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_llvm_library(LLVMInstrumentation
   AddressSanitizer.cpp
   EdgeProfiling.cpp
+  FunctionBlackList.cpp
   GCOVProfiling.cpp
   Instrumentation.cpp
   OptimalEdgeProfiling.cpp
diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.cpp b/lib/Transforms/Instrumentation/FunctionBlackList.cpp
new file mode 100644
index 0000000..188ea4d
--- /dev/null
+++ b/lib/Transforms/Instrumentation/FunctionBlackList.cpp
@@ -0,0 +1,79 @@
+//===-- FunctionBlackList.cpp - blacklist of functions --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer 
+// or ThreadSanitizer) to avoid instrumenting some functions based on
+// user-supplied blacklist.
+//
+//===----------------------------------------------------------------------===//
+
+#include "FunctionBlackList.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Function.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+
+namespace llvm {
+
+FunctionBlackList::FunctionBlackList(const std::string &Path) {
+  Functions = NULL;
+  const char *kFunPrefix = "fun:";
+  if (!Path.size()) return;
+  std::string Fun;
+
+  OwningPtr<MemoryBuffer> File;
+  if (error_code EC = MemoryBuffer::getFile(Path.c_str(), File)) {
+    report_fatal_error("Can't open blacklist file " + Path + ": " +
+                       EC.message());
+  }
+  MemoryBuffer *Buff = File.take();
+  const char *Data = Buff->getBufferStart();
+  size_t DataLen = Buff->getBufferSize();
+  SmallVector<StringRef, 16> Lines;
+  SplitString(StringRef(Data, DataLen), Lines, "\n\r");
+  for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) {
+    if (Lines[i].startswith(kFunPrefix)) {
+      std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix));
+      std::string ThisFuncRE;
+      // add ThisFunc replacing * with .*
+      for (size_t j = 0, n = ThisFunc.size(); j < n; j++) {
+        if (ThisFunc[j] == '*')
+          ThisFuncRE += '.';
+        ThisFuncRE += ThisFunc[j];
+      }
+      // Check that the regexp is valid.
+      Regex CheckRE(ThisFuncRE);
+      std::string Error;
+      if (!CheckRE.isValid(Error))
+        report_fatal_error("malformed blacklist regex: " + ThisFunc +
+                           ": " + Error);
+      // Append to the final regexp.
+      if (Fun.size())
+        Fun += "|";
+      Fun += ThisFuncRE;
+    }
+  }
+  if (Fun.size()) {
+    Functions = new Regex(Fun);
+  }
+}
+
+bool FunctionBlackList::isIn(const Function &F) {
+  if (Functions) {
+    bool Res = Functions->match(F.getName());
+    return Res;
+  }
+  return false;
+}
+
+}  // namespace llvm
diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.h b/lib/Transforms/Instrumentation/FunctionBlackList.h
new file mode 100644
index 0000000..c1239b9
--- /dev/null
+++ b/lib/Transforms/Instrumentation/FunctionBlackList.h
@@ -0,0 +1,37 @@
+//===-- FunctionBlackList.cpp - blacklist of functions ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions based on
+// user-supplied blacklist.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include <string>
+
+namespace llvm {
+class Function;
+class Regex;
+
+// Blacklisted functions are not instrumented.
+// The blacklist file contains one or more lines like this:
+// ---
+// fun:FunctionWildCard
+// ---
+// This is similar to the "ignore" feature of ThreadSanitizer.
+// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
+class FunctionBlackList {
+ public:
+  FunctionBlackList(const std::string &Path);
+  bool isIn(const Function &F);
+ private:
+  Regex *Functions;
+};
+
+}  // namespace llvm
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index d822535..85fda30 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -21,12 +21,14 @@
 
 #define DEBUG_TYPE "tsan"
 
+#include "FunctionBlackList.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/Function.h"
 #include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/MathExtras.h"
@@ -37,6 +39,9 @@
 
 using namespace llvm;
 
+static cl::opt<std::string>  ClBlackListFile("tsan-blacklist",
+       cl::desc("Blacklist file"), cl::Hidden);
+
 namespace {
 /// ThreadSanitizer: instrument the code in module to find races.
 struct ThreadSanitizer : public FunctionPass {
@@ -48,6 +53,7 @@ struct ThreadSanitizer : public FunctionPass {
 
  private:
   TargetData *TD;
+  OwningPtr<FunctionBlackList> BL;
   // Callbacks to run-time library are computed in doInitialization.
   Value *TsanFuncEntry;
   Value *TsanFuncExit;
@@ -76,6 +82,8 @@ bool ThreadSanitizer::doInitialization(Module &M) {
   TD = getAnalysisIfAvailable<TargetData>();
   if (!TD)
     return false;
+  BL.reset(new FunctionBlackList(ClBlackListFile));
+
   // Always insert a call to __tsan_init into the module's CTORs.
   IRBuilder<> IRB(M.getContext());
   Value *TsanInit = M.getOrInsertFunction("__tsan_init",
@@ -102,6 +110,7 @@ bool ThreadSanitizer::doInitialization(Module &M) {
 
 bool ThreadSanitizer::runOnFunction(Function &F) {
   if (!TD) return false;
+  if (BL->isIn(F)) return false;
   SmallVector<Instruction*, 8> RetVec;
   SmallVector<Instruction*, 8> LoadsAndStores;
   bool Res = false;
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index aad3a92..020ec57 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -579,6 +579,15 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
     return true;
   }
 
+  if (II && TLI) {
+    SmallVector<Value*, 2> PtrOps;
+    Type *AccessTy;
+    if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
+      while (!PtrOps.empty())
+        if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
+          return true;
+  }
+
   // From here on out we're working with named functions.
   if (CI->getCalledFunction() == 0) return false;
 
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index e275268..9b0aadb 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -28,6 +28,7 @@ STATISTIC(NumPhis,      "Number of phis propagated");
 STATISTIC(NumSelects,   "Number of selects propagated");
 STATISTIC(NumMemAccess, "Number of memory access targets propagated");
 STATISTIC(NumCmps,      "Number of comparisons propagated");
+STATISTIC(NumDeadCases, "Number of switch cases removed");
 
 namespace {
   class CorrelatedValuePropagation : public FunctionPass {
@@ -37,6 +38,7 @@ namespace {
     bool processPHI(PHINode *P);
     bool processMemAccess(Instruction *I);
     bool processCmp(CmpInst *C);
+    bool processSwitch(SwitchInst *SI);
 
   public:
     static char ID;
@@ -110,7 +112,8 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
     Changed = true;
   }
 
-  ++NumPhis;
+  if (Changed)
+    ++NumPhis;
 
   return Changed;
 }
@@ -173,6 +176,86 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
   return true;
 }
 
+/// processSwitch - Simplify a switch instruction by removing cases which can
+/// never fire.  If the uselessness of a case could be determined locally then
+/// constant propagation would already have figured it out.  Instead, walk the
+/// predecessors and statically evaluate cases based on information available
+/// on that edge.  Cases that cannot fire no matter what the incoming edge can
+/// safely be removed.  If a case fires on every incoming edge then the entire
+/// switch can be removed and replaced with a branch to the case destination.
+bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
+  Value *Cond = SI->getCondition();
+  BasicBlock *BB = SI->getParent();
+
+  // If the condition was defined in same block as the switch then LazyValueInfo
+  // currently won't say anything useful about it, though in theory it could.
+  if (isa<Instruction>(Cond) && cast<Instruction>(Cond)->getParent() == BB)
+    return false;
+
+  // If the switch is unreachable then trying to improve it is a waste of time.
+  pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
+  if (PB == PE) return false;
+
+  // Analyse each switch case in turn.  This is done in reverse order so that
+  // removing a case doesn't cause trouble for the iteration.
+  bool Changed = false;
+  for (SwitchInst::CaseIt CI = SI->case_end(), CE = SI->case_begin(); CI-- != CE;
+       ) {
+    ConstantInt *Case = CI.getCaseValue();
+
+    // Check to see if the switch condition is equal to/not equal to the case
+    // value on every incoming edge, equal/not equal being the same each time.
+    LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
+    for (pred_iterator PI = PB; PI != PE; ++PI) {
+      // Is the switch condition equal to the case value?
+      LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
+                                                              Cond, Case, *PI, BB);
+      // Give up on this case if nothing is known.
+      if (Value == LazyValueInfo::Unknown) {
+        State = LazyValueInfo::Unknown;
+        break;
+      }
+
+      // If this was the first edge to be visited, record that all other edges
+      // need to give the same result.
+      if (PI == PB) {
+        State = Value;
+        continue;
+      }
+
+      // If this case is known to fire for some edges and known not to fire for
+      // others then there is nothing we can do - give up.
+      if (Value != State) {
+        State = LazyValueInfo::Unknown;
+        break;
+      }
+    }
+
+    if (State == LazyValueInfo::False) {
+      // This case never fires - remove it.
+      CI.getCaseSuccessor()->removePredecessor(BB);
+      SI->removeCase(CI); // Does not invalidate the iterator.
+      ++NumDeadCases;
+      Changed = true;
+    } else if (State == LazyValueInfo::True) {
+      // This case always fires.  Arrange for the switch to be turned into an
+      // unconditional branch by replacing the switch condition with the case
+      // value.
+      SI->setCondition(Case);
+      NumDeadCases += SI->getNumCases();
+      Changed = true;
+      break;
+    }
+  }
+
+  if (Changed)
+    // If the switch has been simplified to the point where it can be replaced
+    // by a branch then do so now.
+    ConstantFoldTerminator(BB);
+
+  return Changed;
+}
+
 bool CorrelatedValuePropagation::runOnFunction(Function &F) {
   LVI = &getAnalysis<LazyValueInfo>();
 
@@ -200,6 +283,13 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
       }
     }
 
+    Instruction *Term = FI->getTerminator();
+    switch (Term->getOpcode()) {
+    case Instruction::Switch:
+      BBChanged |= processSwitch(cast<SwitchInst>(Term));
+      break;
+    }
+
     FnChanged |= BBChanged;
   }
 
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index fe05e35..ac80c48 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -2158,10 +2158,11 @@ bool GVN::processInstruction(Instruction *I) {
     Value *SwitchCond = SI->getCondition();
     BasicBlock *Parent = SI->getParent();
     bool Changed = false;
-    for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) {
-      BasicBlock *Dst = SI->getCaseSuccessor(i);
+    for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+         i != e; ++i) {
+      BasicBlock *Dst = i.getCaseSuccessor();
       if (isOnlyReachableViaThisEdge(Parent, Dst, DT))
-        Changed |= propagateEquality(SwitchCond, SI->getCaseValue(i), Dst);
+        Changed |= propagateEquality(SwitchCond, i.getCaseValue(), Dst);
     }
     return Changed;
   }
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index d1e57e1..490617a 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -450,8 +450,10 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
   }
 
   // Add a new IVUsers entry for the newly-created integer PHI.
-  if (IU)
-    IU->AddUsersIfInteresting(NewPHI);
+  if (IU) {
+    SmallPtrSet<Loop*, 16> SimplifiedLoopNests;
+    IU->AddUsersIfInteresting(NewPHI, SimplifiedLoopNests);
+  }
 
   Changed = true;
 }
@@ -1967,8 +1969,11 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   // loop exit test instruction.
   if (IU && NewICmp) {
     ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp);
-    if (NewICmpInst)
-      IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)));
+    if (NewICmpInst) {
+      SmallPtrSet<Loop*, 16> SimplifiedLoopNests;
+      IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)),
+                                SimplifiedLoopNests);
+    }
   }
   // Clean up dead instructions.
   Changed |= DeleteDeadPHIs(L->getHeader());
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index fa25a8f..429b61b 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -857,6 +857,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
   if (BBIt != LoadBB->begin())
     return false;
 
+  // If all of the loads and stores that feed the value have the same TBAA tag,
+  // then we can propagate it onto any newly inserted loads.
+  MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa); 
 
   SmallPtrSet<BasicBlock*, 8> PredsScanned;
   typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
@@ -875,11 +878,16 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
 
     // Scan the predecessor to see if the value is available in the pred.
     BBIt = PredBB->end();
-    Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6);
+    MDNode *ThisTBAATag = 0;
+    Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6,
+                                                    0, &ThisTBAATag);
     if (!PredAvailable) {
       OneUnavailablePred = PredBB;
       continue;
     }
+    
+    // If tbaa tags disagree or are not present, forget about them.
+    if (TBAATag != ThisTBAATag) TBAATag = 0;
 
     // If so, this load is partially redundant.  Remember this info so that we
     // can create a PHI node.
@@ -939,6 +947,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
                                  LI->getAlignment(),
                                  UnavailablePred->getTerminator());
     NewVal->setDebugLoc(LI->getDebugLoc());
+    if (TBAATag)
+      NewVal->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+    
     AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
   }
 
@@ -1087,8 +1098,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
     else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
       DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
     else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
-      unsigned ValCase = SI->findCaseValue(cast<ConstantInt>(Val));
-      DestBB = SI->getSuccessor(SI->resolveSuccessorIndex(ValCase));
+      DestBB = SI->findCaseValue(cast<ConstantInt>(Val)).getCaseSuccessor();
     } else {
       assert(isa<IndirectBrInst>(BB->getTerminator())
               && "Unexpected terminator");
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 6768860..82d918e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4534,22 +4534,25 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
   if (!L->isLoopSimplifyForm())
     return;
 
+  // If there's no interesting work to be done, bail early.
+  if (IU.empty()) return;
+
+#ifndef NDEBUG
   // All dominating loops must have preheaders, or SCEVExpander may not be able
   // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
   //
-  // FIXME: This is a little absurd. I think LoopSimplify should be taught
-  // to create a preheader under any circumstance.
+  // IVUsers analysis should only create users that are dominated by simple loop
+  // headers. Since this loop should dominate all of its users, its user list
+  // should be empty if this loop itself is not within a simple loop nest.
   for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
        Rung; Rung = Rung->getIDom()) {
     BasicBlock *BB = Rung->getBlock();
     const Loop *DomLoop = LI.getLoopFor(BB);
     if (DomLoop && DomLoop->getHeader() == BB) {
-      if (!DomLoop->getLoopPreheader())
-        return;
+      assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
     }
   }
-  // If there's no interesting work to be done, bail early.
-  if (IU.empty()) return;
+#endif // DEBUG
 
   DEBUG(dbgs() << "\nLSR on loop ";
         WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 2c75f63..053eb0c 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -32,7 +32,7 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
-#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -445,8 +445,9 @@ bool LoopUnswitch::processCurrentLoop() {
         // Do not process same value again and again.
         // At this point we have some cases already unswitched and
         // some not yet unswitched. Let's find the first not yet unswitched one.
-        for (unsigned i = 0; i < NumCases; ++i) {
-          Constant* UnswitchValCandidate = SI->getCaseValue(i);
+        for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+             i != e; ++i) {
+          Constant* UnswitchValCandidate = i.getCaseValue();
           if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) {
             UnswitchVal = UnswitchValCandidate;
             break;
@@ -574,12 +575,13 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
     // this. 
     // Note that we can't trivially unswitch on the default case or
     // on already unswitched cases.
-    for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) {
+    for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+         i != e; ++i) {
       BasicBlock* LoopExitCandidate;
       if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop, 
-                                               SI->getCaseSuccessor(i)))) {
+                                               i.getCaseSuccessor()))) {
         // Okay, we found a trivial case, remember the value that is trivial.
-        ConstantInt* CaseVal = SI->getCaseValue(i);
+        ConstantInt* CaseVal = i.getCaseValue();
 
         // Check that it was not unswitched before, since already unswitched
         // trivial vals are looks trivial too.
@@ -1117,16 +1119,16 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
     SwitchInst *SI = dyn_cast<SwitchInst>(U);
     if (SI == 0 || !isa<ConstantInt>(Val)) continue;
     
-    unsigned DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
+    SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
     // Default case is live for multiple values.
-    if (DeadCase == SwitchInst::ErrorIndex) continue;
+    if (DeadCase == SI->case_default()) continue;
     
     // Found a dead case value.  Don't remove PHI nodes in the 
     // successor if they become single-entry, those PHI nodes may
     // be in the Users list.
 
     BasicBlock *Switch = SI->getParent();
-    BasicBlock *SISucc = SI->getCaseSuccessor(DeadCase);
+    BasicBlock *SISucc = DeadCase.getCaseSuccessor();
     BasicBlock *Latch = L->getLoopLatch();
     
     BranchesInfo.setUnswitched(SI, Val);
@@ -1146,7 +1148,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
     // Compute the successors instead of relying on the return value
     // of SplitEdge, since it may have split the switch successor
     // after PHI nodes.
-    BasicBlock *NewSISucc = SI->getCaseSuccessor(DeadCase);
+    BasicBlock *NewSISucc = DeadCase.getCaseSuccessor();
     BasicBlock *OldSISucc = *succ_begin(NewSISucc);
     // Create an "unreachable" destination.
     BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 1c7f036..9fdea8d 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -2929,11 +2929,17 @@ ComputePostOrders(Function &F,
   Visited.clear();
 
   // Compute the exits, which are the starting points for reverse-CFG DFS.
+  // This includes blocks where all the successors are backedges that
+  // we're skipping.
   SmallVector<BasicBlock *, 4> Exits;
   for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
     BasicBlock *BB = I;
-    if (cast<TerminatorInst>(&BB->back())->getNumSuccessors() == 0)
-      Exits.push_back(BB);
+    TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+    for (succ_iterator SI(TI), SE(TI, true); SI != SE; ++SI)
+      if (!Backedges.count(std::make_pair(BB, *SI)))
+        goto HasNonBackedgeSucc;
+    Exits.push_back(BB);
+  HasNonBackedgeSucc:;
   }
 
   // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
@@ -3035,7 +3041,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
       // but our releases will never depend on it, because they must be
       // paired with retains from before the invoke.
       InsertPts[0] = II->getNormalDest()->getFirstInsertionPt();
-      InsertPts[1] = II->getUnwindDest()->getFirstInsertionPt();
+      if (!II->getMetadata(NoObjCARCExceptionsMDKind))
+        InsertPts[1] = II->getUnwindDest()->getFirstInsertionPt();
     } else {
       // Insert code immediately after the last use.
       InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse));
@@ -4017,36 +4024,40 @@ bool ObjCARCContract::runOnFunction(Function &F) {
         Use &U = UI.getUse();
         unsigned OperandNo = UI.getOperandNo();
         ++UI; // Increment UI now, because we may unlink its element.
-        if (Instruction *UserInst = dyn_cast<Instruction>(U.getUser()))
-          if (Inst != UserInst && DT->dominates(Inst, UserInst)) {
-            Changed = true;
-            Instruction *Replacement = Inst;
-            Type *UseTy = U.get()->getType();
-            if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) {
-              // For PHI nodes, insert the bitcast in the predecessor block.
-              unsigned ValNo =
-                PHINode::getIncomingValueNumForOperand(OperandNo);
-              BasicBlock *BB =
-                PHI->getIncomingBlock(ValNo);
-              if (Replacement->getType() != UseTy)
-                Replacement = new BitCastInst(Replacement, UseTy, "",
-                                              &BB->back());
-              for (unsigned i = 0, e = PHI->getNumIncomingValues();
-                   i != e; ++i)
-                if (PHI->getIncomingBlock(i) == BB) {
-                  // Keep the UI iterator valid.
-                  if (&PHI->getOperandUse(
-                        PHINode::getOperandNumForIncomingValue(i)) ==
-                        &UI.getUse())
-                    ++UI;
-                  PHI->setIncomingValue(i, Replacement);
-                }
-            } else {
-              if (Replacement->getType() != UseTy)
-                Replacement = new BitCastInst(Replacement, UseTy, "", UserInst);
-              U.set(Replacement);
-            }
+        Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
+        if (!UserInst)
+          continue;
+        // FIXME: dominates should return true for unreachable UserInst.
+        if (!DT->isReachableFromEntry(UserInst->getParent()) ||
+            DT->dominates(Inst, UserInst)) {
+          Changed = true;
+          Instruction *Replacement = Inst;
+          Type *UseTy = U.get()->getType();
+          if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) {
+            // For PHI nodes, insert the bitcast in the predecessor block.
+            unsigned ValNo =
+              PHINode::getIncomingValueNumForOperand(OperandNo);
+            BasicBlock *BB =
+              PHI->getIncomingBlock(ValNo);
+            if (Replacement->getType() != UseTy)
+              Replacement = new BitCastInst(Replacement, UseTy, "",
+                                            &BB->back());
+            for (unsigned i = 0, e = PHI->getNumIncomingValues();
+                 i != e; ++i)
+              if (PHI->getIncomingBlock(i) == BB) {
+                // Keep the UI iterator valid.
+                if (&PHI->getOperandUse(
+                      PHINode::getOperandNumForIncomingValue(i)) ==
+                    &UI.getUse())
+                  ++UI;
+                PHI->setIncomingValue(i, Replacement);
+              }
+          } else {
+            if (Replacement->getType() != UseTy)
+              Replacement = new BitCastInst(Replacement, UseTy, "", UserInst);
+            U.set(Replacement);
           }
+        }
       }
 
       // If Arg is a no-op casted pointer, strip one level of casts and
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 4274b50..5ce82b9 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -564,7 +564,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
       return;
     }
 
-    Succs[SI->resolveSuccessorIndex(SI->findCaseValue(CI))] = true;
+    Succs[SI->findCaseValue(CI).getSuccessorIndex()] = true;
     return;
   }
 
@@ -623,14 +623,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
     if (CI == 0)
       return !SCValue.isUndefined();
 
-    // Make sure to skip the "default value" which isn't a value
-    for (unsigned i = 0, E = SI->getNumCases(); i != E; ++i)
-      if (SI->getCaseValue(i) == CI) // Found the taken branch.
-        return SI->getCaseSuccessor(i) == To;
-
-    // If the constant value is not equal to any of the branches, we must
-    // execute default branch.
-    return SI->getDefaultDest() == To;
+    return SI->findCaseValue(CI).getCaseSuccessor() == To;
   }
 
   // Just mark all destinations executable!
@@ -1495,12 +1488,12 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
       // If the input to SCCP is actually switch on undef, fix the undef to
       // the first constant.
       if (isa<UndefValue>(SI->getCondition())) {
-        SI->setCondition(SI->getCaseValue(0));
-        markEdgeExecutable(BB, SI->getCaseSuccessor(0));
+        SI->setCondition(SI->case_begin().getCaseValue());
+        markEdgeExecutable(BB, SI->case_begin().getCaseSuccessor());
         return true;
       }
 
-      markForcedConstant(SI->getCondition(), SI->getCaseValue(0));
+      markForcedConstant(SI->getCondition(), SI->case_begin().getCaseValue());
       return true;
     }
   }
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index d23263f..d36a18f 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -574,8 +574,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
     // transform it into a store of the expanded constant value.
     if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
       assert(MSI->getRawDest() == Ptr && "Consistency error!");
-      unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
-      if (NumBytes != 0) {
+      signed SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
+      if (SNumBytes > 0) {
+        unsigned NumBytes = static_cast<unsigned>(SNumBytes);
         unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
 
         // Compute the value replicated the right number of times.
@@ -1517,6 +1518,9 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
       ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
       if (Length == 0)
         return MarkUnsafe(Info, User);
+      if (Length->isNegative())
+        return MarkUnsafe(Info, User);
+
       isSafeMemAccess(Offset, Length->getZExtValue(), 0,
                       UI.getOperandNo() == 0, Info, MI,
                       true /*AllowWholeAccess*/);
diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp
deleted file mode 100644
index 50c91b6..0000000
--- a/lib/Transforms/Utils/BasicInliner.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-//===- BasicInliner.cpp - Basic function level inliner --------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a simple function based inliner that does not use
-// call graph information. 
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "basicinliner"
-#include "llvm/Module.h"
-#include "llvm/Function.h"
-#include "llvm/Transforms/Utils/BasicInliner.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include <vector>
-
-using namespace llvm;
-
-static cl::opt<unsigned>     
-BasicInlineThreshold("basic-inline-threshold", cl::Hidden, cl::init(200),
-   cl::desc("Control the amount of basic inlining to perform (default = 200)"));
-
-namespace llvm {
-
-  /// BasicInlinerImpl - BasicInliner implemantation class. This hides
-  /// container info, used by basic inliner, from public interface.
-  struct BasicInlinerImpl {
-    
-    BasicInlinerImpl(const BasicInlinerImpl&); // DO NOT IMPLEMENT
-    void operator=(const BasicInlinerImpl&); // DO NO IMPLEMENT
-  public:
-    BasicInlinerImpl(TargetData *T) : TD(T) {}
-
-    /// addFunction - Add function into the list of functions to process.
-    /// All functions must be inserted using this interface before invoking
-    /// inlineFunctions().
-    void addFunction(Function *F) {
-      Functions.push_back(F);
-    }
-
-    /// neverInlineFunction - Sometimes a function is never to be inlined 
-    /// because of one or other reason. 
-    void neverInlineFunction(Function *F) {
-      NeverInline.insert(F);
-    }
-
-    /// inlineFuctions - Walk all call sites in all functions supplied by
-    /// client. Inline as many call sites as possible. Delete completely
-    /// inlined functions.
-    void inlineFunctions();
-    
-  private:
-    TargetData *TD;
-    std::vector<Function *> Functions;
-    SmallPtrSet<const Function *, 16> NeverInline;
-    SmallPtrSet<Function *, 8> DeadFunctions;
-    InlineCostAnalyzer CA;
-  };
-
-/// inlineFuctions - Walk all call sites in all functions supplied by
-/// client. Inline as many call sites as possible. Delete completely
-/// inlined functions.
-void BasicInlinerImpl::inlineFunctions() {
-      
-  // Scan through and identify all call sites ahead of time so that we only
-  // inline call sites in the original functions, not call sites that result
-  // from inlining other functions.
-  std::vector<CallSite> CallSites;
-  
-  for (std::vector<Function *>::iterator FI = Functions.begin(),
-         FE = Functions.end(); FI != FE; ++FI) {
-    Function *F = *FI;
-    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
-      for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
-        CallSite CS(cast<Value>(I));
-        if (CS && CS.getCalledFunction()
-            && !CS.getCalledFunction()->isDeclaration())
-          CallSites.push_back(CS);
-      }
-  }
-  
-  DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
-  
-  // Inline call sites.
-  bool Changed = false;
-  do {
-    Changed = false;
-    for (unsigned index = 0; index != CallSites.size() && !CallSites.empty(); 
-         ++index) {
-      CallSite CS = CallSites[index];
-      if (Function *Callee = CS.getCalledFunction()) {
-        
-        // Eliminate calls that are never inlinable.
-        if (Callee->isDeclaration() ||
-            CS.getInstruction()->getParent()->getParent() == Callee) {
-          CallSites.erase(CallSites.begin() + index);
-          --index;
-          continue;
-        }
-        InlineCost IC = CA.getInlineCost(CS, NeverInline);
-        if (IC.isAlways()) {        
-          DEBUG(dbgs() << "  Inlining: cost=always"
-                       <<", call: " << *CS.getInstruction());
-        } else if (IC.isNever()) {
-          DEBUG(dbgs() << "  NOT Inlining: cost=never"
-                       <<", call: " << *CS.getInstruction());
-          continue;
-        } else {
-          int Cost = IC.getValue();
-          
-          if (Cost >= (int) BasicInlineThreshold) {
-            DEBUG(dbgs() << "  NOT Inlining: cost = " << Cost
-                         << ", call: " <<  *CS.getInstruction());
-            continue;
-          } else {
-            DEBUG(dbgs() << "  Inlining: cost = " << Cost
-                         << ", call: " <<  *CS.getInstruction());
-          }
-        }
-        
-        // Inline
-        InlineFunctionInfo IFI(0, TD);
-        if (InlineFunction(CS, IFI)) {
-          Callee->removeDeadConstantUsers();
-          if (Callee->isDefTriviallyDead())
-            DeadFunctions.insert(Callee);
-          Changed = true;
-          CallSites.erase(CallSites.begin() + index);
-          --index;
-        }
-      }
-    }
-  } while (Changed);
-  
-  // Remove completely inlined functions from module.
-  for(SmallPtrSet<Function *, 8>::iterator I = DeadFunctions.begin(),
-        E = DeadFunctions.end(); I != E; ++I) {
-    Function *D = *I;
-    Module *M = D->getParent();
-    M->getFunctionList().remove(D);
-  }
-}
-
-BasicInliner::BasicInliner(TargetData *TD) {
-  Impl = new BasicInlinerImpl(TD);
-}
-
-BasicInliner::~BasicInliner() {
-  delete Impl;
-}
-
-/// addFunction - Add function into the list of functions to process.
-/// All functions must be inserted using this interface before invoking
-/// inlineFunctions().
-void BasicInliner::addFunction(Function *F) {
-  Impl->addFunction(F);
-}
-
-/// neverInlineFunction - Sometimes a function is never to be inlined because
-/// of one or other reason. 
-void BasicInliner::neverInlineFunction(Function *F) {
-  Impl->neverInlineFunction(F);
-}
-
-/// inlineFuctions - Walk all call sites in all functions supplied by
-/// client. Inline as many call sites as possible. Delete completely
-/// inlined functions.
-void BasicInliner::inlineFunctions() {
-  Impl->inlineFunctions();
-}
-
-}
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index d1aa599..7f5cb5e 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -1,7 +1,6 @@
 add_llvm_library(LLVMTransformUtils
   AddrModeMatcher.cpp
   BasicBlockUtils.cpp
-  BasicInliner.cpp
   BreakCriticalEdges.cpp
   BuildLibCalls.cpp
   CloneFunction.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 04ef7d7..1b28c35 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -313,8 +313,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
       Cond = dyn_cast_or_null<ConstantInt>(V);
     }
     if (Cond) {     // Constant fold to uncond branch!
-      unsigned CaseIndex = SI->findCaseValue(Cond);
-      BasicBlock *Dest = SI->getSuccessor(SI->resolveSuccessorIndex(CaseIndex));
+      SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond);
+      BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor());
       VMap[OldTI] = BranchInst::Create(Dest, NewBB);
       ToClone.push_back(Dest);
       TerminatorDone = true;
@@ -424,10 +424,6 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
     //
     BasicBlock::iterator I = NewBB->begin();
 
-    DebugLoc TheCallDL;
-    if (TheCall) 
-      TheCallDL = TheCall->getDebugLoc();
-    
     // Handle PHI nodes specially, as we have to remove references to dead
     // blocks.
     if (PHINode *PN = dyn_cast<PHINode>(I)) {
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 429919b..e8c0b80 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -617,7 +617,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
     // of the other successors.
     TheSwitch->setCondition(call);
     TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
-    TheSwitch->removeCase(NumExitBlocks-1);  // Remove redundant case
+    // Remove redundant case
+    TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
     break;
   }
 }
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 336d8f6..5f895eb 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -106,31 +106,32 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
     // If we are switching on a constant, we can convert the switch into a
     // single branch instruction!
     ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
-    BasicBlock *TheOnlyDest = SI->getDefaultDest();  // The default dest
+    BasicBlock *TheOnlyDest = SI->getDefaultDest();
     BasicBlock *DefaultDest = TheOnlyDest;
 
     // Figure out which case it goes to.
-    for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) {
+    for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+         i != e; ++i) {
       // Found case matching a constant operand?
-      if (SI->getCaseValue(i) == CI) {
-        TheOnlyDest = SI->getCaseSuccessor(i);
+      if (i.getCaseValue() == CI) {
+        TheOnlyDest = i.getCaseSuccessor();
         break;
       }
 
       // Check to see if this branch is going to the same place as the default
       // dest.  If so, eliminate it as an explicit compare.
-      if (SI->getCaseSuccessor(i) == DefaultDest) {
+      if (i.getCaseSuccessor() == DefaultDest) {
         // Remove this entry.
         DefaultDest->removePredecessor(SI->getParent());
         SI->removeCase(i);
-        --i; --e;  // Don't skip an entry...
+        --i; --e;
         continue;
       }
 
       // Otherwise, check to see if the switch only branches to one destination.
       // We do this by reseting "TheOnlyDest" to null when we find two non-equal
       // destinations.
-      if (SI->getCaseSuccessor(i) != TheOnlyDest) TheOnlyDest = 0;
+      if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = 0;
     }
 
     if (CI && !TheOnlyDest) {
@@ -167,11 +168,13 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
     if (SI->getNumCases() == 1) {
       // Otherwise, we can fold this switch into a conditional branch
       // instruction if it has only one non-default destination.
+      SwitchInst::CaseIt FirstCase = SI->case_begin();
       Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
-                                         SI->getCaseValue(0), "cond");
+          FirstCase.getCaseValue(), "cond");
 
       // Insert the new branch.
-      Builder.CreateCondBr(Cond, SI->getCaseSuccessor(0), SI->getDefaultDest());
+      Builder.CreateCondBr(Cond, FirstCase.getCaseSuccessor(),
+                           SI->getDefaultDest());
 
       // Delete the old switch.
       SI->eraseFromParent();
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 4376265..0bc185d 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -386,7 +386,7 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
                                          this);
   } else {
     SmallVector<BasicBlock*, 2> NewBBs;
-    SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader", 
+    SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
                                 ".split-lp", this, NewBBs);
     PreheaderBB = NewBBs[0];
   }
@@ -538,8 +538,7 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
 ///
 Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
                                        BasicBlock *Preheader) {
-  // Don't try to separate loops without a preheader (this excludes 
-  // loop headers which are targeted by an indirectbr).
+  // Don't try to separate loops without a preheader.
   if (!Preheader)
     return 0;
 
@@ -554,11 +553,15 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
   // handles the case when a PHI node has multiple instances of itself as
   // arguments.
   SmallVector<BasicBlock*, 8> OuterLoopPreds;
-  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
     if (PN->getIncomingValue(i) != PN ||
-        !L->contains(PN->getIncomingBlock(i)))
+        !L->contains(PN->getIncomingBlock(i))) {
+      // We can't split indirectbr edges.
+      if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
+        return 0;
       OuterLoopPreds.push_back(PN->getIncomingBlock(i));
-
+    }
+  }
   DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
 
   // If ScalarEvolution is around and knows anything about values in
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index df8d68e..c70ced1 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -73,16 +73,16 @@ bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
   LLVMContext &Context = CI->getContext();
   Type *Int32Ty = Type::getInt32Ty(Context);
 
-  unsigned caseNo = SI->findCaseValue(ExpectedValue);
+  SwitchInst::CaseIt Case = SI->findCaseValue(ExpectedValue);
   std::vector<Value *> Vec;
   unsigned n = SI->getNumCases();
   Vec.resize(n + 1 + 1); // +1 for MDString and +1 for default case
 
   Vec[0] = MDString::get(Context, "branch_weights");
-  Vec[1] = ConstantInt::get(Int32Ty, SwitchInst::ErrorIndex == caseNo ?
+  Vec[1] = ConstantInt::get(Int32Ty, Case == SI->case_default() ?
                             LikelyBranchWeight : UnlikelyBranchWeight);
   for (unsigned i = 0; i < n; ++i) {
-    Vec[i + 1 + 1] = ConstantInt::get(Int32Ty, i == caseNo ?
+    Vec[i + 1 + 1] = ConstantInt::get(Int32Ty, i == Case.getCaseIndex() ?
         LikelyBranchWeight : UnlikelyBranchWeight);
   }
 
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 424f564..a16130d 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -237,10 +237,10 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
   unsigned numCmps = 0;
 
   // Start with "simple" cases
-  for (unsigned i = 0; i < SI->getNumCases(); ++i)
-    Cases.push_back(CaseRange(SI->getCaseValue(i),
-                              SI->getCaseValue(i),
-                              SI->getCaseSuccessor(i)));
+  for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
+    Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(),
+                              i.getCaseSuccessor()));
+  
   std::sort(Cases.begin(), Cases.end(), CaseCmp());
 
   // Merge case into clusters
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index a9853a4..d53a46e 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -480,9 +480,9 @@ GetValueEqualityComparisonCases(TerminatorInst *TI,
                                                       BasicBlock*> > &Cases) {
   if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
     Cases.reserve(SI->getNumCases());
-    for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i)
-      Cases.push_back(std::make_pair(SI->getCaseValue(i),
-                                     SI->getCaseSuccessor(i)));
+    for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
+      Cases.push_back(std::make_pair(i.getCaseValue(),
+                                     i.getCaseSuccessor()));
     return SI->getDefaultDest();
   }
 
@@ -605,10 +605,10 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
     DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                  << "Through successor TI: " << *TI);
 
-    for (unsigned i = SI->getNumCases(); i != 0;) {
+    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
       --i;
-      if (DeadCases.count(SI->getCaseValue(i))) {
-        SI->getCaseSuccessor(i)->removePredecessor(TI->getParent());
+      if (DeadCases.count(i.getCaseValue())) {
+        i.getCaseSuccessor()->removePredecessor(TI->getParent());
         SI->removeCase(i);
       }
     }
@@ -2009,10 +2009,8 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
 
   // Find the relevant condition and destinations.
   Value *Condition = Select->getCondition();
-  unsigned TrueCase = SI->findCaseValue(TrueVal);
-  unsigned FalseCase = SI->findCaseValue(FalseVal);
-  BasicBlock *TrueBB = SI->getSuccessor(SI->resolveSuccessorIndex(TrueCase));
-  BasicBlock *FalseBB = SI->getSuccessor(SI->resolveSuccessorIndex(FalseCase));
+  BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor();
+  BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor();
 
   // Perform the actual simplification.
   return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB);
@@ -2096,7 +2094,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
   // Ok, the block is reachable from the default dest.  If the constant we're
   // comparing exists in one of the other edges, then we can constant fold ICI
   // and zap it.
-  if (SI->findCaseValue(Cst) != SwitchInst::ErrorIndex) {
+  if (SI->findCaseValue(Cst) != SI->case_default()) {
     Value *V;
     if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
       V = ConstantInt::getFalse(BB->getContext());
@@ -2423,8 +2421,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
         }
       }
     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
-      for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i)
-        if (SI->getCaseSuccessor(i) == BB) {
+      for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+           i != e; ++i)
+        if (i.getCaseSuccessor() == BB) {
           BB->removePredecessor(SI->getParent());
           SI->removeCase(i);
           --i; --e;
@@ -2434,12 +2433,13 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
       // destination and make it the default.
       if (SI->getDefaultDest() == BB) {
         std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity;
-        for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) {
+        for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+             i != e; ++i) {
           std::pair<unsigned, unsigned> &entry =
-              Popularity[SI->getCaseSuccessor(i)];
+              Popularity[i.getCaseSuccessor()];
           if (entry.first == 0) {
             entry.first = 1;
-            entry.second = i;
+            entry.second = i.getCaseIndex();
           } else {
             entry.first++;
           }
@@ -2470,8 +2470,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
             for (unsigned i = 0; i != MaxPop-1; ++i)
               MaxBlock->removePredecessor(SI->getParent());
           
-          for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i)
-            if (SI->getCaseSuccessor(i) == MaxBlock) {
+          for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+               i != e; ++i)
+            if (i.getCaseSuccessor() == MaxBlock) {
               SI->removeCase(i);
               --i; --e;
             }
@@ -2517,11 +2518,13 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
 
   // Make sure all cases point to the same destination and gather the values.
   SmallVector<ConstantInt *, 16> Cases;
-  Cases.push_back(SI->getCaseValue(0));
-  for (unsigned I = 1, E = SI->getNumCases(); I != E; ++I) {
-    if (SI->getCaseSuccessor(I-1) != SI->getCaseSuccessor(I))
+  SwitchInst::CaseIt I = SI->case_begin();
+  Cases.push_back(I.getCaseValue());
+  SwitchInst::CaseIt PrevI = I++;
+  for (SwitchInst::CaseIt E = SI->case_end(); I != E; PrevI = I++) {
+    if (PrevI.getCaseSuccessor() != I.getCaseSuccessor())
       return false;
-    Cases.push_back(SI->getCaseValue(I));
+    Cases.push_back(I.getCaseValue());
   }
   assert(Cases.size() == SI->getNumCases() && "Not all cases gathered");
 
@@ -2539,10 +2542,11 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
   if (!Offset->isNullValue())
     Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
   Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
-  Builder.CreateCondBr(Cmp, SI->getCaseSuccessor(0), SI->getDefaultDest());
+  Builder.CreateCondBr(
+      Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
 
   // Prune obsolete incoming values off the successor's PHI nodes.
-  for (BasicBlock::iterator BBI = SI->getCaseSuccessor(0)->begin();
+  for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin();
        isa<PHINode>(BBI); ++BBI) {
     for (unsigned I = 0, E = SI->getNumCases()-1; I != E; ++I)
       cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
@@ -2562,22 +2566,22 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
 
   // Gather dead cases.
   SmallVector<ConstantInt*, 8> DeadCases;
-  for (unsigned I = 0, E = SI->getNumCases(); I != E; ++I) {
-    if ((SI->getCaseValue(I)->getValue() & KnownZero) != 0 ||
-        (SI->getCaseValue(I)->getValue() & KnownOne) != KnownOne) {
-      DeadCases.push_back(SI->getCaseValue(I));
+  for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
+    if ((I.getCaseValue()->getValue() & KnownZero) != 0 ||
+        (I.getCaseValue()->getValue() & KnownOne) != KnownOne) {
+      DeadCases.push_back(I.getCaseValue());
       DEBUG(dbgs() << "SimplifyCFG: switch case '"
-                   << SI->getCaseValue(I)->getValue() << "' is dead.\n");
+                   << I.getCaseValue() << "' is dead.\n");
     }
   }
 
   // Remove dead cases from the switch.
   for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) {
-    unsigned Case = SI->findCaseValue(DeadCases[I]);
-    assert(Case != SwitchInst::ErrorIndex &&
+    SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]);
+    assert(Case != SI->case_default() &&
            "Case was not found. Probably mistake in DeadCases forming.");
     // Prune unused values from PHI nodes.
-    SI->getCaseSuccessor(Case)->removePredecessor(SI->getParent());
+    Case.getCaseSuccessor()->removePredecessor(SI->getParent());
     SI->removeCase(Case);
   }
 
@@ -2626,9 +2630,9 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
   typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
   ForwardingNodesMap ForwardingNodes;
 
-  for (unsigned I = 0; I < SI->getNumCases(); ++I) { // 0 is the default case.
-    ConstantInt *CaseValue = SI->getCaseValue(I);
-    BasicBlock *CaseDest = SI->getCaseSuccessor(I);
+  for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
+    ConstantInt *CaseValue = I.getCaseValue();
+    BasicBlock *CaseDest = I.getCaseSuccessor();
 
     int PhiIndex;
     PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest,
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 20eef3c..e00565d 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -231,8 +231,10 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
 
   // Inform IVUsers about the new users.
   if (IU) {
-    if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
-      IU->AddUsersIfInteresting(I);
+    if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) {
+      SmallPtrSet<Loop*, 16> SimplifiedLoopNests;
+      IU->AddUsersIfInteresting(I, SimplifiedLoopNests);
+    }
   }
   DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
   ++NumElimRem;
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 6874906..7b39efb 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -1731,12 +1731,12 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     Out << ", ";
     writeOperand(SI.getDefaultDest(), true);
     Out << " [";
-    unsigned NumCases = SI.getNumCases();
-    for (unsigned i = 0; i < NumCases; ++i) {
+    for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
+         i != e; ++i) {
       Out << "\n    ";
-      writeOperand(SI.getCaseValue(i), true);
+      writeOperand(i.getCaseValue(), true);
       Out << ", ";
-      writeOperand(SI.getCaseSuccessor(i), true);
+      writeOperand(i.getCaseSuccessor(), true);
     }
     Out << "\n  ]";
   } else if (isa<IndirectBrInst>(I)) {
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index a7277f6..8903a8f 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -16,6 +16,7 @@
 #define LLVM_CONSTANTSCONTEXT_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
 #include "llvm/Operator.h"
@@ -656,48 +657,18 @@ private:
       return ConstantClassInfo::getTombstoneKey();
     }
     static unsigned getHashValue(const ConstantClass *CP) {
-      // This is adapted from SuperFastHash by Paul Hsieh.
-      unsigned Hash = TypeClassInfo::getHashValue(CP->getType());
-      for (unsigned I = 0, E = CP->getNumOperands(); I < E; ++I) {
-        unsigned Data = ConstantInfo::getHashValue(CP->getOperand(I));
-        Hash         += Data & 0xFFFF;
-        unsigned Tmp  = ((Data >> 16) << 11) ^ Hash;
-        Hash          = (Hash << 16) ^ Tmp;
-        Hash         += Hash >> 11;
-      }
-
-      // Force "avalanching" of final 127 bits.
-      Hash ^= Hash << 3;
-      Hash += Hash >> 5;
-      Hash ^= Hash << 4;
-      Hash += Hash >> 17;
-      Hash ^= Hash << 25;
-      Hash += Hash >> 6;
-      return Hash;
+      SmallVector<Constant*, 8> CPOperands;
+      CPOperands.reserve(CP->getNumOperands());
+      for (unsigned I = 0, E = CP->getNumOperands(); I < E; ++I)
+        CPOperands.push_back(CP->getOperand(I));
+      return getHashValue(LookupKey(CP->getType(), CPOperands));
     }
     static bool isEqual(const ConstantClass *LHS, const ConstantClass *RHS) {
       return LHS == RHS;
     }
     static unsigned getHashValue(const LookupKey &Val) {
-      // This is adapted from SuperFastHash by Paul Hsieh.
-      unsigned Hash = TypeClassInfo::getHashValue(Val.first);
-      for (Operands::const_iterator
-        I = Val.second.begin(), E = Val.second.end(); I != E; ++I) {
-        unsigned Data = ConstantInfo::getHashValue(*I);
-        Hash         += Data & 0xFFFF;
-        unsigned Tmp  = ((Data >> 16) << 11) ^ Hash;
-        Hash          = (Hash << 16) ^ Tmp;
-        Hash         += Hash >> 11;
-      }
-
-      // Force "avalanching" of final 127 bits.
-      Hash ^= Hash << 3;
-      Hash += Hash >> 5;
-      Hash ^= Hash << 4;
-      Hash += Hash >> 17;
-      Hash ^= Hash << 25;
-      Hash += Hash >> 6;
-      return Hash;
+      return hash_combine(Val.first, hash_combine_range(Val.second.begin(),
+                                                        Val.second.end()));
     }
     static bool isEqual(const LookupKey &LHS, const ConstantClass *RHS) {
       if (RHS == getEmptyKey() || RHS == getTombstoneKey())
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 11fd5b6..8db6ac9 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -3159,13 +3159,16 @@ void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
   // Initialize some new operands.
   assert(OpNo+1 < ReservedSpace && "Growing didn't work!");
   NumOperands = OpNo+2;
-  setCaseValue(NewCaseIdx, OnVal);
-  setCaseSuccessor(NewCaseIdx, Dest);
+  CaseIt Case(this, NewCaseIdx);
+  Case.setValue(OnVal);
+  Case.setSuccessor(Dest);
 }
 
 /// removeCase - This method removes the specified case and its successor
 /// from the switch instruction.
-void SwitchInst::removeCase(unsigned idx) {
+void SwitchInst::removeCase(CaseIt i) {
+  unsigned idx = i.getCaseIndex();
+  
   assert(2 + idx*2 < getNumOperands() && "Case index out of range!!!");
 
   unsigned NumOps = getNumOperands();
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 0bc4f74..c6f3558 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -185,16 +185,7 @@ bool Type::isSizedDerivedType() const {
   if (!this->isStructTy()) 
     return false;
 
-  // Opaque structs have no size.
-  if (cast<StructType>(this)->isOpaque())
-    return false;
-  
-  // Okay, our struct is sized if all of the elements are.
-  for (subtype_iterator I = subtype_begin(), E = subtype_end(); I != E; ++I)
-    if (!(*I)->isSized()) 
-      return false;
-
-  return true;
+  return cast<StructType>(this)->isSized();
 }
 
 //===----------------------------------------------------------------------===//
@@ -579,6 +570,26 @@ StructType *StructType::create(StringRef Name, Type *type, ...) {
   return llvm::StructType::create(Ctx, StructFields, Name);
 }
 
+bool StructType::isSized() const {
+  if ((getSubclassData() & SCDB_IsSized) != 0)
+    return true;
+  if (isOpaque())
+    return false;
+
+  // Okay, our struct is sized if all of the elements are, but if one of the
+  // elements is opaque, the struct isn't sized *yet*, but may become sized in
+  // the future, so just bail out without caching.
+  for (element_iterator I = element_begin(), E = element_end(); I != E; ++I)
+    if (!(*I)->isSized())
+      return false;
+
+  // Here we cheat a bit and cast away const-ness. The goal is to memoize when
+  // we find a sized type, as types can only move from opaque to sized, not the
+  // other way.
+  const_cast<StructType*>(this)->setSubclassData(
+    getSubclassData() | SCDB_IsSized);
+  return true;
+}
 
 StringRef StructType::getName() const {
   assert(!isLiteral() && "Literal structs never have names");
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 207c06d..41cc38c 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -317,20 +317,40 @@ void Value::replaceAllUsesWith(Value *New) {
     BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New));
 }
 
-Value *Value::stripPointerCasts() {
-  if (!getType()->isPointerTy())
-    return this;
+namespace {
+// Various metrics for how much to strip off of pointers.
+enum PointerStripKind {
+  PSK_ZeroIndices,
+  PSK_InBoundsConstantIndices,
+  PSK_InBounds
+};
+
+template <PointerStripKind StripKind>
+static Value *stripPointerCastsAndOffsets(Value *V) {
+  if (!V->getType()->isPointerTy())
+    return V;
 
   // Even though we don't look through PHI nodes, we could be called on an
   // instruction in an unreachable block, which may be on a cycle.
   SmallPtrSet<Value *, 4> Visited;
 
-  Value *V = this;
   Visited.insert(V);
   do {
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
-      if (!GEP->hasAllZeroIndices())
-        return V;
+      switch (StripKind) {
+      case PSK_ZeroIndices:
+        if (!GEP->hasAllZeroIndices())
+          return V;
+        break;
+      case PSK_InBoundsConstantIndices:
+        if (!GEP->hasAllConstantIndices())
+          return V;
+        // fallthrough
+      case PSK_InBounds:
+        if (!GEP->isInBounds())
+          return V;
+        break;
+      }
       V = GEP->getPointerOperand();
     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
       V = cast<Operator>(V)->getOperand(0);
@@ -346,6 +366,19 @@ Value *Value::stripPointerCasts() {
 
   return V;
 }
+} // namespace
+
+Value *Value::stripPointerCasts() {
+  return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
+}
+
+Value *Value::stripInBoundsConstantOffsets() {
+  return stripPointerCastsAndOffsets<PSK_InBoundsConstantIndices>(this);
+}
+
+Value *Value::stripInBoundsOffsets() {
+  return stripPointerCastsAndOffsets<PSK_InBounds>(this);
+}
 
 /// isDereferenceablePointer - Test if this value is always a pointer to
 /// allocated and suitably aligned memory for a simple load or store.
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index dcf86d2..5b9b2a5 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -813,11 +813,11 @@ void Verifier::visitSwitchInst(SwitchInst &SI) {
   // have the same type as the switched-on value.
   Type *SwitchTy = SI.getCondition()->getType();
   SmallPtrSet<ConstantInt*, 32> Constants;
-  for (unsigned i = 0, e = SI.getNumCases(); i != e; ++i) {
-    Assert1(SI.getCaseValue(i)->getType() == SwitchTy,
+  for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
+    Assert1(i.getCaseValue()->getType() == SwitchTy,
             "Switch constants must all be same type as switch value!", &SI);
-    Assert2(Constants.insert(SI.getCaseValue(i)),
-            "Duplicate integer as switch case", &SI, SI.getCaseValue(i));
+    Assert2(Constants.insert(i.getCaseValue()),
+            "Duplicate integer as switch case", &SI, i.getCaseValue());
   }
 
   visitTerminatorInst(SI);
diff --git a/projects/sample/Makefile.llvm.config.in b/projects/sample/Makefile.llvm.config.in
index 9e0c34c..deef6b2 100644
--- a/projects/sample/Makefile.llvm.config.in
+++ b/projects/sample/Makefile.llvm.config.in
@@ -143,7 +143,6 @@ SED        := @SED@
 TAR        := @TAR@
 
 # Paths to miscellaneous programs we hope are present but might not be
-PERL       := @PERL@
 BZIP2      := @BZIP2@
 CAT        := @CAT@
 DOT        := @DOT@
@@ -162,7 +161,6 @@ RUNTEST    := @RUNTEST@
 TCLSH      := @TCLSH@
 ZIP        := @ZIP@
 
-HAVE_PERL    := @HAVE_PERL@
 HAVE_PTHREAD := @HAVE_PTHREAD@
 
 LIBS       := @LIBS@
diff --git a/projects/sample/autoconf/configure.ac b/projects/sample/autoconf/configure.ac
index a1110ec..25d678a 100644
--- a/projects/sample/autoconf/configure.ac
+++ b/projects/sample/autoconf/configure.ac
@@ -909,16 +909,6 @@ if test "$XDOT_PY" != "echo xdot.py" ; then
    [Define to path to xdot.py program if found or 'echo xdot.py' otherwise])
 fi
 
-dnl Look for a sufficiently recent version of Perl.
-LLVM_PROG_PERL([5.006])
-AC_SUBST(PERL)
-if test x"$PERL" = xnone; then
-   AC_SUBST(HAVE_PERL,0)
-   AC_MSG_ERROR([perl is required but was not found, please install it])
-else
-   AC_SUBST(HAVE_PERL,1)
-fi
-
 dnl Find the install program
 AC_PROG_INSTALL
 dnl Prepend src dir to install path dir if it's a relative path
diff --git a/projects/sample/autoconf/m4/path_perl.m4 b/projects/sample/autoconf/m4/path_perl.m4
deleted file mode 100644
index 406656c..0000000
--- a/projects/sample/autoconf/m4/path_perl.m4
+++ /dev/null
@@ -1,16 +0,0 @@
-dnl Check for a reasonable version of Perl.
-dnl   $1 - Minimum Perl version.  Typically 5.006.
-dnl 
-AC_DEFUN([LLVM_PROG_PERL], [
-AC_PATH_PROG(PERL, [perl], [none])
-if test "$PERL" != "none"; then
-  AC_MSG_CHECKING(for Perl $1 or newer)
-  if $PERL -e 'use $1;' 2>&1 > /dev/null; then
-    AC_MSG_RESULT(yes)
-  else
-    PERL=none
-    AC_MSG_RESULT(not found)
-  fi
-fi
-])
-
diff --git a/projects/sample/configure b/projects/sample/configure
index ca654d0..db45550 100755
--- a/projects/sample/configure
+++ b/projects/sample/configure
@@ -733,8 +733,6 @@ CIRCO
 GV
 DOTTY
 XDOT_PY
-PERL
-HAVE_PERL
 INSTALL_PROGRAM
 INSTALL_SCRIPT
 INSTALL_DATA
@@ -6807,73 +6805,6 @@ _ACEOF
 
 fi
 
-
-# Extract the first word of "perl", so it can be a program name with args.
-set dummy perl; ac_word=$2
-{ echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
-if test "${ac_cv_path_PERL+set}" = set; then
-  echo $ECHO_N "(cached) $ECHO_C" >&6
-else
-  case $PERL in
-  [\\/]* | ?:[\\/]*)
-  ac_cv_path_PERL="$PERL" # Let the user override the test with a path.
-  ;;
-  *)
-  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  test -z "$as_dir" && as_dir=.
-  for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
-    ac_cv_path_PERL="$as_dir/$ac_word$ac_exec_ext"
-    echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
-    break 2
-  fi
-done
-done
-IFS=$as_save_IFS
-
-  test -z "$ac_cv_path_PERL" && ac_cv_path_PERL="none"
-  ;;
-esac
-fi
-PERL=$ac_cv_path_PERL
-if test -n "$PERL"; then
-  { echo "$as_me:$LINENO: result: $PERL" >&5
-echo "${ECHO_T}$PERL" >&6; }
-else
-  { echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6; }
-fi
-
-
-if test "$PERL" != "none"; then
-  { echo "$as_me:$LINENO: checking for Perl 5.006 or newer" >&5
-echo $ECHO_N "checking for Perl 5.006 or newer... $ECHO_C" >&6; }
-  if $PERL -e 'use 5.006;' 2>&1 > /dev/null; then
-    { echo "$as_me:$LINENO: result: yes" >&5
-echo "${ECHO_T}yes" >&6; }
-  else
-    PERL=none
-    { echo "$as_me:$LINENO: result: not found" >&5
-echo "${ECHO_T}not found" >&6; }
-  fi
-fi
-
-
-if test x"$PERL" = xnone; then
-   HAVE_PERL=0
-
-   { { echo "$as_me:$LINENO: error: perl is required but was not found, please install it" >&5
-echo "$as_me: error: perl is required but was not found, please install it" >&2;}
-   { (exit 1); exit 1; }; }
-else
-   HAVE_PERL=1
-
-fi
-
 # Find a good install program.  We prefer a C program (faster),
 # so one script is as good as another.  But avoid the broken or
 # incompatible versions:
@@ -10366,7 +10297,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<EOF
-#line 10369 "configure"
+#line 10300 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -21790,8 +21721,6 @@ CIRCO!$CIRCO$ac_delim
 GV!$GV$ac_delim
 DOTTY!$DOTTY$ac_delim
 XDOT_PY!$XDOT_PY$ac_delim
-PERL!$PERL$ac_delim
-HAVE_PERL!$HAVE_PERL$ac_delim
 INSTALL_PROGRAM!$INSTALL_PROGRAM$ac_delim
 INSTALL_SCRIPT!$INSTALL_SCRIPT$ac_delim
 INSTALL_DATA!$INSTALL_DATA$ac_delim
@@ -21847,7 +21776,7 @@ LIBOBJS!$LIBOBJS$ac_delim
 LTLIBOBJS!$LTLIBOBJS$ac_delim
 _ACEOF
 
-  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 91; then
+  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 89; then
     break
   elif $ac_last_try; then
     { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
index 7aae3ac..a8afc20 100644
--- a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
+++ b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
+; RUN: llc -mcpu=cortex-a8 -mattr=-neonfp < %s | FileCheck %s
 ; PR5423
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
index 23e1aa1..770ad44 100644
--- a/test/CodeGen/ARM/2010-12-07-PEIBug.ll
+++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
@@ -4,36 +4,12 @@
 define hidden void @foo() nounwind ssp {
 entry:
 ; CHECK: foo:
-; CHECK: push {r7, lr}
-; CHECK-NEXT: mov r7, sp
+; CHECK: mov r7, sp
 ; CHECK-NEXT: vpush {d8}
 ; CHECK-NEXT: vpush {d10, d11}
-  %tmp40 = load <4 x i8>* undef
-  %tmp41 = extractelement <4 x i8> %tmp40, i32 2
-  %conv42 = zext i8 %tmp41 to i32
-  %conv43 = sitofp i32 %conv42 to float
-  %div44 = fdiv float %conv43, 2.560000e+02
-  %vecinit45 = insertelement <4 x float> undef, float %div44, i32 2
-  %vecinit46 = insertelement <4 x float> %vecinit45, float 1.000000e+00, i32 3
-  store <4 x float> %vecinit46, <4 x float>* undef
-  br i1 undef, label %if.then105, label %if.else109
-
-if.then105:                                       ; preds = %entry
-  br label %if.end114
-
-if.else109:                                       ; preds = %entry
-  br label %if.end114
-
-if.end114:                                        ; preds = %if.else109, %if.then105
-  %call185 = call float @bar()
-  %vecinit186 = insertelement <4 x float> undef, float %call185, i32 1
-  %call189 = call float @bar()
-  %vecinit190 = insertelement <4 x float> %vecinit186, float %call189, i32 2
-  %vecinit191 = insertelement <4 x float> %vecinit190, float 1.000000e+00, i32 3
-  store <4 x float> %vecinit191, <4 x float>* undef
+  tail call void asm sideeffect "","~{d8},~{d10},~{d11}"() nounwind
 ; CHECK: vpop {d10, d11}
 ; CHECK-NEXT: vpop {d8}
-; CHECK-NEXT: pop {r7, pc}
   ret void
 }
 
diff --git a/test/CodeGen/ARM/2012-03-05-FPSCR-bug.ll b/test/CodeGen/ARM/2012-03-05-FPSCR-bug.ll
new file mode 100644
index 0000000..c9ea691
--- /dev/null
+++ b/test/CodeGen/ARM/2012-03-05-FPSCR-bug.ll
@@ -0,0 +1,36 @@
+; RUN: llc -march=arm -mcpu=cortex-a8 -verify-machineinstrs < %s
+; PR12165
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "arm-none-linux"
+
+define hidden void @_strtod_r() nounwind {
+  br i1 undef, label %1, label %2
+
+; <label>:1                                       ; preds = %0
+  br label %2
+
+; <label>:2                                       ; preds = %1, %0
+  br i1 undef, label %3, label %8
+
+; <label>:3                                       ; preds = %2
+  br i1 undef, label %4, label %7
+
+; <label>:4                                       ; preds = %3
+  %5 = call i32 @llvm.flt.rounds()
+  %6 = icmp eq i32 %5, 1
+  br i1 %6, label %8, label %7
+
+; <label>:7                                       ; preds = %4, %3
+  unreachable
+
+; <label>:8                                       ; preds = %4, %2
+  br i1 undef, label %9, label %10
+
+; <label>:9                                       ; preds = %8
+  br label %10
+
+; <label>:10                                      ; preds = %9, %8
+  ret void
+}
+
+declare i32 @llvm.flt.rounds() nounwind
diff --git a/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll b/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
new file mode 100644
index 0000000..6d596df
--- /dev/null
+++ b/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+; rdar://11035895
+
+; DAG combine incorrectly optimize (i32 vextract (v4i16 load $addr), c) to
+; (i16 load $addr+c*sizeof(i16)). It should have issued an extload instead. i.e.
+; (i32 extload $addr+c*sizeof(i16)
+define void @test_hi_short3(<3 x i16> * nocapture %srcA, <2 x i16> * nocapture %dst) nounwind {
+entry:
+; CHECK: ldrh [[REG:r[0-9]+]]
+; CHECK: strh [[REG]]
+  %0 = load <3 x i16> * %srcA, align 8
+  %1 = shufflevector <3 x i16> %0, <3 x i16> undef, <2 x i32> <i32 2, i32 undef>
+  store <2 x i16> %1, <2 x i16> * %dst, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
index 1693066..660156a 100644
--- a/test/CodeGen/ARM/fast-isel-cmp-imm.ll
+++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
@@ -229,3 +229,22 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %if.then, %entry
   ret void
 }
+
+; rdar://11038907
+; When comparing LONG_MIN/INT_MIN use a cmp instruction.
+define void @t13() nounwind ssp {
+entry:
+; ARM: t13
+; THUMB: t13
+  %cmp = icmp slt i32 -123, -2147483648
+; ARM: cmp r{{[0-9]}}, #-2147483648
+; THUMB: cmp.w r{{[0-9]}}, #-2147483648
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  ret void
+
+if.end:                                           ; preds = %entry
+  ret void
+}
+
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index 80925c7..2d8f710 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -9,7 +9,7 @@ entry:
 ; CHECK: vldr [[S0:s[0-9]+]],
 ; CHECK: vldr [[S1:s[0-9]+]],
 ; CHECK: vcmpe.f32 [[S1]], [[S0]]
-; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: vmrs APSR_nzcv, fpscr
 ; CHECK: beq
   %0 = load float* %a
   %1 = load float* %b
diff --git a/test/CodeGen/ARM/fpcmp_ueq.ll b/test/CodeGen/ARM/fpcmp_ueq.ll
index 2e6b3e3..4a4c5b1 100644
--- a/test/CodeGen/ARM/fpcmp_ueq.ll
+++ b/test/CodeGen/ARM/fpcmp_ueq.ll
@@ -5,7 +5,7 @@ define i32 @f7(float %a, float %b) {
 entry:
 ; CHECK: f7:
 ; CHECK: vcmpe.f32
-; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: vmrs APSR_nzcv, fpscr
 ; CHECK: movweq
 ; CHECK-NOT: vmrs
 ; CHECK: movwvs
diff --git a/test/CodeGen/ARM/ifcvt11.ll b/test/CodeGen/ARM/ifcvt11.ll
index 63f8557..0f142ee 100644
--- a/test/CodeGen/ARM/ifcvt11.ll
+++ b/test/CodeGen/ARM/ifcvt11.ll
@@ -18,7 +18,7 @@ bb.nph:                                           ; preds = %entry
 
 bb:                                               ; preds = %bb4, %bb.nph
 ; CHECK: vcmpe.f64
-; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: vmrs APSR_nzcv, fpscr
   %r.19 = phi i32 [ 0, %bb.nph ], [ %r.0, %bb4 ]
   %n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ]
   %scevgep10 = getelementptr inbounds %struct.xyz_t* %p, i32 %n.08, i32 0
@@ -33,7 +33,7 @@ bb1:                                              ; preds = %bb
 ; CHECK-NOT: vcmpemi
 ; CHECK-NOT: vmrsmi
 ; CHECK: vcmpe.f64
-; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: vmrs APSR_nzcv, fpscr
   %scevgep12 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 2
   %6 = load double* %scevgep12, align 4
   %7 = fcmp uge double %3, %6
diff --git a/test/CodeGen/ARM/mul_const.ll b/test/CodeGen/ARM/mul_const.ll
index 3cb8a8e..c50a233 100644
--- a/test/CodeGen/ARM/mul_const.ll
+++ b/test/CodeGen/ARM/mul_const.ll
@@ -41,3 +41,45 @@ entry:
         ret i32 %0
 }
 
+define i32 @tn9(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn9:
+; CHECK: add	r0, r0, r0, lsl #3
+; CHECK: rsb	r0, r0, #0
+        %0 = mul i32 %v, -9
+        ret i32 %0
+}
+
+define i32 @tn7(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn7:
+; CHECK: sub r0, r0, r0, lsl #3
+	%0 = mul i32 %v, -7
+	ret i32 %0
+}
+
+define i32 @tn5(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn5:
+; CHECK: add r0, r0, r0, lsl #2
+; CHECK: rsb r0, r0, #0
+        %0 = mul i32 %v, -5
+        ret i32 %0
+}
+
+define i32 @tn3(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn3:
+; CHECK: sub r0, r0, r0, lsl #2
+        %0 = mul i32 %v, -3
+        ret i32 %0
+}
+
+define i32 @tn12288(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn12288:
+; CHECK: sub r0, r0, r0, lsl #2
+; CHECK: lsl{{.*}}#12
+        %0 = mul i32 %v, -12288
+        ret i32 %0
+}
diff --git a/test/CodeGen/ARM/neon_spill.ll b/test/CodeGen/ARM/neon_spill.ll
index 677b9c2..277bd05 100644
--- a/test/CodeGen/ARM/neon_spill.ll
+++ b/test/CodeGen/ARM/neon_spill.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -verify-machineinstrs
+; RUN: llc < %s -verify-machineinstrs -O0
 ; PR12177
 ;
 ; This test case spills a QQQQ register.
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 3a66ec5..ca2e18a 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -67,7 +67,7 @@ entry:
 
 ; T2: t5:
 ; T2-NOT: moveq
-; T2: orreq.w r2, r2, #1
+; T2: orreq r2, r2, #1
   %tmp1 = icmp eq i32 %a, %b
   %tmp2 = zext i1 %tmp1 to i32
   %tmp3 = or i32 %tmp2, %c
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index 521ffa1..eb971ff 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -54,12 +54,16 @@ declare i8* @malloc(...)
 define fastcc void @test4(i16 %addr) nounwind {
 entry:
 ; A8: test4:
-; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
-; A8: str [[REG]], [r0]
+; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
+; A8-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
+; A8: str [[REG]], [r0, r1, lsl #2]
+; A8-NOT: str [[REG]], [r0]
 
 ; A9: test4:
-; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
-; A9: str [[REG]], [r0]
+; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
+; A9-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
+; A9: str [[REG]], [r0, r1, lsl #2]
+; A9-NOT: str [[REG]], [r0]
   %0 = tail call i8* (...)* @malloc(i32 undef) nounwind
   %1 = bitcast i8* %0 to i32*
   %2 = sext i16 %addr to i32
diff --git a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
index f518843..9c4838a 100644
--- a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
+++ b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
@@ -1,7 +1,6 @@
-; RUN: llc < %s
+; RUN: llc -march=mips -soft-float < %s
 ; PR2667
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "psp"
 	%struct._Bigint = type { %struct._Bigint*, i32, i32, i32, i32, [1 x i32] }
 	%struct.__FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*, i8*, i32)*, i32 (i8*, i8*, i32)*, i32 (i8*, i32, i32)*, i32 (i8*)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i32, %struct._reent*, i32 }
 	%struct.__sbuf = type { i8*, i32 }
diff --git a/test/CodeGen/Mips/zeroreg.ll b/test/CodeGen/Mips/zeroreg.ll
new file mode 100644
index 0000000..b890e1d
--- /dev/null
+++ b/test/CodeGen/Mips/zeroreg.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=mipsel | FileCheck %s
+
+@g1 = external global i32
+
+define i32 @foo0(i32 %s) nounwind readonly {
+entry:
+; CHECK-NOT: addiu
+; CHECK:     movn
+  %tobool = icmp ne i32 %s, 0
+  %0 = load i32* @g1, align 4, !tbaa !0
+  %cond = select i1 %tobool, i32 0, i32 %0
+  ret i32 %cond
+}
+
+define i32 @foo1(i32 %s) nounwind readonly {
+entry:
+; CHECK-NOT: addiu
+; CHECK:     movz
+  %tobool = icmp ne i32 %s, 0
+  %0 = load i32* @g1, align 4, !tbaa !0
+  %cond = select i1 %tobool, i32 %0, i32 0
+  ret i32 %cond
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/X86/2012-03-15-build_vector_wl.ll b/test/CodeGen/X86/2012-03-15-build_vector_wl.ll
new file mode 100644
index 0000000..fec17e9
--- /dev/null
+++ b/test/CodeGen/X86/2012-03-15-build_vector_wl.ll
@@ -0,0 +1,10 @@
+
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; CHECK: build_vector_again
+define <4 x i8> @build_vector_again(<16 x i8> %in) nounwind readnone {
+entry:
+  %out = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK: shufb
+  ret <4 x i8> %out
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/2012-03-20-LargeConstantExpr.ll b/test/CodeGen/X86/2012-03-20-LargeConstantExpr.ll
new file mode 100644
index 0000000..d24647e
--- /dev/null
+++ b/test/CodeGen/X86/2012-03-20-LargeConstantExpr.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; <rdar://problem/11070338>
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK:      _.memset_pattern:
+; CHECK-NEXT: .quad   4575657222473777152
+; CHECK-NEXT: .quad   4575657222473777152
+
+@.memset_pattern = internal unnamed_addr constant i128 or (i128 zext (i64 bitcast (<2 x float> <float 1.000000e+00, float 1.000000e+00> to i64) to i128), i128 shl (i128 zext (i64 bitcast (<2 x float> <float 1.000000e+00, float 1.000000e+00> to i64) to i128), i128 64)), align 16
+
+define void @foo(i8* %a, i64 %b) {
+  call void @memset_pattern16(i8* %a, i8* bitcast (i128* @.memset_pattern to i8*), i64 %b)
+  ret void
+}
+
+declare void @memset_pattern16(i8*, i8*, i64)
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 947d79f..54f01e9 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -135,3 +135,15 @@ define <4 x i32> @test15(<2 x i32>%x) nounwind readnone {
   ret <4 x i32>%x1
 }
 
+; rdar://10974078
+define <8 x float> @test16(float* nocapture %f) nounwind uwtable readonly ssp {
+entry:
+  %0 = bitcast float* %f to <4 x float>*
+  %1 = load <4 x float>* %0, align 8
+; CHECK: test16
+; CHECK: vmovups
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+  %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x float> %shuffle.i
+}
diff --git a/test/CodeGen/X86/avx-vextractf128.ll b/test/CodeGen/X86/avx-vextractf128.ll
index dccf901b..fe0f6ca 100644
--- a/test/CodeGen/X86/avx-vextractf128.ll
+++ b/test/CodeGen/X86/avx-vextractf128.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
 
+; CHECK: @A
 ; CHECK-NOT: vunpck
 ; CHECK: vextractf128 $1
 define <8 x float> @A(<8 x float> %a) nounwind uwtable readnone ssp {
@@ -8,6 +9,7 @@ entry:
   ret <8 x float> %shuffle
 }
 
+; CHECK: @B
 ; CHECK-NOT: vunpck
 ; CHECK: vextractf128 $1
 define <4 x double> @B(<4 x double> %a) nounwind uwtable readnone ssp {
@@ -16,3 +18,89 @@ entry:
   ret <4 x double> %shuffle
 }
 
+; CHECK: @t0
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovaps %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t0(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
+entry:
+  %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a, i8 0)
+  %1 = bitcast float* %addr to <4 x float>*
+  store <4 x float> %0, <4 x float>* %1, align 16
+  ret void
+}
+
+declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
+
+; CHECK: @t1
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovups %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t1(float* %addr, <8 x float> %a) nounwind uwtable ssp {
+entry:
+  %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a, i8 0)
+  %1 = bitcast float* %addr to i8*
+  tail call void @llvm.x86.sse.storeu.ps(i8* %1, <4 x float> %0)
+  ret void
+}
+
+declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
+
+; CHECK: @t2
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovaps %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t2(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
+entry:
+  %0 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a, i8 0)
+  %1 = bitcast double* %addr to <2 x double>*
+  store <2 x double> %0, <2 x double>* %1, align 16
+  ret void
+}
+
+declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
+
+; CHECK: @t3
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovups %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t3(double* %addr, <4 x double> %a) nounwind uwtable ssp {
+entry:
+  %0 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a, i8 0)
+  %1 = bitcast double* %addr to i8*
+  tail call void @llvm.x86.sse2.storeu.pd(i8* %1, <2 x double> %0)
+  ret void
+}
+
+declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
+
+; CHECK: @t4
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovaps %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t4(<2 x i64>* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
+entry:
+  %0 = bitcast <4 x i64> %a to <8 x i32>
+  %1 = tail call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %0, i8 0)
+  %2 = bitcast <4 x i32> %1 to <2 x i64>
+  store <2 x i64> %2, <2 x i64>* %addr, align 16
+  ret void
+}
+
+declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
+
+; CHECK: @t5
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovdqu %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t5(<2 x i64>* %addr, <4 x i64> %a) nounwind uwtable ssp {
+entry:
+  %0 = bitcast <4 x i64> %a to <8 x i32>
+  %1 = tail call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %0, i8 0)
+  %2 = bitcast <2 x i64>* %addr to i8*
+  %3 = bitcast <4 x i32> %1 to <16 x i8>
+  tail call void @llvm.x86.sse2.storeu.dq(i8* %2, <16 x i8> %3)
+  ret void
+}
+
+declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
diff --git a/test/CodeGen/X86/avx-vinsertf128.ll b/test/CodeGen/X86/avx-vinsertf128.ll
index def2212..9a954fe 100644
--- a/test/CodeGen/X86/avx-vinsertf128.ll
+++ b/test/CodeGen/X86/avx-vinsertf128.ll
@@ -104,3 +104,28 @@ ret <8 x i32> %res
 }
 declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
 
+; rdar://10643481
+; CHECK: vinsertf128_combine
+define <8 x float> @vinsertf128_combine(float* nocapture %f) nounwind uwtable readonly ssp {
+; CHECK-NOT: vmovaps
+; CHECK: vinsertf128
+entry:
+  %add.ptr = getelementptr inbounds float* %f, i64 4
+  %0 = bitcast float* %add.ptr to <4 x float>*
+  %1 = load <4 x float>* %0, align 16
+  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
+  ret <8 x float> %2
+}
+
+; rdar://11076953
+; CHECK: vinsertf128_ucombine
+define <8 x float> @vinsertf128_ucombine(float* nocapture %f) nounwind uwtable readonly ssp {
+; CHECK-NOT: vmovups
+; CHECK: vinsertf128
+entry:
+  %add.ptr = getelementptr inbounds float* %f, i64 4
+  %0 = bitcast float* %add.ptr to <4 x float>*
+  %1 = load <4 x float>* %0, align 8
+  %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
+  ret <8 x float> %2
+}
diff --git a/test/CodeGen/X86/legalize-shift-64.ll b/test/CodeGen/X86/legalize-shift-64.ll
index 2026472..c9f2fc2 100644
--- a/test/CodeGen/X86/legalize-shift-64.ll
+++ b/test/CodeGen/X86/legalize-shift-64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86 < %s | FileCheck %s
+; RUN: llc -mcpu=generic -march=x86 < %s | FileCheck %s
 
 define i64 @test1(i32 %xx, i32 %test) nounwind {
   %conv = zext i32 %xx to i64
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index 527a5a6..1311a73 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -1,4 +1,5 @@
 ; XFAIL: *
+; ...should pass. See PR12324: misched bringup
 ; RUN: llc < %s -march=x86-64 -O3 -asm-verbose=false | FileCheck %s
 target datalayout = "e-p:64:64:64"
 target triple = "x86_64-unknown-unknown"
diff --git a/test/CodeGen/X86/misched-new.ll b/test/CodeGen/X86/misched-new.ll
new file mode 100644
index 0000000..f3c2af8
--- /dev/null
+++ b/test/CodeGen/X86/misched-new.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=x86-64 -mcpu=core2 -enable-misched -misched=shuffle -misched-bottomup < %s
+; XFAIL: *
+; ...should pass. See PR12324: misched bringup
+;
+; Interesting MachineScheduler cases.
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+; From oggenc.
+; After coalescing, we have a dead superreg (RAX) definition.
+;
+; CHECK: xorl %esi, %esi
+; CHECK: movl $32, %ecx
+; CHECK: rep;movsl
+define fastcc void @_preextrapolate_helper() nounwind uwtable ssp {
+entry:
+  br i1 undef, label %for.cond.preheader, label %if.end
+
+for.cond.preheader:                               ; preds = %entry
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* null, i64 128, i32 4, i1 false) nounwind
+  unreachable
+
+if.end:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/remat-scalar-zero.ll b/test/CodeGen/X86/remat-scalar-zero.ll
index f6f0ed1..75f438d 100644
--- a/test/CodeGen/X86/remat-scalar-zero.ll
+++ b/test/CodeGen/X86/remat-scalar-zero.ll
@@ -1,4 +1,5 @@
 ; XFAIL: *
+; ...should pass. See PR12324: misched bringup
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu > %t
 ; RUN: not grep xor %t
 ; RUN: not grep movap %t
diff --git a/test/CodeGen/X86/zext-sext.ll b/test/CodeGen/X86/zext-sext.ll
index cea9e9c..6432ae3 100644
--- a/test/CodeGen/X86/zext-sext.ll
+++ b/test/CodeGen/X86/zext-sext.ll
@@ -1,4 +1,5 @@
 ; XFAIL: *
+; ...should pass. See PR12324: misched bringup
 ; RUN: llc < %s -march=x86-64 | FileCheck %s
 ; <rdar://problem/8006248>
 
diff --git a/test/DebugInfo/X86/DW_AT_specification.ll b/test/DebugInfo/X86/DW_AT_specification.ll
index e4c5c80..078b740 100644
--- a/test/DebugInfo/X86/DW_AT_specification.ll
+++ b/test/DebugInfo/X86/DW_AT_specification.ll
@@ -3,8 +3,8 @@
 
 ; test that the DW_AT_specification is a back edge in the file.
 
-; CHECK: 0x00000038: DW_TAG_subprogram [5] *
-; CHECK: 0x0000005e: DW_AT_specification [DW_FORM_ref4]      (cu + 0x0038 => {0x00000038})
+; CHECK: 0x0000003a: DW_TAG_subprogram [5] *
+; CHECK: 0x00000060: DW_AT_specification [DW_FORM_ref4]      (cu + 0x003a => {0x0000003a})
 
 
 @_ZZN3foo3barEvE1x = constant i32 0, align 4
diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll
index 055a9bf..a227071 100644
--- a/test/DebugInfo/X86/concrete_out_of_line.ll
+++ b/test/DebugInfo/X86/concrete_out_of_line.ll
@@ -7,7 +7,7 @@
 ; first check that we have a TAG_subprogram at a given offset and it has
 ; AT_inline.
 
-; CHECK: 0x00000130:   DW_TAG_subprogram [18]
+; CHECK: 0x00000134:   DW_TAG_subprogram [18]
 ; CHECK-NEXT:     DW_AT_MIPS_linkage_name
 ; CHECK-NEXT:     DW_AT_specification
 ; CHECK-NEXT:     DW_AT_inline
@@ -15,8 +15,8 @@
 
 ; and then that a TAG_subprogram refers to it with AT_abstract_origin.
 
-; CHECK: 0x00000180:   DW_TAG_subprogram [20]
-; CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4]    (cu + 0x0130 => {0x00000130})
+; CHECK: 0x00000184:   DW_TAG_subprogram [20]
+; CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4]    (cu + 0x0134 => {0x00000134})
 
 define i32 @_ZN17nsAutoRefCnt7ReleaseEv() {
 entry:
diff --git a/test/DebugInfo/X86/objc-fwd-decl.ll b/test/DebugInfo/X86/objc-fwd-decl.ll
new file mode 100644
index 0000000..1a815f9
--- /dev/null
+++ b/test/DebugInfo/X86/objc-fwd-decl.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=x86_64-macosx %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; CHECK: 0x00000027:   DW_TAG_structure_type
+; CHECK: 0x0000002c:     DW_AT_declaration
+; CHECK: 0x0000002d:     DW_AT_APPLE_runtime_class
+
+%0 = type opaque
+
+@a = common global %0* null, align 8
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10, !11, !12}
+
+!0 = metadata !{i32 786449, i32 0, i32 16, metadata !"foo.m", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 152054 trunk 152094)", i1 true, i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, %0** @a} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 786473, metadata !"foo.m", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 786451, null, metadata !"FooBarBaz", metadata !6, i32 1, i32 0, i32 0, i32 0, i32 4, null, null, i32 16} ; [ DW_TAG_structure_type ]
+!9 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!10 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!11 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!12 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll
index 8488434..5a001ee 100644
--- a/test/DebugInfo/X86/pr11300.ll
+++ b/test/DebugInfo/X86/pr11300.ll
@@ -3,8 +3,8 @@
 
 ; test that the DW_AT_specification is a back edge in the file.
 
-; CHECK: 0x0000005a:     DW_TAG_subprogram [5]
-; CHECK: 0x0000007a:     DW_AT_specification [DW_FORM_ref4]      (cu + 0x005a => {0x0000005a})
+; CHECK: 0x0000005c:     DW_TAG_subprogram [5]
+; CHECK: 0x0000007c:     DW_AT_specification [DW_FORM_ref4]      (cu + 0x005c => {0x0000005c})
 
 %struct.foo = type { i8 }
 
diff --git a/test/DebugInfo/X86/struct-loc.ll b/test/DebugInfo/X86/struct-loc.ll
new file mode 100644
index 0000000..9a04738
--- /dev/null
+++ b/test/DebugInfo/X86/struct-loc.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Make sure that structures have a decl file and decl line attached.
+; CHECK: DW_TAG_structure_type [3]
+; CHECK: DW_AT_decl_file
+; CHECK: DW_AT_decl_line
+; CHECK: DW_TAG_member
+
+%struct.foo = type { i32 }
+
+@f = common global %struct.foo zeroinitializer, align 4
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"struct_bug.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 152837) (llvm/trunk 152845)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 5, metadata !7, i32 0, i32 1, %struct.foo* @f} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 786473, metadata !"struct_bug.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786451, null, metadata !"foo", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786445, metadata !7, metadata !"a", metadata !6, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/bug_null_debuginfo.ll b/test/DebugInfo/bug_null_debuginfo.ll
new file mode 100644
index 0000000..a7fdf70
--- /dev/null
+++ b/test/DebugInfo/bug_null_debuginfo.ll
@@ -0,0 +1,6 @@
+; RUN: llc
+
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{null, null, null}
diff --git a/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll b/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
new file mode 100644
index 0000000..46273d3
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
@@ -0,0 +1,37 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+@.LC0 = internal global [10 x i8] c"argc: %d\0A\00"		; <[10 x i8]*> [#uses=1]
+
+declare i32 @puts(i8*)
+
+define void @getoptions(i32* %argc) {
+bb0:
+	ret void
+}
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main(i32 %argc, i8** %argv) {
+bb0:
+	call i32 (i8*, ...)* @printf( i8* getelementptr ([10 x i8]* @.LC0, i64 0, i64 0), i32 %argc )		; <i32>:0 [#uses=0]
+	%cast224 = bitcast i8** %argv to i8*		; <i8*> [#uses=1]
+	%local = alloca i8*		; <i8**> [#uses=3]
+	store i8* %cast224, i8** %local
+	%cond226 = icmp sle i32 %argc, 0		; <i1> [#uses=1]
+	br i1 %cond226, label %bb3, label %bb2
+bb2:		; preds = %bb2, %bb0
+	%cann-indvar = phi i32 [ 0, %bb0 ], [ %add1-indvar, %bb2 ]		; <i32> [#uses=2]
+	%add1-indvar = add i32 %cann-indvar, 1		; <i32> [#uses=2]
+	%cann-indvar-idxcast = sext i32 %cann-indvar to i64		; <i64> [#uses=1]
+	%CT = bitcast i8** %local to i8***		; <i8***> [#uses=1]
+	%reg115 = load i8*** %CT		; <i8**> [#uses=1]
+	%cast235 = getelementptr i8** %reg115, i64 %cann-indvar-idxcast		; <i8**> [#uses=1]
+	%reg117 = load i8** %cast235		; <i8*> [#uses=1]
+	%reg236 = call i32 @puts( i8* %reg117 )		; <i32> [#uses=0]
+	%cond239 = icmp slt i32 %add1-indvar, %argc		; <i1> [#uses=1]
+	br i1 %cond239, label %bb2, label %bb3
+bb3:		; preds = %bb2, %bb0
+	%cast243 = bitcast i8** %local to i32*		; <i32*> [#uses=1]
+	call void @getoptions( i32* %cast243 )
+	ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll b/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll
new file mode 100644
index 0000000..88bfbb3
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll
@@ -0,0 +1,13 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @foo(i32 %X, i32 %Y, double %A) {
+	%cond212 = fcmp une double %A, 1.000000e+00		; <i1> [#uses=1]
+	%cast110 = zext i1 %cond212 to i32		; <i32> [#uses=1]
+	ret i32 %cast110
+}
+
+define i32 @main() {
+	%reg212 = call i32 @foo( i32 0, i32 1, double 1.000000e+00 )		; <i32> [#uses=1]
+	ret i32 %reg212
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll b/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll
new file mode 100644
index 0000000..d5f860d
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll
@@ -0,0 +1,20 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+	call i32 @mylog( i32 4 )		; <i32>:1 [#uses=0]
+	ret i32 0
+}
+
+define internal i32 @mylog(i32 %num) {
+bb0:
+	br label %bb2
+bb2:		; preds = %bb2, %bb0
+	%reg112 = phi i32 [ 10, %bb2 ], [ 1, %bb0 ]		; <i32> [#uses=1]
+	%cann-indvar = phi i32 [ %cann-indvar, %bb2 ], [ 0, %bb0 ]		; <i32> [#uses=1]
+	%reg114 = add i32 %reg112, 1		; <i32> [#uses=2]
+	%cond222 = icmp slt i32 %reg114, %num		; <i1> [#uses=1]
+	br i1 %cond222, label %bb2, label %bb3
+bb3:		; preds = %bb2
+	ret i32 %reg114
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll b/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll
new file mode 100644
index 0000000..721f2e8
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll
@@ -0,0 +1,12 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+; <label>:0
+	br label %Loop
+Loop:		; preds = %Loop, %0
+	%X = phi i32 [ 0, %0 ], [ 1, %Loop ]		; <i32> [#uses=1]
+	br i1 true, label %Out, label %Loop
+Out:		; preds = %Loop
+	ret i32 %X
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll b/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll
new file mode 100644
index 0000000..d17df99
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll
@@ -0,0 +1,11 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; We were accidentally inverting the signedness of right shifts.  Whoops.
+
+define i32 @main() {
+	%X = ashr i32 -1, 16		; <i32> [#uses=1]
+	%Y = ashr i32 %X, 16		; <i32> [#uses=1]
+	%Z = add i32 %Y, 1		; <i32> [#uses=1]
+	ret i32 %Z
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll b/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll
new file mode 100644
index 0000000..e55cb06
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll
@@ -0,0 +1,10 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+	%X = fadd double 0.000000e+00, 1.000000e+00		; <double> [#uses=1]
+	%Y = fsub double 0.000000e+00, 1.000000e+00		; <double> [#uses=2]
+	%Z = fcmp oeq double %X, %Y		; <i1> [#uses=0]
+	fadd double %Y, 0.000000e+00		; <double>:1 [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll b/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll
new file mode 100644
index 0000000..663dc40
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll
@@ -0,0 +1,17 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @bar(i8* %X) {
+        ; pointer should be 4 byte aligned!
+	%P = alloca double		; <double*> [#uses=1]
+	%R = ptrtoint double* %P to i32		; <i32> [#uses=1]
+	%A = and i32 %R, 3		; <i32> [#uses=1]
+	ret i32 %A
+}
+
+define i32 @main() {
+	%SP = alloca i8		; <i8*> [#uses=1]
+	%X = add i32 0, 0		; <i32> [#uses=1]
+	alloca i8, i32 %X		; <i8*>:1 [#uses=0]
+	call i32 @bar( i8* %SP )		; <i32>:2 [#uses=1]
+	ret i32 %2
+}
diff --git a/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll b/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll
new file mode 100644
index 0000000..e95294b
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll
@@ -0,0 +1,19 @@
+; This testcase should return with an exit code of 1.
+;
+; RUN: not %lli -use-mcjit %s
+
+@test = global i64 0		; <i64*> [#uses=1]
+
+define internal i64 @test.upgrd.1() {
+	%tmp.0 = load i64* @test		; <i64> [#uses=1]
+	%tmp.1 = add i64 %tmp.0, 1		; <i64> [#uses=1]
+	ret i64 %tmp.1
+}
+
+define i32 @main() {
+	%L = call i64 @test.upgrd.1( )		; <i64> [#uses=1]
+	%I = trunc i64 %L to i32		; <i32> [#uses=1]
+	ret i32 %I
+}
+
+
diff --git a/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll b/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll
new file mode 100644
index 0000000..a237194
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll
@@ -0,0 +1,11 @@
+; RUN: %lli -use-mcjit %s test
+
+declare i32 @puts(i8*)
+
+define i32 @main(i32 %argc.1, i8** %argv.1) {
+	%tmp.5 = getelementptr i8** %argv.1, i64 1		; <i8**> [#uses=1]
+	%tmp.6 = load i8** %tmp.5		; <i8*> [#uses=1]
+	%tmp.0 = call i32 @puts( i8* %tmp.6 )		; <i32> [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll b/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll
new file mode 100644
index 0000000..70464a3
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll
@@ -0,0 +1,15 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+target datalayout = "e-p:32:32"
+
+define i32 @main() {
+entry:
+	br label %endif
+then:		; No predecessors!
+	br label %endif
+endif:		; preds = %then, %entry
+	%x = phi i32 [ 4, %entry ], [ 27, %then ]		; <i32> [#uses=0]
+	%result = phi i32 [ 32, %then ], [ 0, %entry ]		; <i32> [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll b/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll
new file mode 100644
index 0000000..58d423f
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll
@@ -0,0 +1,19 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; Testcase distilled from 256.bzip2.
+
+target datalayout = "e-p:32:32"
+
+define i32 @main() {
+entry:
+	br label %loopentry.0
+loopentry.0:		; preds = %loopentry.0, %entry
+	%h.0 = phi i32 [ %tmp.2, %loopentry.0 ], [ -1, %entry ]		; <i32> [#uses=1]
+	%tmp.2 = add i32 %h.0, 1		; <i32> [#uses=3]
+	%tmp.4 = icmp ne i32 %tmp.2, 0		; <i1> [#uses=1]
+	br i1 %tmp.4, label %loopentry.0, label %loopentry.1
+loopentry.1:		; preds = %loopentry.0
+	%h.1 = phi i32 [ %tmp.2, %loopentry.0 ]		; <i32> [#uses=1]
+	ret i32 %h.1
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll b/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll
new file mode 100644
index 0000000..a22fe07
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll
@@ -0,0 +1,17 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; Testcase distilled from 256.bzip2.
+
+target datalayout = "e-p:32:32"
+
+define i32 @main() {
+entry:
+	%X = add i32 1, -1		; <i32> [#uses=3]
+	br label %Next
+Next:		; preds = %entry
+	%A = phi i32 [ %X, %entry ]		; <i32> [#uses=0]
+	%B = phi i32 [ %X, %entry ]		; <i32> [#uses=0]
+	%C = phi i32 [ %X, %entry ]		; <i32> [#uses=1]
+	ret i32 %C
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll b/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll
new file mode 100644
index 0000000..b3c6d8a
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll
@@ -0,0 +1,11 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; This testcase failed to work because two variable sized allocas confused the
+; local register allocator.
+
+define i32 @main(i32 %X) {
+	%A = alloca i32, i32 %X		; <i32*> [#uses=0]
+	%B = alloca float, i32 %X		; <float*> [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll b/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll
new file mode 100644
index 0000000..bd32f30
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll
@@ -0,0 +1,21 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+;
+; Regression Test: EnvironmentTest.ll
+;
+; Description:
+;	This is a regression test that verifies that the JIT passes the
+;	environment to the main() function.
+;
+
+
+declare i32 @strlen(i8*)
+
+define i32 @main(i32 %argc.1, i8** %argv.1, i8** %envp.1) {
+	%tmp.2 = load i8** %envp.1		; <i8*> [#uses=1]
+	%tmp.3 = call i32 @strlen( i8* %tmp.2 )		; <i32> [#uses=1]
+	%T = icmp eq i32 %tmp.3, 0		; <i1> [#uses=1]
+	%R = zext i1 %T to i32		; <i32> [#uses=1]
+	ret i32 %R
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll b/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll
new file mode 100644
index 0000000..1959534
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll
@@ -0,0 +1,34 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; This testcase exposes a bug in the local register allocator where it runs out
+; of registers (due to too many overlapping live ranges), but then attempts to
+; use the ESP register (which is not allocatable) to hold a value.
+
+define i32 @main(i32 %A) {
+        ; ESP gets used again...
+	%Ap2 = alloca i32, i32 %A		; <i32*> [#uses=11]
+	; Produce lots of overlapping live ranges
+        %B = add i32 %A, 1		; <i32> [#uses=1]
+	%C = add i32 %A, 2		; <i32> [#uses=1]
+	%D = add i32 %A, 3		; <i32> [#uses=1]
+	%E = add i32 %A, 4		; <i32> [#uses=1]
+	%F = add i32 %A, 5		; <i32> [#uses=1]
+	%G = add i32 %A, 6		; <i32> [#uses=1]
+	%H = add i32 %A, 7		; <i32> [#uses=1]
+	%I = add i32 %A, 8		; <i32> [#uses=1]
+	%J = add i32 %A, 9		; <i32> [#uses=1]
+	%K = add i32 %A, 10		; <i32> [#uses=1]
+        ; Uses of all of the values
+	store i32 %A, i32* %Ap2
+	store i32 %B, i32* %Ap2
+	store i32 %C, i32* %Ap2
+	store i32 %D, i32* %Ap2
+	store i32 %E, i32* %Ap2
+	store i32 %F, i32* %Ap2
+	store i32 %G, i32* %Ap2
+	store i32 %H, i32* %Ap2
+	store i32 %I, i32* %Ap2
+	store i32 %J, i32* %Ap2
+	store i32 %K, i32* %Ap2
+	ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll b/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
new file mode 100644
index 0000000..1f8343f
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
@@ -0,0 +1,23 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+@A = global i32 0		; <i32*> [#uses=1]
+
+define i32 @main() {
+	%Ret = call i32 @test( i1 true, i32 0 )		; <i32> [#uses=1]
+	ret i32 %Ret
+}
+
+define i32 @test(i1 %c, i32 %A) {
+	br i1 %c, label %Taken1, label %NotTaken
+Cont:		; preds = %Taken1, %NotTaken
+	%V = phi i32 [ 0, %NotTaken ], [ sub (i32 ptrtoint (i32* @A to i32), i32 1234), %Taken1 ]		; <i32> [#uses=0]
+	ret i32 0
+NotTaken:		; preds = %0
+	br label %Cont
+Taken1:		; preds = %0
+	%B = icmp eq i32 %A, 0		; <i1> [#uses=1]
+	br i1 %B, label %Cont, label %ExitError
+ExitError:		; preds = %Taken1
+	ret i32 12
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll b/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll
new file mode 100644
index 0000000..79a7d58
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll
@@ -0,0 +1,22 @@
+; PR672
+; RUN: %lli -use-mcjit %s
+; XFAIL: mcjit-ia32
+
+define i32 @main() {
+	%f = bitcast i32 (i32, i32*, i32)* @check_tail to i32*		; <i32*> [#uses=1]
+	%res = tail call fastcc i32 @check_tail( i32 10, i32* %f, i32 10 )		; <i32> [#uses=1]
+	ret i32 %res
+}
+
+define fastcc i32 @check_tail(i32 %x, i32* %f, i32 %g) {
+	%tmp1 = icmp sgt i32 %x, 0		; <i1> [#uses=1]
+	br i1 %tmp1, label %if-then, label %if-else
+if-then:		; preds = %0
+	%fun_ptr = bitcast i32* %f to i32 (i32, i32*, i32)*		; <i32 (i32, i32*, i32)*> [#uses=1]
+	%arg1 = add i32 %x, -1		; <i32> [#uses=1]
+	%res = tail call fastcc i32 %fun_ptr( i32 %arg1, i32* %f, i32 %g )		; <i32> [#uses=1]
+	ret i32 %res
+if-else:		; preds = %0
+	ret i32 %x
+}
+
diff --git a/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll b/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll
new file mode 100644
index 0000000..52cef4d
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll
@@ -0,0 +1,19 @@
+; RUN: %lli -use-mcjit -force-interpreter %s
+; PR1836
+
+define i32 @main() {
+entry:
+    %retval = alloca i32        ; <i32*> [#uses=2]
+    %tmp = alloca i32       ; <i32*> [#uses=2]
+    %x = alloca i75, align 16       ; <i75*> [#uses=1]
+    %"alloca point" = bitcast i32 0 to i32      ; <i32> [#uses=0]
+    store i75 999, i75* %x, align 16
+    store i32 0, i32* %tmp, align 4
+    %tmp1 = load i32* %tmp, align 4     ; <i32> [#uses=1]
+    store i32 %tmp1, i32* %retval, align 4
+    br label %return
+
+return:     ; preds = %entry
+    %retval2 = load i32* %retval        ; <i32> [#uses=1]
+    ret i32 %retval2
+}
diff --git a/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll b/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
new file mode 100644
index 0000000..a6e917f
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
@@ -0,0 +1,59 @@
+; RUN: %lli -use-mcjit -force-interpreter=true %s | grep 1
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i686-pc-linux-gnu"
+@.str = internal constant [10 x i8] c"MSB = %d\0A\00"		; <[10 x i8]*> [#uses=1]
+
+define i65 @foo(i65 %x) {
+entry:
+	%x_addr = alloca i65		; <i65*> [#uses=2]
+	%retval = alloca i65		; <i65*> [#uses=2]
+	%tmp = alloca i65		; <i65*> [#uses=2]
+	%"alloca point" = bitcast i65 0 to i65		; <i65> [#uses=0]
+	store i65 %x, i65* %x_addr
+	%tmp1 = load i65* %x_addr, align 4		; <i65> [#uses=1]
+	%tmp2 = ashr i65 %tmp1, 65		; <i65> [#uses=1]
+	store i65 %tmp2, i65* %tmp, align 4
+	%tmp3 = load i65* %tmp, align 4		; <i65> [#uses=1]
+	store i65 %tmp3, i65* %retval, align 4
+	br label %return
+
+return:		; preds = %entry
+	%retval4 = load i65* %retval		; <i65> [#uses=1]
+	ret i65 %retval4
+}
+
+define i32 @main() {
+entry:
+	%retval = alloca i32		; <i32*> [#uses=1]
+	%iftmp.0 = alloca i32		; <i32*> [#uses=3]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%tmp = call i65 @foo( i65 -9 )		; <i65> [#uses=1]
+	%tmp1 = lshr i65 %tmp, 64		; <i65> [#uses=1]
+	%tmp2 = xor i65 %tmp1, 1		; <i65> [#uses=1]
+	%tmp3 = and i65 %tmp2, 1		; <i65> [#uses=1]
+	%tmp34 = trunc i65 %tmp3 to i8		; <i8> [#uses=1]
+	%toBool = icmp ne i8 %tmp34, 0		; <i1> [#uses=1]
+	br i1 %toBool, label %cond_true, label %cond_false
+
+cond_true:		; preds = %entry
+	store i32 0, i32* %iftmp.0, align 4
+	br label %cond_next
+
+cond_false:		; preds = %entry
+	store i32 1, i32* %iftmp.0, align 4
+	br label %cond_next
+
+cond_next:		; preds = %cond_false, %cond_true
+	%tmp5 = getelementptr [10 x i8]* @.str, i32 0, i32 0		; <i8*> [#uses=1]
+	%tmp6 = load i32* %iftmp.0, align 4		; <i32> [#uses=1]
+	%tmp7 = call i32 (i8*, ...)* @printf( i8* noalias  %tmp5, i32 %tmp6 ) nounwind 		; <i32> [#uses=0]
+	br label %return
+
+return:		; preds = %cond_next
+    store i32 0, i32* %retval, align 4
+	%retval8 = load i32* %retval		; <i32> [#uses=1]
+	ret i32 %retval8
+}
+
+declare i32 @printf(i8* noalias , ...) nounwind 
diff --git a/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll b/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll
new file mode 100644
index 0000000..524a724
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll
@@ -0,0 +1,8 @@
+; RUN: %lli -use-mcjit -force-interpreter=true %s
+
+define i32 @main() {
+       %a = add i32 0, undef
+       %b = fadd float 0.0, undef
+       %c = fadd double 0.0, undef
+       ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/fpbitcast.ll b/test/ExecutionEngine/MCJIT/fpbitcast.ll
new file mode 100644
index 0000000..9da908f
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/fpbitcast.ll
@@ -0,0 +1,20 @@
+; RUN: %lli -use-mcjit -force-interpreter=true %s | grep 40091eb8
+;
+define i32 @test(double %x) {
+entry:
+	%x46.i = bitcast double %x to i64	
+	%tmp343.i = lshr i64 %x46.i, 32	
+	%tmp344.i = trunc i64 %tmp343.i to i32
+        ret i32 %tmp344.i
+}
+
+define i32 @main()
+{
+       %res = call i32 @test(double 3.14)
+       %ptr = getelementptr [4 x i8]* @format, i32 0, i32 0
+       call i32 (i8*,...)* @printf(i8* %ptr, i32 %res)
+       ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
+@format = internal constant [4 x i8] c"%x\0A\00"
diff --git a/test/ExecutionEngine/MCJIT/hello.ll b/test/ExecutionEngine/MCJIT/hello.ll
new file mode 100644
index 0000000..a52b6d4
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/hello.ll
@@ -0,0 +1,11 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+@.LC0 = internal global [12 x i8] c"Hello World\00"		; <[12 x i8]*> [#uses=1]
+
+declare i32 @puts(i8*)
+
+define i32 @main() {
+	%reg210 = call i32 @puts( i8* getelementptr ([12 x i8]* @.LC0, i64 0, i64 0) )		; <i32> [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/hello2.ll b/test/ExecutionEngine/MCJIT/hello2.ll
new file mode 100644
index 0000000..670a6dd
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/hello2.ll
@@ -0,0 +1,17 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+@X = global i32 7		; <i32*> [#uses=0]
+@msg = internal global [13 x i8] c"Hello World\0A\00"		; <[13 x i8]*> [#uses=1]
+
+declare void @printf([13 x i8]*, ...)
+
+define void @bar() {
+	call void ([13 x i8]*, ...)* @printf( [13 x i8]* @msg )
+	ret void
+}
+
+define i32 @main() {
+	call void @bar( )
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/lit.local.cfg b/test/ExecutionEngine/MCJIT/lit.local.cfg
new file mode 100644
index 0000000..f943fe4
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/lit.local.cfg
@@ -0,0 +1,17 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if ('X86' in targets) | ('ARM' in targets):
+    config.unsupported = False
+else:
+    config.unsupported = True
+
+if root.host_os in ['Win32', 'Cygwin', 'MingW']:
+    config.unsupported = True
diff --git a/test/ExecutionEngine/MCJIT/simplesttest.ll b/test/ExecutionEngine/MCJIT/simplesttest.ll
new file mode 100644
index 0000000..a6688c2
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/simplesttest.ll
@@ -0,0 +1,6 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/simpletest.ll b/test/ExecutionEngine/MCJIT/simpletest.ll
new file mode 100644
index 0000000..4562aa6
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/simpletest.ll
@@ -0,0 +1,11 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @bar() {
+	ret i32 0
+}
+
+define i32 @main() {
+	%r = call i32 @bar( )		; <i32> [#uses=1]
+	ret i32 %r
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-arith.ll b/test/ExecutionEngine/MCJIT/test-arith.ll
new file mode 100644
index 0000000..3177760
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-arith.ll
@@ -0,0 +1,34 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+	%A = add i8 0, 12		; <i8> [#uses=1]
+	%B = sub i8 %A, 1		; <i8> [#uses=2]
+	%C = mul i8 %B, %B		; <i8> [#uses=2]
+	%D = sdiv i8 %C, %C		; <i8> [#uses=2]
+	%E = srem i8 %D, %D		; <i8> [#uses=0]
+	%F = udiv i8 5, 6		; <i8> [#uses=0]
+	%G = urem i8 6, 5		; <i8> [#uses=0]
+	%A.upgrd.1 = add i16 0, 12		; <i16> [#uses=1]
+	%B.upgrd.2 = sub i16 %A.upgrd.1, 1		; <i16> [#uses=2]
+	%C.upgrd.3 = mul i16 %B.upgrd.2, %B.upgrd.2		; <i16> [#uses=2]
+	%D.upgrd.4 = sdiv i16 %C.upgrd.3, %C.upgrd.3		; <i16> [#uses=2]
+	%E.upgrd.5 = srem i16 %D.upgrd.4, %D.upgrd.4		; <i16> [#uses=0]
+	%F.upgrd.6 = udiv i16 5, 6		; <i16> [#uses=0]
+	%G.upgrd.7 = urem i32 6, 5		; <i32> [#uses=0]
+	%A.upgrd.8 = add i32 0, 12		; <i32> [#uses=1]
+	%B.upgrd.9 = sub i32 %A.upgrd.8, 1		; <i32> [#uses=2]
+	%C.upgrd.10 = mul i32 %B.upgrd.9, %B.upgrd.9		; <i32> [#uses=2]
+	%D.upgrd.11 = sdiv i32 %C.upgrd.10, %C.upgrd.10		; <i32> [#uses=2]
+	%E.upgrd.12 = srem i32 %D.upgrd.11, %D.upgrd.11		; <i32> [#uses=0]
+	%F.upgrd.13 = udiv i32 5, 6		; <i32> [#uses=0]
+	%G1 = urem i32 6, 5		; <i32> [#uses=0]
+	%A.upgrd.14 = add i64 0, 12		; <i64> [#uses=1]
+	%B.upgrd.15 = sub i64 %A.upgrd.14, 1		; <i64> [#uses=2]
+	%C.upgrd.16 = mul i64 %B.upgrd.15, %B.upgrd.15		; <i64> [#uses=2]
+	%D.upgrd.17 = sdiv i64 %C.upgrd.16, %C.upgrd.16		; <i64> [#uses=2]
+	%E.upgrd.18 = srem i64 %D.upgrd.17, %D.upgrd.17		; <i64> [#uses=0]
+	%F.upgrd.19 = udiv i64 5, 6		; <i64> [#uses=0]
+	%G.upgrd.20 = urem i64 6, 5		; <i64> [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-branch.ll b/test/ExecutionEngine/MCJIT/test-branch.ll
new file mode 100644
index 0000000..702c110
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-branch.ll
@@ -0,0 +1,12 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; test unconditional branch
+define i32 @main() {
+	br label %Test
+Test:		; preds = %Test, %0
+	%X = icmp eq i32 0, 4		; <i1> [#uses=1]
+	br i1 %X, label %Test, label %Label
+Label:		; preds = %Test
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll b/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll
new file mode 100644
index 0000000..6f28405
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll
@@ -0,0 +1,14 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @_Z14func_exit_codev() nounwind uwtable {
+entry:
+  ret i32 0
+}
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %call = call i32 @_Z14func_exit_codev()
+  ret i32 %call
+}
diff --git a/test/ExecutionEngine/MCJIT/test-call.ll b/test/ExecutionEngine/MCJIT/test-call.ll
new file mode 100644
index 0000000..7a244ee
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-call.ll
@@ -0,0 +1,21 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+declare void @exit(i32)
+
+define i32 @test(i8 %C, i16 %S) {
+	%X = trunc i16 %S to i8		; <i8> [#uses=1]
+	%Y = zext i8 %X to i32		; <i32> [#uses=1]
+	ret i32 %Y
+}
+
+define void @FP(void (i32)* %F) {
+	%X = call i32 @test( i8 123, i16 1024 )		; <i32> [#uses=1]
+	call void %F( i32 %X )
+	ret void
+}
+
+define i32 @main() {
+	call void @FP( void (i32)* @exit )
+	ret i32 1
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-cast.ll b/test/ExecutionEngine/MCJIT/test-cast.ll
new file mode 100644
index 0000000..75e7d1b
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-cast.ll
@@ -0,0 +1,109 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @foo() {
+	ret i32 0
+}
+
+define i32 @main() {
+	icmp ne i1 true, false		; <i1>:1 [#uses=0]
+	zext i1 true to i8		; <i8>:2 [#uses=0]
+	zext i1 true to i8		; <i8>:3 [#uses=0]
+	zext i1 true to i16		; <i16>:4 [#uses=0]
+	zext i1 true to i16		; <i16>:5 [#uses=0]
+	zext i1 true to i32		; <i32>:6 [#uses=0]
+	zext i1 true to i32		; <i32>:7 [#uses=0]
+	zext i1 true to i64		; <i64>:8 [#uses=0]
+	zext i1 true to i64		; <i64>:9 [#uses=0]
+	uitofp i1 true to float		; <float>:10 [#uses=0]
+	uitofp i1 true to double		; <double>:11 [#uses=0]
+	icmp ne i8 0, 0		; <i1>:12 [#uses=0]
+	icmp ne i8 1, 0		; <i1>:13 [#uses=0]
+	bitcast i8 0 to i8		; <i8>:14 [#uses=0]
+	bitcast i8 -1 to i8		; <i8>:15 [#uses=0]
+	sext i8 4 to i16		; <i16>:16 [#uses=0]
+	sext i8 4 to i16		; <i16>:17 [#uses=0]
+	sext i8 4 to i64		; <i64>:18 [#uses=0]
+	sext i8 4 to i64		; <i64>:19 [#uses=0]
+	sitofp i8 4 to float		; <float>:20 [#uses=0]
+	sitofp i8 4 to double		; <double>:21 [#uses=0]
+	icmp ne i8 0, 0		; <i1>:22 [#uses=0]
+	icmp ne i8 1, 0		; <i1>:23 [#uses=0]
+	bitcast i8 0 to i8		; <i8>:24 [#uses=0]
+	bitcast i8 1 to i8		; <i8>:25 [#uses=0]
+	zext i8 4 to i16		; <i16>:26 [#uses=0]
+	zext i8 4 to i16		; <i16>:27 [#uses=0]
+	zext i8 4 to i64		; <i64>:28 [#uses=0]
+	zext i8 4 to i64		; <i64>:29 [#uses=0]
+	uitofp i8 0 to float		; <float>:30 [#uses=0]
+	uitofp i8 0 to double		; <double>:31 [#uses=0]
+	icmp ne i16 1, 0		; <i1>:32 [#uses=0]
+	trunc i16 -1 to i8		; <i8>:33 [#uses=0]
+	trunc i16 255 to i8		; <i8>:34 [#uses=0]
+	bitcast i16 0 to i16		; <i16>:35 [#uses=0]
+	bitcast i16 0 to i16		; <i16>:36 [#uses=0]
+	sext i16 0 to i64		; <i64>:37 [#uses=0]
+	sext i16 0 to i64		; <i64>:38 [#uses=0]
+	sitofp i16 0 to float		; <float>:39 [#uses=0]
+	sitofp i16 0 to double		; <double>:40 [#uses=0]
+	icmp ne i16 1, 0		; <i1>:41 [#uses=0]
+	trunc i16 1 to i8		; <i8>:42 [#uses=0]
+	trunc i16 255 to i8		; <i8>:43 [#uses=0]
+	bitcast i16 0 to i16		; <i16>:44 [#uses=0]
+	bitcast i16 0 to i16		; <i16>:45 [#uses=0]
+	zext i16 0 to i64		; <i64>:46 [#uses=0]
+	zext i16 0 to i64		; <i64>:47 [#uses=0]
+	uitofp i16 0 to float		; <float>:48 [#uses=0]
+	uitofp i16 0 to double		; <double>:49 [#uses=0]
+	icmp ne i32 6, 0		; <i1>:50 [#uses=0]
+	trunc i32 -6 to i8		; <i8>:51 [#uses=0]
+	trunc i32 6 to i8		; <i8>:52 [#uses=0]
+	trunc i32 6 to i16		; <i16>:53 [#uses=0]
+	bitcast i32 0 to i32		; <i32>:54 [#uses=0]
+	sext i32 0 to i64		; <i64>:55 [#uses=0]
+	sext i32 0 to i64		; <i64>:56 [#uses=0]
+	sitofp i32 0 to float		; <float>:57 [#uses=0]
+	sitofp i32 0 to double		; <double>:58 [#uses=0]
+	icmp ne i32 6, 0		; <i1>:59 [#uses=0]
+	trunc i32 7 to i8		; <i8>:60 [#uses=0]
+	trunc i32 8 to i8		; <i8>:61 [#uses=0]
+	trunc i32 9 to i16		; <i16>:62 [#uses=0]
+	bitcast i32 10 to i32		; <i32>:63 [#uses=0]
+	zext i32 0 to i64		; <i64>:64 [#uses=0]
+	zext i32 0 to i64		; <i64>:65 [#uses=0]
+	uitofp i32 0 to float		; <float>:66 [#uses=0]
+	uitofp i32 0 to double		; <double>:67 [#uses=0]
+	icmp ne i64 0, 0		; <i1>:68 [#uses=0]
+	trunc i64 0 to i8		; <i8>:69 [#uses=0]
+	trunc i64 0 to i8		; <i8>:70 [#uses=0]
+	trunc i64 0 to i16		; <i16>:71 [#uses=0]
+	trunc i64 0 to i16		; <i16>:72 [#uses=0]
+	trunc i64 0 to i32		; <i32>:73 [#uses=0]
+	trunc i64 0 to i32		; <i32>:74 [#uses=0]
+	bitcast i64 0 to i64		; <i64>:75 [#uses=0]
+	bitcast i64 0 to i64		; <i64>:76 [#uses=0]
+	sitofp i64 0 to float		; <float>:77 [#uses=0]
+	sitofp i64 0 to double		; <double>:78 [#uses=0]
+	icmp ne i64 1, 0		; <i1>:79 [#uses=0]
+	trunc i64 1 to i8		; <i8>:80 [#uses=0]
+	trunc i64 1 to i8		; <i8>:81 [#uses=0]
+	trunc i64 1 to i16		; <i16>:82 [#uses=0]
+	trunc i64 1 to i16		; <i16>:83 [#uses=0]
+	trunc i64 1 to i32		; <i32>:84 [#uses=0]
+	trunc i64 1 to i32		; <i32>:85 [#uses=0]
+	bitcast i64 1 to i64		; <i64>:86 [#uses=0]
+	bitcast i64 1 to i64		; <i64>:87 [#uses=0]
+	uitofp i64 1 to float		; <float>:88 [#uses=0]
+	uitofp i64 0 to double		; <double>:89 [#uses=0]
+	bitcast float 0.000000e+00 to float		; <float>:90 [#uses=0]
+	fpext float 0.000000e+00 to double		; <double>:91 [#uses=0]
+	fptosi double 0.000000e+00 to i8		; <i8>:92 [#uses=0]
+	fptoui double 0.000000e+00 to i8		; <i8>:93 [#uses=0]
+	fptosi double 0.000000e+00 to i16		; <i16>:94 [#uses=0]
+	fptoui double 0.000000e+00 to i16		; <i16>:95 [#uses=0]
+	fptosi double 0.000000e+00 to i32		; <i32>:96 [#uses=0]
+	fptoui double 0.000000e+00 to i32		; <i32>:97 [#uses=0]
+	fptosi double 0.000000e+00 to i64		; <i64>:98 [#uses=0]
+	fptrunc double 0.000000e+00 to float		; <float>:99 [#uses=0]
+	bitcast double 0.000000e+00 to double		; <double>:100 [#uses=0]
+	ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/test-constantexpr.ll b/test/ExecutionEngine/MCJIT/test-constantexpr.ll
new file mode 100644
index 0000000..6b46639
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-constantexpr.ll
@@ -0,0 +1,12 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; This tests to make sure that we can evaluate weird constant expressions
+
+@A = global i32 5		; <i32*> [#uses=1]
+@B = global i32 6		; <i32*> [#uses=1]
+
+define i32 @main() {
+	%A = or i1 false, icmp slt (i32* @A, i32* @B)		; <i1> [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll
new file mode 100644
index 0000000..35491df
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll
@@ -0,0 +1,21 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define double @test(double* %DP, double %Arg) {
+	%D = load double* %DP		; <double> [#uses=1]
+	%V = fadd double %D, 1.000000e+00		; <double> [#uses=2]
+	%W = fsub double %V, %V		; <double> [#uses=3]
+	%X = fmul double %W, %W		; <double> [#uses=2]
+	%Y = fdiv double %X, %X		; <double> [#uses=2]
+	%Q = fadd double %Y, %Arg		; <double> [#uses=1]
+	%R = bitcast double %Q to double		; <double> [#uses=1]
+	store double %Q, double* %DP
+	ret double %Y
+}
+
+define i32 @main() {
+	%X = alloca double		; <double*> [#uses=2]
+	store double 0.000000e+00, double* %X
+	call double @test( double* %X, double 2.000000e+00 )		; <double>:1 [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-fp.ll b/test/ExecutionEngine/MCJIT/test-fp.ll
new file mode 100644
index 0000000..6fc5a50
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-fp.ll
@@ -0,0 +1,23 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define double @test(double* %DP, double %Arg) {
+	%D = load double* %DP		; <double> [#uses=1]
+	%V = fadd double %D, 1.000000e+00		; <double> [#uses=2]
+	%W = fsub double %V, %V		; <double> [#uses=3]
+	%X = fmul double %W, %W		; <double> [#uses=2]
+	%Y = fdiv double %X, %X		; <double> [#uses=2]
+	%Z = frem double %Y, %Y		; <double> [#uses=3]
+	%Z1 = fdiv double %Z, %W		; <double> [#uses=0]
+	%Q = fadd double %Z, %Arg		; <double> [#uses=1]
+	%R = bitcast double %Q to double		; <double> [#uses=1]
+	store double %R, double* %DP
+	ret double %Z
+}
+
+define i32 @main() {
+	%X = alloca double		; <double*> [#uses=2]
+	store double 0.000000e+00, double* %X
+	call double @test( double* %X, double 2.000000e+00 )		; <double>:1 [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll b/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll
new file mode 100644
index 0000000..4a790c6
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll
@@ -0,0 +1,34 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+@count = global i32 1, align 4
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 49
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* @count, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* @count, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %2 = load i32* %i, align 4
+  %inc1 = add nsw i32 %2, 1
+  store i32 %inc1, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %3 = load i32* @count, align 4
+  %sub = sub nsw i32 %3, 50
+  ret i32 %sub
+}
diff --git a/test/ExecutionEngine/MCJIT/test-loadstore.ll b/test/ExecutionEngine/MCJIT/test-loadstore.ll
new file mode 100644
index 0000000..e917149
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-loadstore.ll
@@ -0,0 +1,31 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define void @test(i8* %P, i16* %P.upgrd.1, i32* %P.upgrd.2, i64* %P.upgrd.3) {
+	%V = load i8* %P		; <i8> [#uses=1]
+	store i8 %V, i8* %P
+	%V.upgrd.4 = load i16* %P.upgrd.1		; <i16> [#uses=1]
+	store i16 %V.upgrd.4, i16* %P.upgrd.1
+	%V.upgrd.5 = load i32* %P.upgrd.2		; <i32> [#uses=1]
+	store i32 %V.upgrd.5, i32* %P.upgrd.2
+	%V.upgrd.6 = load i64* %P.upgrd.3		; <i64> [#uses=1]
+	store i64 %V.upgrd.6, i64* %P.upgrd.3
+	ret void
+}
+
+define i32 @varalloca(i32 %Size) {
+        ;; Variable sized alloca
+	%X = alloca i32, i32 %Size		; <i32*> [#uses=2]
+	store i32 %Size, i32* %X
+	%Y = load i32* %X		; <i32> [#uses=1]
+	ret i32 %Y
+}
+
+define i32 @main() {
+	%A = alloca i8		; <i8*> [#uses=1]
+	%B = alloca i16		; <i16*> [#uses=1]
+	%C = alloca i32		; <i32*> [#uses=1]
+	%D = alloca i64		; <i64*> [#uses=1]
+	call void @test( i8* %A, i16* %B, i32* %C, i64* %D )
+	call i32 @varalloca( i32 7 )		; <i32>:1 [#uses=0]
+	ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/test-local.ll b/test/ExecutionEngine/MCJIT/test-local.ll
new file mode 100644
index 0000000..4f5ae47
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-local.ll
@@ -0,0 +1,34 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  %count = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  store i32 0, i32* %count, align 4
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 50
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %count, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* %count, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %2 = load i32* %i, align 4
+  %inc1 = add nsw i32 %2, 1
+  store i32 %inc1, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %3 = load i32* %count, align 4
+  %sub = sub nsw i32 %3, 50
+  ret i32 %sub
+}
diff --git a/test/ExecutionEngine/MCJIT/test-logical.ll b/test/ExecutionEngine/MCJIT/test-logical.ll
new file mode 100644
index 0000000..0540c22
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-logical.ll
@@ -0,0 +1,18 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+	%A = and i8 4, 8		; <i8> [#uses=2]
+	%B = or i8 %A, 7		; <i8> [#uses=1]
+	%C = xor i8 %B, %A		; <i8> [#uses=0]
+	%A.upgrd.1 = and i16 4, 8		; <i16> [#uses=2]
+	%B.upgrd.2 = or i16 %A.upgrd.1, 7		; <i16> [#uses=1]
+	%C.upgrd.3 = xor i16 %B.upgrd.2, %A.upgrd.1		; <i16> [#uses=0]
+	%A.upgrd.4 = and i32 4, 8		; <i32> [#uses=2]
+	%B.upgrd.5 = or i32 %A.upgrd.4, 7		; <i32> [#uses=1]
+	%C.upgrd.6 = xor i32 %B.upgrd.5, %A.upgrd.4		; <i32> [#uses=0]
+	%A.upgrd.7 = and i64 4, 8		; <i64> [#uses=2]
+	%B.upgrd.8 = or i64 %A.upgrd.7, 7		; <i64> [#uses=1]
+	%C.upgrd.9 = xor i64 %B.upgrd.8, %A.upgrd.7		; <i64> [#uses=0]
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-loop.ll b/test/ExecutionEngine/MCJIT/test-loop.ll
new file mode 100644
index 0000000..b1dbf40
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-loop.ll
@@ -0,0 +1,14 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+; <label>:0
+	br label %Loop
+Loop:		; preds = %Loop, %0
+	%I = phi i32 [ 0, %0 ], [ %i2, %Loop ]		; <i32> [#uses=1]
+	%i2 = add i32 %I, 1		; <i32> [#uses=2]
+	%C = icmp eq i32 %i2, 10		; <i1> [#uses=1]
+	br i1 %C, label %Out, label %Loop
+Out:		; preds = %Loop
+	ret i32 0
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-phi.ll b/test/ExecutionEngine/MCJIT/test-phi.ll
new file mode 100644
index 0000000..fbc0808
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-phi.ll
@@ -0,0 +1,34 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; test phi node
+@Y = global i32 6		; <i32*> [#uses=1]
+
+define void @blah(i32* %X) {
+; <label>:0
+	br label %T
+T:		; preds = %Dead, %0
+	phi i32* [ %X, %0 ], [ @Y, %Dead ]		; <i32*>:1 [#uses=0]
+	ret void
+Dead:		; No predecessors!
+	br label %T
+}
+
+define i32 @test(i1 %C) {
+; <label>:0
+	br i1 %C, label %T, label %T
+T:		; preds = %0, %0
+	%X = phi i32 [ 123, %0 ], [ 123, %0 ]		; <i32> [#uses=1]
+	ret i32 %X
+}
+
+define i32 @main() {
+; <label>:0
+	br label %Test
+Test:		; preds = %Dead, %0
+	%X = phi i32 [ 0, %0 ], [ %Y, %Dead ]		; <i32> [#uses=1]
+	ret i32 %X
+Dead:		; No predecessors!
+	%Y = ashr i32 12, 4		; <i32> [#uses=1]
+	br label %Test
+}
+
diff --git a/test/ExecutionEngine/MCJIT/test-ret.ll b/test/ExecutionEngine/MCJIT/test-ret.ll
new file mode 100644
index 0000000..1b90ee0
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-ret.ll
@@ -0,0 +1,46 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+; test return instructions
+define void @test1() {
+	ret void
+}
+
+define i8 @test2() {
+	ret i8 1
+}
+
+define i8 @test3() {
+	ret i8 1
+}
+
+define i16 @test4() {
+	ret i16 -1
+}
+
+define i16 @test5() {
+	ret i16 -1
+}
+
+define i32 @main() {
+	ret i32 0
+}
+
+define i32 @test6() {
+	ret i32 4
+}
+
+define i64 @test7() {
+	ret i64 0
+}
+
+define i64 @test8() {
+	ret i64 0
+}
+
+define float @test9() {
+	ret float 1.000000e+00
+}
+
+define double @test10() {
+	ret double 2.000000e+00
+}
diff --git a/test/ExecutionEngine/MCJIT/test-return.ll b/test/ExecutionEngine/MCJIT/test-return.ll
new file mode 100644
index 0000000..9c399ca
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-return.ll
@@ -0,0 +1,8 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/test-setcond-fp.ll b/test/ExecutionEngine/MCJIT/test-setcond-fp.ll
new file mode 100644
index 0000000..030ff31
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-setcond-fp.ll
@@ -0,0 +1,24 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+
+define i32 @main() {
+	%double1 = fadd double 0.000000e+00, 0.000000e+00		; <double> [#uses=6]
+	%double2 = fadd double 0.000000e+00, 0.000000e+00		; <double> [#uses=6]
+	%float1 = fadd float 0.000000e+00, 0.000000e+00		; <float> [#uses=6]
+	%float2 = fadd float 0.000000e+00, 0.000000e+00		; <float> [#uses=6]
+	%test49 = fcmp oeq float %float1, %float2		; <i1> [#uses=0]
+	%test50 = fcmp oge float %float1, %float2		; <i1> [#uses=0]
+	%test51 = fcmp ogt float %float1, %float2		; <i1> [#uses=0]
+	%test52 = fcmp ole float %float1, %float2		; <i1> [#uses=0]
+	%test53 = fcmp olt float %float1, %float2		; <i1> [#uses=0]
+	%test54 = fcmp une float %float1, %float2		; <i1> [#uses=0]
+	%test55 = fcmp oeq double %double1, %double2		; <i1> [#uses=0]
+	%test56 = fcmp oge double %double1, %double2		; <i1> [#uses=0]
+	%test57 = fcmp ogt double %double1, %double2		; <i1> [#uses=0]
+	%test58 = fcmp ole double %double1, %double2		; <i1> [#uses=0]
+	%test59 = fcmp olt double %double1, %double2		; <i1> [#uses=0]
+	%test60 = fcmp une double %double1, %double2		; <i1> [#uses=0]
+	ret i32 0
+}
+
+
diff --git a/test/ExecutionEngine/MCJIT/test-setcond-int.ll b/test/ExecutionEngine/MCJIT/test-setcond-int.ll
new file mode 100644
index 0000000..1113efe
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-setcond-int.ll
@@ -0,0 +1,69 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+	%int1 = add i32 0, 0		; <i32> [#uses=6]
+	%int2 = add i32 0, 0		; <i32> [#uses=6]
+	%long1 = add i64 0, 0		; <i64> [#uses=6]
+	%long2 = add i64 0, 0		; <i64> [#uses=6]
+	%sbyte1 = add i8 0, 0		; <i8> [#uses=6]
+	%sbyte2 = add i8 0, 0		; <i8> [#uses=6]
+	%short1 = add i16 0, 0		; <i16> [#uses=6]
+	%short2 = add i16 0, 0		; <i16> [#uses=6]
+	%ubyte1 = add i8 0, 0		; <i8> [#uses=6]
+	%ubyte2 = add i8 0, 0		; <i8> [#uses=6]
+	%uint1 = add i32 0, 0		; <i32> [#uses=6]
+	%uint2 = add i32 0, 0		; <i32> [#uses=6]
+	%ulong1 = add i64 0, 0		; <i64> [#uses=6]
+	%ulong2 = add i64 0, 0		; <i64> [#uses=6]
+	%ushort1 = add i16 0, 0		; <i16> [#uses=6]
+	%ushort2 = add i16 0, 0		; <i16> [#uses=6]
+	%test1 = icmp eq i8 %ubyte1, %ubyte2		; <i1> [#uses=0]
+	%test2 = icmp uge i8 %ubyte1, %ubyte2		; <i1> [#uses=0]
+	%test3 = icmp ugt i8 %ubyte1, %ubyte2		; <i1> [#uses=0]
+	%test4 = icmp ule i8 %ubyte1, %ubyte2		; <i1> [#uses=0]
+	%test5 = icmp ult i8 %ubyte1, %ubyte2		; <i1> [#uses=0]
+	%test6 = icmp ne i8 %ubyte1, %ubyte2		; <i1> [#uses=0]
+	%test7 = icmp eq i16 %ushort1, %ushort2		; <i1> [#uses=0]
+	%test8 = icmp uge i16 %ushort1, %ushort2		; <i1> [#uses=0]
+	%test9 = icmp ugt i16 %ushort1, %ushort2		; <i1> [#uses=0]
+	%test10 = icmp ule i16 %ushort1, %ushort2		; <i1> [#uses=0]
+	%test11 = icmp ult i16 %ushort1, %ushort2		; <i1> [#uses=0]
+	%test12 = icmp ne i16 %ushort1, %ushort2		; <i1> [#uses=0]
+	%test13 = icmp eq i32 %uint1, %uint2		; <i1> [#uses=0]
+	%test14 = icmp uge i32 %uint1, %uint2		; <i1> [#uses=0]
+	%test15 = icmp ugt i32 %uint1, %uint2		; <i1> [#uses=0]
+	%test16 = icmp ule i32 %uint1, %uint2		; <i1> [#uses=0]
+	%test17 = icmp ult i32 %uint1, %uint2		; <i1> [#uses=0]
+	%test18 = icmp ne i32 %uint1, %uint2		; <i1> [#uses=0]
+	%test19 = icmp eq i64 %ulong1, %ulong2		; <i1> [#uses=0]
+	%test20 = icmp uge i64 %ulong1, %ulong2		; <i1> [#uses=0]
+	%test21 = icmp ugt i64 %ulong1, %ulong2		; <i1> [#uses=0]
+	%test22 = icmp ule i64 %ulong1, %ulong2		; <i1> [#uses=0]
+	%test23 = icmp ult i64 %ulong1, %ulong2		; <i1> [#uses=0]
+	%test24 = icmp ne i64 %ulong1, %ulong2		; <i1> [#uses=0]
+	%test25 = icmp eq i8 %sbyte1, %sbyte2		; <i1> [#uses=0]
+	%test26 = icmp sge i8 %sbyte1, %sbyte2		; <i1> [#uses=0]
+	%test27 = icmp sgt i8 %sbyte1, %sbyte2		; <i1> [#uses=0]
+	%test28 = icmp sle i8 %sbyte1, %sbyte2		; <i1> [#uses=0]
+	%test29 = icmp slt i8 %sbyte1, %sbyte2		; <i1> [#uses=0]
+	%test30 = icmp ne i8 %sbyte1, %sbyte2		; <i1> [#uses=0]
+	%test31 = icmp eq i16 %short1, %short2		; <i1> [#uses=0]
+	%test32 = icmp sge i16 %short1, %short2		; <i1> [#uses=0]
+	%test33 = icmp sgt i16 %short1, %short2		; <i1> [#uses=0]
+	%test34 = icmp sle i16 %short1, %short2		; <i1> [#uses=0]
+	%test35 = icmp slt i16 %short1, %short2		; <i1> [#uses=0]
+	%test36 = icmp ne i16 %short1, %short2		; <i1> [#uses=0]
+	%test37 = icmp eq i32 %int1, %int2		; <i1> [#uses=0]
+	%test38 = icmp sge i32 %int1, %int2		; <i1> [#uses=0]
+	%test39 = icmp sgt i32 %int1, %int2		; <i1> [#uses=0]
+	%test40 = icmp sle i32 %int1, %int2		; <i1> [#uses=0]
+	%test41 = icmp slt i32 %int1, %int2		; <i1> [#uses=0]
+	%test42 = icmp ne i32 %int1, %int2		; <i1> [#uses=0]
+	%test43 = icmp eq i64 %long1, %long2		; <i1> [#uses=0]
+	%test44 = icmp sge i64 %long1, %long2		; <i1> [#uses=0]
+	%test45 = icmp sgt i64 %long1, %long2		; <i1> [#uses=0]
+	%test46 = icmp sle i64 %long1, %long2		; <i1> [#uses=0]
+	%test47 = icmp slt i64 %long1, %long2		; <i1> [#uses=0]
+	%test48 = icmp ne i64 %long1, %long2		; <i1> [#uses=0]
+	ret i32 0
+}
diff --git a/test/ExecutionEngine/MCJIT/test-shift.ll b/test/ExecutionEngine/MCJIT/test-shift.ll
new file mode 100644
index 0000000..2da824f
--- /dev/null
+++ b/test/ExecutionEngine/MCJIT/test-shift.ll
@@ -0,0 +1,32 @@
+; RUN: %lli -use-mcjit %s > /dev/null
+
+define i32 @main() {
+	%shamt = add i8 0, 1		; <i8> [#uses=8]
+	%shift.upgrd.1 = zext i8 %shamt to i32		; <i32> [#uses=1]
+	%t1.s = shl i32 1, %shift.upgrd.1		; <i32> [#uses=0]
+	%t2.s = shl i32 1, 4		; <i32> [#uses=0]
+	%shift.upgrd.2 = zext i8 %shamt to i32		; <i32> [#uses=1]
+	%t1 = shl i32 1, %shift.upgrd.2		; <i32> [#uses=0]
+	%t2 = shl i32 1, 5		; <i32> [#uses=0]
+	%t2.s.upgrd.3 = shl i64 1, 4		; <i64> [#uses=0]
+	%t2.upgrd.4 = shl i64 1, 5		; <i64> [#uses=0]
+	%shift.upgrd.5 = zext i8 %shamt to i32		; <i32> [#uses=1]
+	%tr1.s = ashr i32 1, %shift.upgrd.5		; <i32> [#uses=0]
+	%tr2.s = ashr i32 1, 4		; <i32> [#uses=0]
+	%shift.upgrd.6 = zext i8 %shamt to i32		; <i32> [#uses=1]
+	%tr1 = lshr i32 1, %shift.upgrd.6		; <i32> [#uses=0]
+	%tr2 = lshr i32 1, 5		; <i32> [#uses=0]
+	%tr1.l = ashr i64 1, 4		; <i64> [#uses=0]
+	%shift.upgrd.7 = zext i8 %shamt to i64		; <i64> [#uses=1]
+	%tr2.l = ashr i64 1, %shift.upgrd.7		; <i64> [#uses=0]
+	%tr3.l = shl i64 1, 4		; <i64> [#uses=0]
+	%shift.upgrd.8 = zext i8 %shamt to i64		; <i64> [#uses=1]
+	%tr4.l = shl i64 1, %shift.upgrd.8		; <i64> [#uses=0]
+	%tr1.u = lshr i64 1, 5		; <i64> [#uses=0]
+	%shift.upgrd.9 = zext i8 %shamt to i64		; <i64> [#uses=1]
+	%tr2.u = lshr i64 1, %shift.upgrd.9		; <i64> [#uses=0]
+	%tr3.u = shl i64 1, 5		; <i64> [#uses=0]
+	%shift.upgrd.10 = zext i8 %shamt to i64		; <i64> [#uses=1]
+	%tr4.u = shl i64 1, %shift.upgrd.10		; <i64> [#uses=0]
+	ret i32 0
+}
diff --git a/test/Instrumentation/AddressSanitizer/bug_11395.ll b/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
index 35c5c4a..35c5c4a 100644
--- a/test/Instrumentation/AddressSanitizer/bug_11395.ll
+++ b/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
diff --git a/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg b/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg
new file mode 100644
index 0000000..b05ed3c
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/MC/ARM/basic-thumb2-instructions.s b/test/MC/ARM/basic-thumb2-instructions.s
index 78311af..ce97ca6 100644
--- a/test/MC/ARM/basic-thumb2-instructions.s
+++ b/test/MC/ARM/basic-thumb2-instructions.s
@@ -1084,9 +1084,13 @@ _func:
 @------------------------------------------------------------------------------
         mcr  p7, #1, r5, c1, c1, #4
         mcr2  p7, #1, r5, c1, c1, #4
+        mcr p14, #0, r4, c0, c5
+        mcr2 p4, #2, r2, c1, c3
 
 @ CHECK: mcr	p7, #1, r5, c1, c1, #4  @ encoding: [0x21,0xee,0x91,0x57]
 @ CHECK: mcr2	p7, #1, r5, c1, c1, #4  @ encoding: [0x21,0xfe,0x91,0x57]
+@ CHECK: mcr	p14, #0, r4, c0, c5, #0 @ encoding: [0x00,0xee,0x15,0x4e]
+@ CHECK: mcr2	p4, #2, r2, c1, c3, #0  @ encoding: [0x41,0xfe,0x13,0x24]
 
 
 @------------------------------------------------------------------------------
@@ -1206,9 +1210,13 @@ _func:
 @------------------------------------------------------------------------------
         mrc  p14, #0, r1, c1, c2, #4
         mrc2  p14, #0, r1, c1, c2, #4
+        mrc p11, #1, r1, c2, c2
+        mrc2 p12, #3, r3, c3, c4
 
 @ CHECK: mrc	p14, #0, r1, c1, c2, #4 @ encoding: [0x11,0xee,0x92,0x1e]
 @ CHECK: mrc2	p14, #0, r1, c1, c2, #4 @ encoding: [0x11,0xfe,0x92,0x1e]
+@ CHECK: mrc	p11, #1, r1, c2, c2, #0 @ encoding: [0x32,0xee,0x12,0x1b]
+@ CHECK: mrc2	p12, #3, r3, c3, c4, #0 @ encoding: [0x73,0xfe,0x14,0x3c]
 
 
 @------------------------------------------------------------------------------
diff --git a/test/MC/ARM/diagnostics.s b/test/MC/ARM/diagnostics.s
index f722dd7..7da79c3 100644
--- a/test/MC/ARM/diagnostics.s
+++ b/test/MC/ARM/diagnostics.s
@@ -216,7 +216,7 @@
         @ Out of order STM registers
         stmda sp!, {r5, r2}
 
-@ CHECK-ERRORS: error: register list not in ascending order
+@ CHECK-ERRORS: warning: register list not in ascending order
 @ CHECK-ERRORS:         stmda     sp!, {r5, r2}
 @ CHECK-ERRORS:                            ^
 
diff --git a/test/MC/ARM/neon-vst-encoding.s b/test/MC/ARM/neon-vst-encoding.s
index 2b14d37..1f07461 100644
--- a/test/MC/ARM/neon-vst-encoding.s
+++ b/test/MC/ARM/neon-vst-encoding.s
@@ -264,3 +264,7 @@
 @ CHECK: vst1.8	{d4, d5}, [r2]          @ encoding: [0x0f,0x4a,0x02,0xf4]
 @ CHECK: vst1.8	{d4, d5}, [r2]          @ encoding: [0x0f,0x4a,0x02,0xf4]
 @ CHECK: vst1.32 {d4, d5}, [r2]         @ encoding: [0x8f,0x4a,0x02,0xf4]
+
+@ rdar://11082188
+        vst2.8 {d8, d10}, [r4]
+@ CHECK: vst2.8	{d8, d10}, [r4]         @ encoding: [0x0f,0x89,0x04,0xf4]
diff --git a/test/MC/ARM/neont2-vst-encoding.s b/test/MC/ARM/neont2-vst-encoding.s
index 1722f12..b50d8b6 100644
--- a/test/MC/ARM/neont2-vst-encoding.s
+++ b/test/MC/ARM/neont2-vst-encoding.s
@@ -101,3 +101,7 @@
   vst4.16	{d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
 @ CHECK: vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xc0,0xf9]
   vst4.32	{d17[0], d19[0], d21[0], d23[0]}, [r0]
+
+@ rdar://11082188
+        vst2.8 {d8, d10}, [r4]
+@ CHECK: vst2.8	{d8, d10}, [r4]         @ encoding: [0x04,0xf9,0x0f,0x89]
diff --git a/test/MC/ARM/simple-fp-encoding.s b/test/MC/ARM/simple-fp-encoding.s
index a40e02b..b592f1e 100644
--- a/test/MC/ARM/simple-fp-encoding.s
+++ b/test/MC/ARM/simple-fp-encoding.s
@@ -120,10 +120,21 @@
 @ CHECK: vnmls.f32 s1, s2, s0        @ encoding: [0x00,0x0a,0x51,0xee]
         vnmls.f32       s1, s2, s0
 
-@ CHECK: vmrs apsr_nzcv, fpscr       @ encoding: [0x10,0xfa,0xf1,0xee]
-@ CHECK: vmrs apsr_nzcv, fpscr       @ encoding: [0x10,0xfa,0xf1,0xee]
+        vmrs    APSR_nzcv, fpscr
         vmrs    apsr_nzcv, fpscr
         fmstat
+        vmrs    r2, fpsid
+        vmrs    r3, FPSID
+        vmrs    r4, mvfr0
+        vmrs    r5, MVFR1
+
+@ CHECK: vmrs APSR_nzcv, fpscr       @ encoding: [0x10,0xfa,0xf1,0xee]
+@ CHECK: vmrs APSR_nzcv, fpscr       @ encoding: [0x10,0xfa,0xf1,0xee]
+@ CHECK: vmrs APSR_nzcv, fpscr       @ encoding: [0x10,0xfa,0xf1,0xee]
+@ CHECK: vmrs r2, fpsid              @ encoding: [0x10,0x2a,0xf0,0xee]
+@ CHECK: vmrs r3, fpsid              @ encoding: [0x10,0x3a,0xf0,0xee]
+@ CHECK: vmrs r4, mvfr0              @ encoding: [0x10,0x4a,0xf7,0xee]
+@ CHECK: vmrs r5, mvfr1              @ encoding: [0x10,0x5a,0xf6,0xee]
 
 @ CHECK: vnegne.f64 d16, d16         @ encoding: [0x60,0x0b,0xf1,0x1e]
         vnegne.f64      d16, d16
@@ -302,15 +313,40 @@
 
 
 @ VCVT (between floating-point and fixed-point)
-	vcvt.f32.u32 s0, s0, #20
+        vcvt.f32.u32 s0, s0, #20
         vcvt.f64.s32 d0, d0, #32
         vcvt.f32.u16 s0, s0, #1
         vcvt.f64.s16 d0, d0, #16
+        vcvt.f32.s32 s1, s1, #20
+        vcvt.f64.u32 d20, d20, #32
+        vcvt.f32.s16 s17, s17, #1
+        vcvt.f64.u16 d23, d23, #16
+        vcvt.u32.f32 s12, s12, #20 
+        vcvt.s32.f64 d2, d2, #32
+        vcvt.u16.f32 s28, s28, #1
+        vcvt.s16.f64 d15, d15, #16
+        vcvt.s32.f32 s1, s1, #20
+        vcvt.u32.f64 d20, d20, #32
+        vcvt.s16.f32 s17, s17, #1
+        vcvt.u16.f64 d23, d23, #16
 
 @ CHECK: vcvt.f32.u32	s0, s0, #20     @ encoding: [0xc6,0x0a,0xbb,0xee]
 @ CHECK: vcvt.f64.s32	d0, d0, #32     @ encoding: [0xc0,0x0b,0xba,0xee]
 @ CHECK: vcvt.f32.u16	s0, s0, #1      @ encoding: [0x67,0x0a,0xbb,0xee]
 @ CHECK: vcvt.f64.s16	d0, d0, #16     @ encoding: [0x40,0x0b,0xba,0xee]
+@ CHECK: vcvt.f32.s32	s1, s1, #20     @ encoding: [0xc6,0x0a,0xfa,0xee]
+@ CHECK: vcvt.f64.u32	d20, d20, #32   @ encoding: [0xc0,0x4b,0xfb,0xee]
+@ CHECK: vcvt.f32.s16	s17, s17, #1    @ encoding: [0x67,0x8a,0xfa,0xee]
+@ CHECK: vcvt.f64.u16	d23, d23, #16   @ encoding: [0x40,0x7b,0xfb,0xee]
+
+@ CHECK: vcvt.u32.f32	s12, s12, #20   @ encoding: [0xc6,0x6a,0xbf,0xee]
+@ CHECK: vcvt.s32.f64	d2, d2, #32     @ encoding: [0xc0,0x2b,0xbe,0xee]
+@ CHECK: vcvt.u16.f32	s28, s28, #1    @ encoding: [0x67,0xea,0xbf,0xee]
+@ CHECK: vcvt.s16.f64	d15, d15, #16   @ encoding: [0x40,0xfb,0xbe,0xee]
+@ CHECK: vcvt.s32.f32	s1, s1, #20     @ encoding: [0xc6,0x0a,0xfe,0xee]
+@ CHECK: vcvt.u32.f64	d20, d20, #32   @ encoding: [0xc0,0x4b,0xff,0xee]
+@ CHECK: vcvt.s16.f32	s17, s17, #1    @ encoding: [0x67,0x8a,0xfe,0xee]
+@ CHECK: vcvt.u16.f64	d23, d23, #16   @ encoding: [0x40,0x7b,0xff,0xee]
 
 
 @ Use NEON to load some f32 immediates that don't fit the f8 representation.
diff --git a/test/MC/ARM/vpush-vpop.s b/test/MC/ARM/vpush-vpop.s
index 1212c83..4fb4dec 100644
--- a/test/MC/ARM/vpush-vpop.s
+++ b/test/MC/ARM/vpush-vpop.s
@@ -7,6 +7,21 @@ foo:
  vpush {s8, s9, s10, s11, s12}
  vpop  {d8, d9, d10, d11, d12}
  vpop  {s8, s9, s10, s11, s12}
+@ optional size suffix
+ vpush.s8 {d8, d9, d10, d11, d12}
+ vpush.16 {s8, s9, s10, s11, s12}
+ vpop.f32  {d8, d9, d10, d11, d12}
+ vpop.64  {s8, s9, s10, s11, s12}
+
+@ CHECK-THUMB: vpush {d8, d9, d10, d11, d12} @ encoding: [0x2d,0xed,0x0a,0x8b]
+@ CHECK-THUMB: vpush {s8, s9, s10, s11, s12} @ encoding: [0x2d,0xed,0x05,0x4a]
+@ CHECK-THUMB: vpop  {d8, d9, d10, d11, d12} @ encoding: [0xbd,0xec,0x0a,0x8b]
+@ CHECK-THUMB: vpop  {s8, s9, s10, s11, s12} @ encoding: [0xbd,0xec,0x05,0x4a]
+
+@ CHECK-ARM: vpush {d8, d9, d10, d11, d12} @ encoding: [0x0a,0x8b,0x2d,0xed]
+@ CHECK-ARM: vpush {s8, s9, s10, s11, s12} @ encoding: [0x05,0x4a,0x2d,0xed]
+@ CHECK-ARM: vpop  {d8, d9, d10, d11, d12} @ encoding: [0x0a,0x8b,0xbd,0xec]
+@ CHECK-ARM: vpop  {s8, s9, s10, s11, s12} @ encoding: [0x05,0x4a,0xbd,0xec]
 
 @ CHECK-THUMB: vpush {d8, d9, d10, d11, d12} @ encoding: [0x2d,0xed,0x0a,0x8b]
 @ CHECK-THUMB: vpush {s8, s9, s10, s11, s12} @ encoding: [0x2d,0xed,0x05,0x4a]
diff --git a/test/MC/AsmParser/macro-args.s b/test/MC/AsmParser/macro-args.s
index 808b6eb..4b87899 100644
--- a/test/MC/AsmParser/macro-args.s
+++ b/test/MC/AsmParser/macro-args.s
@@ -8,3 +8,13 @@
 GET    is_sse, %eax
 
 // CHECK: movl	is_sse@GOTOFF(%ebx), %eax
+
+.macro bar
+    .long $n
+.endm
+
+bar 1, 2, 3
+bar
+
+// CHECK: .long 3
+// CHECK: .long 0
diff --git a/test/MC/AsmParser/variables-invalid.s b/test/MC/AsmParser/variables-invalid.s
index 21758d2..c466d42 100644
--- a/test/MC/AsmParser/variables-invalid.s
+++ b/test/MC/AsmParser/variables-invalid.s
@@ -13,6 +13,7 @@ t2_s0:
         t2_s0 = 2
 
         t3_s0 = t2_s0 + 1
+        .long t3_s0
 // CHECK: invalid reassignment of non-absolute variable 't3_s0'
         t3_s0 = 1
 
diff --git a/test/MC/Disassembler/ARM/neon.txt b/test/MC/Disassembler/ARM/neon.txt
index e4346ec..998e9e8 100644
--- a/test/MC/Disassembler/ARM/neon.txt
+++ b/test/MC/Disassembler/ARM/neon.txt
@@ -1869,3 +1869,195 @@
 # CHECK: vmov.f32	d0, #1.600000e+01
 # CHECK: vmov.f32	q0, #1.600000e+01
 
+# rdar://10798451
+0xe7 0xf9 0x32 0x1d
+# CHECK vld2.8	{d17[], d19[]}, [r7, :16], r2
+0xe7 0xf9 0x3d 0x1d
+# CHECK vld2.8	{d17[], d19[]}, [r7, :16]!
+0xe7 0xf9 0x3f 0x1d
+# CHECK vld2.8	{d17[], d19[]}, [r7, :16]
+
+# rdar://11034702
+0x0d 0x87 0x04 0xf4
+# CHECK: vst1.8	{d8}, [r4]!            
+0x4d 0x87 0x04 0xf4
+# CHECK: vst1.16	{d8}, [r4]!             
+0x8d 0x87 0x04 0xf4
+# CHECK: vst1.32	{d8}, [r4]!             
+0xcd 0x87 0x04 0xf4
+# CHECK: vst1.64	{d8}, [r4]!             
+0x06 0x87 0x04 0xf4
+# CHECK: vst1.8	{d8}, [r4], r6          
+0x46 0x87 0x04 0xf4
+# CHECK: vst1.16	{d8}, [r4], r6          
+0x86 0x87 0x04 0xf4
+# CHECK: vst1.32	{d8}, [r4], r6          
+0xc6 0x87 0x04 0xf4
+# CHECK: vst1.64	{d8}, [r4], r6          
+
+0x0d 0x8a 0x04 0xf4
+# CHECK: vst1.8	{d8, d9}, [r4]!         
+0x4d 0x8a 0x04 0xf4
+# CHECK: vst1.16	{d8, d9}, [r4]!         
+0x8d 0x8a 0x04 0xf4
+# CHECK: vst1.32	{d8, d9}, [r4]!         
+0xcd 0x8a 0x04 0xf4
+# CHECK: vst1.64	{d8, d9}, [r4]!         
+0x06 0x8a 0x04 0xf4
+# CHECK: vst1.8	{d8, d9}, [r4], r6      
+0x46 0x8a 0x04 0xf4
+# CHECK: vst1.16	{d8, d9}, [r4], r6      
+0x86 0x8a 0x04 0xf4
+# CHECK: vst1.32	{d8, d9}, [r4], r6      
+0xc6 0x8a 0x04 0xf4
+# CHECK: vst1.64	{d8, d9}, [r4], r6      
+
+0x0d 0x86 0x04 0xf4
+# CHECK: vst1.8	{d8, d9, d10}, [r4]!    
+0x4d 0x86 0x04 0xf4
+# CHECK: vst1.16	{d8, d9, d10}, [r4]!    
+0x8d 0x86 0x04 0xf4
+# CHECK: vst1.32	{d8, d9, d10}, [r4]!    
+0xcd 0x86 0x04 0xf4
+# CHECK: vst1.64	{d8, d9, d10}, [r4]!    
+0x06 0x86 0x04 0xf4
+# CHECK: vst1.8	{d8, d9, d10}, [r4], r6 
+0x46 0x86 0x04 0xf4
+# CHECK: vst1.16	{d8, d9, d10}, [r4], r6 
+0x86 0x86 0x04 0xf4
+# CHECK: vst1.32	{d8, d9, d10}, [r4], r6 
+0xc6 0x86 0x04 0xf4
+# CHECK: vst1.64	{d8, d9, d10}, [r4], r6 
+
+0x0d 0x82 0x04 0xf4
+# CHECK: vst1.8	{d8, d9, d10, d11}, [r4]! 
+0x4d 0x82 0x04 0xf4
+# CHECK: vst1.16	{d8, d9, d10, d11}, [r4]! 
+0x8d 0x82 0x04 0xf4
+# CHECK: vst1.32	{d8, d9, d10, d11}, [r4]! 
+0xcd 0x82 0x04 0xf4
+# CHECK: vst1.64	{d8, d9, d10, d11}, [r4]! 
+0x06 0x82 0x04 0xf4
+# CHECK: vst1.8	{d8, d9, d10, d11}, [r4], r6 
+0x46 0x82 0x04 0xf4
+# CHECK: vst1.16	{d8, d9, d10, d11}, [r4], r6 
+0x86 0x82 0x04 0xf4
+# CHECK: vst1.32	{d8, d9, d10, d11}, [r4], r6 
+0xc6 0x82 0x04 0xf4
+# CHECK: vst1.64	{d8, d9, d10, d11}, [r4], r6 
+
+0x0d 0x88 0x04 0xf4
+# CHECK: vst2.8	{d8, d9}, [r4]!         
+0x4d 0x88 0x04 0xf4
+# CHECK: vst2.16	{d8, d9}, [r4]!         
+0x8d 0x88 0x04 0xf4
+# CHECK: vst2.32	{d8, d9}, [r4]!         
+0x06 0x88 0x04 0xf4
+# CHECK: vst2.8	{d8, d9}, [r4], r6      
+0x46 0x88 0x04 0xf4
+# CHECK: vst2.16	{d8, d9}, [r4], r6      
+0x86 0x88 0x04 0xf4
+# CHECK: vst2.32	{d8, d9}, [r4], r6      
+
+0x0d 0x89 0x04 0xf4
+# CHECK: vst2.8	{d8, d10}, [r4]!        
+0x4d 0x89 0x04 0xf4
+# CHECK: vst2.16	{d8, d10}, [r4]!        
+0x8d 0x89 0x04 0xf4
+# CHECK: vst2.32	{d8, d10}, [r4]!        
+0x06 0x89 0x04 0xf4
+# CHECK: vst2.8	{d8, d10}, [r4], r6     
+0x46 0x89 0x04 0xf4
+# CHECK: vst2.16	{d8, d10}, [r4], r6     
+0x86 0x89 0x04 0xf4
+# CHECK: vst2.32	{d8, d10}, [r4], r6     
+
+0x0d 0x84 0x04 0xf4
+# CHECK: vst3.8	{d8, d9, d10}, [r4]!    
+0x4d 0x84 0x04 0xf4
+# CHECK: vst3.16	{d8, d9, d10}, [r4]!    
+0x8d 0x84 0x04 0xf4
+# CHECK: vst3.32	{d8, d9, d10}, [r4]!    
+0x06 0x85 0x04 0xf4
+# CHECK: vst3.8	{d8, d10, d12}, [r4], r6 
+0x46 0x85 0x04 0xf4
+# CHECK: vst3.16	{d8, d10, d12}, [r4], r6 
+0x86 0x85 0x04 0xf4
+# CHECK: vst3.32	{d8, d10, d12}, [r4], r6 
+
+0x0d 0x80 0x04 0xf4
+# CHECK: vst4.8	{d8, d9, d10, d11}, [r4]! 
+0x4d 0x80 0x04 0xf4
+# CHECK: vst4.16	{d8, d9, d10, d11}, [r4]! 
+0x8d 0x80 0x04 0xf4
+# CHECK: vst4.32	{d8, d9, d10, d11}, [r4]! 
+0x06 0x81 0x04 0xf4
+# CHECK: vst4.8	{d8, d10, d12, d14}, [r4], r6 
+0x46 0x81 0x04 0xf4
+# CHECK: vst4.16	{d8, d10, d12, d14}, [r4], r6 
+0x86 0x81 0x04 0xf4
+# CHECK: vst4.32	{d8, d10, d12, d14}, [r4], r6 
+
+0x4f 0x8a 0x04 0xf4
+# CHECK: vst1.16	{d8, d9}, [r4]          
+0x8f 0x8a 0x04 0xf4
+# CHECK: vst1.32	{d8, d9}, [r4]          
+0xcf 0x8a 0x04 0xf4
+# CHECK: vst1.64	{d8, d9}, [r4]          
+0x0f 0x8a 0x04 0xf4
+# CHECK: vst1.8	{d8, d9}, [r4]          
+
+0x4f 0x88 0x04 0xf4
+# CHECK: vst2.16	{d8, d9}, [r4]          
+0x8f 0x88 0x04 0xf4
+# CHECK: vst2.32	{d8, d9}, [r4]          
+0x0f 0x88 0x04 0xf4
+# CHECK: vst2.8	{d8, d9}, [r4]          
+
+0x4d 0x88 0x04 0xf4
+# CHECK: vst2.16	{d8, d9}, [r4]!         
+0x46 0x88 0x04 0xf4
+# CHECK: vst2.16	{d8, d9}, [r4], r6      
+0x8d 0x88 0x04 0xf4
+# CHECK: vst2.32	{d8, d9}, [r4]!         
+0x86 0x88 0x04 0xf4
+# CHECK: vst2.32	{d8, d9}, [r4], r6      
+0x0d 0x88 0x04 0xf4
+# CHECK: vst2.8	{d8, d9}, [r4]!         
+0x06 0x88 0x04 0xf4
+# CHECK: vst2.8	{d8, d9}, [r4], r6      
+
+0x4f 0x89 0x04 0xf4
+# CHECK: vst2.16	{d8, d10}, [r4]         
+0x8f 0x89 0x04 0xf4
+# CHECK: vst2.32	{d8, d10}, [r4]         
+0x0f 0x89 0x04 0xf4
+# CHECK: vst2.8	{d8, d10}, [r4]         
+
+0x0f 0x84 0x04 0xf4
+# CHECK: vst3.8	{d8, d9, d10}, [r4]     
+0x4f 0x84 0x04 0xf4
+# CHECK: vst3.16	{d8, d9, d10}, [r4]     
+0x8f 0x84 0x04 0xf4
+# CHECK: vst3.32	{d8, d9, d10}, [r4]     
+
+0x0f 0x80 0x04 0xf4
+# CHECK: vst4.8	{d8, d9, d10, d11}, [r4] 
+0x4f 0x80 0x04 0xf4
+# CHECK: vst4.16	{d8, d9, d10, d11}, [r4] 
+0x8f 0x80 0x04 0xf4
+# CHECK: vst4.32	{d8, d9, d10, d11}, [r4] 
+
+0x0f 0x85 0x04 0xf4
+# CHECK: vst3.8	{d8, d10, d12}, [r4]    
+0x4f 0x85 0x04 0xf4
+# CHECK: vst3.16	{d8, d10, d12}, [r4]    
+0x8f 0x85 0x04 0xf4
+# CHECK: vst3.32	{d8, d10, d12}, [r4]    
+
+0x0f 0x81 0x04 0xf4
+# CHECK: vst4.8	{d8, d10, d12, d14}, [r4] 
+0x4f 0x81 0x04 0xf4
+# CHECK: vst4.16	{d8, d10, d12, d14}, [r4] 
+0x8f 0x81 0x04 0xf4
+# CHECK: vst4.32	{d8, d10, d12, d14}, [r4] 
diff --git a/test/MC/Disassembler/ARM/neont2.txt b/test/MC/Disassembler/ARM/neont2.txt
index ff1838e..f8e7dbe 100644
--- a/test/MC/Disassembler/ARM/neont2.txt
+++ b/test/MC/Disassembler/ARM/neont2.txt
@@ -1588,3 +1588,193 @@
 0x63 0xf9 0x37 0xc9
 # CHECK: vld2.8	{d28, d30}, [r3, :256], r7
 
+# rdar://10798451
+0xe7 0xf9 0x32 0x1d
+# CHECK vld2.8	{d17[], d19[]}, [r7, :16], r2
+0xe7 0xf9 0x3d 0x1d
+# CHECK vld2.8	{d17[], d19[]}, [r7, :16]!
+0xe7 0xf9 0x3f 0x1d
+# CHECK vld2.8	{d17[], d19[]}, [r7, :16]
+
+# rdar://11034702
+0x04 0xf9 0x0d 0x87
+# CHECK: vst1.8	{d8}, [r4]!             
+0x04 0xf9 0x4d 0x87
+# CHECK: vst1.16	{d8}, [r4]!             
+0x04 0xf9 0x8d 0x87
+# CHECK: vst1.32	{d8}, [r4]!             
+0x04 0xf9 0xcd 0x87
+# CHECK: vst1.64	{d8}, [r4]!             
+0x04 0xf9 0x06 0x87
+# CHECK: vst1.8	{d8}, [r4], r6          
+0x04 0xf9 0x46 0x87
+# CHECK: vst1.16	{d8}, [r4], r6          
+0x04 0xf9 0x86 0x87
+# CHECK: vst1.32	{d8}, [r4], r6          
+0x04 0xf9 0xc6 0x87
+# CHECK: vst1.64	{d8}, [r4], r6          
+
+0x04 0xf9 0x0d 0x8a
+# CHECK: vst1.8	{d8, d9}, [r4]!         
+0x04 0xf9 0x4d 0x8a
+# CHECK: vst1.16	{d8, d9}, [r4]!         
+0x04 0xf9 0x8d 0x8a
+# CHECK: vst1.32	{d8, d9}, [r4]!         
+0x04 0xf9 0xcd 0x8a
+# CHECK: vst1.64	{d8, d9}, [r4]!         
+0x04 0xf9 0x06 0x8a
+# CHECK: vst1.8	{d8, d9}, [r4], r6      
+0x04 0xf9 0x46 0x8a
+# CHECK: vst1.16	{d8, d9}, [r4], r6      
+0x04 0xf9 0x86 0x8a
+# CHECK: vst1.32	{d8, d9}, [r4], r6      
+0x04 0xf9 0xc6 0x8a
+# CHECK: vst1.64	{d8, d9}, [r4], r6      
+
+0x04 0xf9 0x0d 0x86
+# CHECK: vst1.8	{d8, d9, d10}, [r4]!    
+0x04 0xf9 0x4d 0x86
+# CHECK: vst1.16	{d8, d9, d10}, [r4]!    
+0x04 0xf9 0x8d 0x86
+# CHECK: vst1.32	{d8, d9, d10}, [r4]!    
+0x04 0xf9 0xcd 0x86
+# CHECK: vst1.64	{d8, d9, d10}, [r4]!    
+0x04 0xf9 0x06 0x86
+# CHECK: vst1.8	{d8, d9, d10}, [r4], r6 
+0x04 0xf9 0x46 0x86
+# CHECK: vst1.16	{d8, d9, d10}, [r4], r6 
+0x04 0xf9 0x86 0x86
+# CHECK: vst1.32	{d8, d9, d10}, [r4], r6 
+0x04 0xf9 0xc6 0x86
+# CHECK: vst1.64	{d8, d9, d10}, [r4], r6 
+
+0x04 0xf9 0x0d 0x82
+# CHECK: vst1.8	{d8, d9, d10, d11}, [r4]! 
+0x04 0xf9 0x4d 0x82
+# CHECK: vst1.16	{d8, d9, d10, d11}, [r4]! 
+0x04 0xf9 0x8d 0x82
+# CHECK: vst1.32	{d8, d9, d10, d11}, [r4]! 
+0x04 0xf9 0xcd 0x82
+# CHECK: vst1.64	{d8, d9, d10, d11}, [r4]! 
+0x04 0xf9 0x06 0x82
+# CHECK: vst1.8	{d8, d9, d10, d11}, [r4], r6 
+0x04 0xf9 0x46 0x82
+# CHECK: vst1.16	{d8, d9, d10, d11}, [r4], r6 
+0x04 0xf9 0x86 0x82
+# CHECK: vst1.32	{d8, d9, d10, d11}, [r4], r6 
+0x04 0xf9 0xc6 0x82
+# CHECK: vst1.64	{d8, d9, d10, d11}, [r4], r6 
+
+0x04 0xf9 0x0d 0x88
+# CHECK: vst2.8	{d8, d9}, [r4]!         
+0x04 0xf9 0x4d 0x88
+# CHECK: vst2.16	{d8, d9}, [r4]!         
+0x04 0xf9 0x8d 0x88
+# CHECK: vst2.32	{d8, d9}, [r4]!         
+0x04 0xf9 0x06 0x88
+# CHECK: vst2.8	{d8, d9}, [r4], r6      
+0x04 0xf9 0x46 0x88
+# CHECK: vst2.16	{d8, d9}, [r4], r6      
+0x04 0xf9 0x86 0x88
+# CHECK: vst2.32	{d8, d9}, [r4], r6      
+
+0x04 0xf9 0x0d 0x89
+# CHECK: vst2.8	{d8, d10}, [r4]!        
+0x04 0xf9 0x4d 0x89
+# CHECK: vst2.16	{d8, d10}, [r4]!        
+0x04 0xf9 0x8d 0x89
+# CHECK: vst2.32	{d8, d10}, [r4]!        
+0x04 0xf9 0x06 0x89
+# CHECK: vst2.8	{d8, d10}, [r4], r6     
+0x04 0xf9 0x46 0x89
+# CHECK: vst2.16	{d8, d10}, [r4], r6     
+0x04 0xf9 0x86 0x89
+# CHECK: vst2.32	{d8, d10}, [r4], r6     
+
+0x04 0xf9 0x0d 0x84
+# CHECK: vst3.8	{d8, d9, d10}, [r4]!    
+0x04 0xf9 0x4d 0x84
+# CHECK: vst3.16	{d8, d9, d10}, [r4]!    
+0x04 0xf9 0x8d 0x84
+# CHECK: vst3.32	{d8, d9, d10}, [r4]!    
+0x04 0xf9 0x06 0x85
+# CHECK: vst3.8	{d8, d10, d12}, [r4], r6 
+0x04 0xf9 0x46 0x85
+# CHECK: vst3.16	{d8, d10, d12}, [r4], r6 
+0x04 0xf9 0x86 0x85
+# CHECK: vst3.32	{d8, d10, d12}, [r4], r6 
+
+0x04 0xf9 0x0d 0x80
+# CHECK: vst4.8	{d8, d9, d10, d11}, [r4]!
+0x04 0xf9 0x4d 0x80
+# CHECK: vst4.16	{d8, d9, d10, d11}, [r4]!
+0x04 0xf9 0x8d 0x80
+# CHECK: vst4.32	{d8, d9, d10, d11}, [r4]!
+0x04 0xf9 0x06 0x81
+# CHECK: vst4.8	{d8, d10, d12, d14}, [r4], r6
+0x04 0xf9 0x46 0x81
+# CHECK: vst4.16	{d8, d10, d12, d14}, [r4], r6
+0x04 0xf9 0x86 0x81
+# CHECK: vst4.32	{d8, d10, d12, d14}, [r4], r6
+
+0x04 0xf9 0x4f 0x8a
+# CHECK: vst1.16	{d8, d9}, [r4]          
+0x04 0xf9 0x8f 0x8a
+# CHECK: vst1.32	{d8, d9}, [r4]          
+0x04 0xf9 0xcf 0x8a
+# CHECK: vst1.64	{d8, d9}, [r4]          
+0x04 0xf9 0x0f 0x8a
+# CHECK: vst1.8	{d8, d9}, [r4]          
+0x04 0xf9 0x4f 0x88
+# CHECK: vst2.16	{d8, d9}, [r4]          
+0x04 0xf9 0x8f 0x88
+# CHECK: vst2.32	{d8, d9}, [r4]          
+0x04 0xf9 0x0f 0x88
+# CHECK: vst2.8	{d8, d9}, [r4]          
+0x04 0xf9 0x4d 0x88
+# CHECK: vst2.16	{d8, d9}, [r4]!         
+0x04 0xf9 0x46 0x88
+# CHECK: vst2.16	{d8, d9}, [r4], r6      
+0x04 0xf9 0x8d 0x88
+# CHECK: vst2.32	{d8, d9}, [r4]!         
+0x04 0xf9 0x86 0x88
+# CHECK: vst2.32	{d8, d9}, [r4], r6      
+0x04 0xf9 0x0d 0x88
+# CHECK: vst2.8	{d8, d9}, [r4]!         
+0x04 0xf9 0x06 0x88
+# CHECK: vst2.8	{d8, d9}, [r4], r6      
+
+0x04 0xf9 0x4f 0x89
+# CHECK: vst2.16	{d8, d10}, [r4]        
+0x04 0xf9 0x8f 0x89
+# CHECK: vst2.32	{d8, d10}, [r4]        
+0x04 0xf9 0x0f 0x89
+# CHECK: vst2.8	{d8, d10}, [r4]        
+
+0x04 0xf9 0x0f 0x84
+# CHECK: vst3.8	{d8, d9, d10}, [r4]    
+0x04 0xf9 0x4f 0x84
+# CHECK: vst3.16	{d8, d9, d10}, [r4]    
+0x04 0xf9 0x8f 0x84
+# CHECK: vst3.32	{d8, d9, d10}, [r4]    
+
+0x04 0xf9 0x0f 0x80
+# CHECK: vst4.8	{d8, d9, d10, d11}, [r4]
+0x04 0xf9 0x4f 0x80
+# CHECK: vst4.16	{d8, d9, d10, d11}, [r4]
+0x04 0xf9 0x8f 0x80
+# CHECK: vst4.32	{d8, d9, d10, d11}, [r4]
+
+0x04 0xf9 0x0f 0x85
+# CHECK: vst3.8	{d8, d10, d12}, [r4]   
+0x04 0xf9 0x4f 0x85
+# CHECK: vst3.16	{d8, d10, d12}, [r4]   
+0x04 0xf9 0x8f 0x85
+# CHECK: vst3.32	{d8, d10, d12}, [r4]   
+
+0x04 0xf9 0x0f 0x81
+# CHECK: vst4.8	{d8, d10, d12, d14}, [r4]
+0x04 0xf9 0x4f 0x81
+# CHECK: vst4.16	{d8, d10, d12, d14}, [r4]
+0x04 0xf9 0x8f 0x81
+# CHECK: vst4.32	{d8, d10, d12, d14}, [r4]
diff --git a/test/MC/Disassembler/ARM/invalid-LSL-regform.txt b/test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt
index 6a1f11f..f7d6bc6 100644
--- a/test/MC/Disassembler/ARM/invalid-LSL-regform.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
 
 # Opcode=196 Name=MOVs Format=ARM_FORMAT_DPSOREGFRM(5)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
@@ -8,4 +8,6 @@
 #
 # A8.6.89 LSL (register)
 # if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+
+# CHECK: warning: potentially undefined instruction encoding
 0x12 0xf1 0xa0 0xe1
diff --git a/test/MC/Disassembler/ARM/invalid-RSC-arm.txt b/test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt
index 096b909..5b13610 100644
--- a/test/MC/Disassembler/ARM/invalid-RSC-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
 
 # Opcode=261 Name=RSCrs Format=ARM_FORMAT_DPSOREGFRM(5)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
@@ -6,4 +6,6 @@
 # | 0: 0: 1: 1| 0: 0: 0: 0| 1: 1: 1: 0| 0: 1: 0: 0| 1: 1: 1: 1| 1: 0: 0: 0| 0: 1: 0: 1| 1: 1: 1: 1|
 # -------------------------------------------------------------------------------------------------
 # if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE;
+
+# CHECK: warning: potentially undefined instruction encoding
 0x5f 0xf8 0xe4 0x30
diff --git a/test/MC/Disassembler/ARM/invalid-SSAT-arm.txt b/test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt
index b236f8e..874378e 100644
--- a/test/MC/Disassembler/ARM/invalid-SSAT-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
 
 # Opcode=322 Name=SSAT Format=ARM_FORMAT_SATFRM(13)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
@@ -8,4 +8,6 @@
 #
 # A8.6.183 SSAT
 # if d == 15 || n == 15 then UNPREDICTABLE;
+
+# CHECK:warning: potentially undefined instruction encoding
 0x1a 0xf4 0xa0 0xe6
diff --git a/test/MC/Disassembler/ARM/invalid-STRBrs-arm.txt b/test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt
index d3998bd..fef6125 100644
--- a/test/MC/Disassembler/ARM/invalid-STRBrs-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
 
 # Opcode=355 Name=STRBrs Format=ARM_FORMAT_STFRM(7)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
@@ -7,4 +7,6 @@
 # -------------------------------------------------------------------------------------------------
 #
 # if t == 15 then UNPREDICTABLE
+
+# CHECK: warning: potentially undefined instruction encoding
 0x00 0xf0 0xcf 0xe7
diff --git a/test/MC/Disassembler/ARM/invalid-UQADD8-arm.txt b/test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt
index fb3e711..4c4c9ab 100644
--- a/test/MC/Disassembler/ARM/invalid-UQADD8-arm.txt
+++ b/test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
 
 # Opcode=426 Name=UQADD8 Format=ARM_FORMAT_DPFRM(4)
 #  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
@@ -10,3 +10,7 @@
 #
 # if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
 0x9f 0x5f 0x66 0xe6
+
+# CHECK: warning: potentially undefined
+# CHECK: uqadd8	r5, r6, pc
+ 
diff --git a/test/MC/Disassembler/X86/intel-syntax.txt b/test/MC/Disassembler/X86/intel-syntax.txt
index 54b242d..e2883c7 100644
--- a/test/MC/Disassembler/X86/intel-syntax.txt
+++ b/test/MC/Disassembler/X86/intel-syntax.txt
@@ -77,3 +77,25 @@
 
 # CHECK: test RAX, 0
 0x48 0xa9 0x00 0x00 0x00 0x00
+
+# CHECK: sysret
+0x48 0x0f 0x07
+
+# CHECK: sysret
+0x0f 0x07
+
+# CHECK: sysexit
+0x48 0x0f 0x35
+
+# CHECK: sysexit
+0x0f 0x35
+
+# CHECK: iret
+0x66 0xcf
+
+# CHECK: iretd
+0xcf
+
+# CHECK: iretq
+0x48 0xcf
+
diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt
index 840d5fa..c0e77d0 100644
--- a/test/MC/Disassembler/X86/simple-tests.txt
+++ b/test/MC/Disassembler/X86/simple-tests.txt
@@ -724,3 +724,8 @@
 
 # CHECK: vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0
 0xc4 0xe3 0xe1 0x48 0x40 0x04 0x21
+
+# rdar://8812056 lldb doesn't print the x86 lock prefix when disassembling
+# CHECK: lock
+# CHECK-NEXT: xaddq	%rcx, %rbx
+0xf0 0x48 0x0f 0xc1 0xcb
diff --git a/test/MC/MachO/ARM/static-movt-relocs.s b/test/MC/MachO/ARM/static-movt-relocs.s
new file mode 100644
index 0000000..dce5683
--- /dev/null
+++ b/test/MC/MachO/ARM/static-movt-relocs.s
@@ -0,0 +1,23 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumbv7-apple-darwin10 -filetype=obj -o - < %s | macho-dump | FileCheck %s
+        .thumb
+        .thumb_func foo
+foo:
+        movw r0, :lower16:(bar + 16)
+        movt r0, :upper16:(bar + 16)
+        bx r0
+
+
+@ CHECK:  ('_relocations', [
+@ CHECK:    # Relocation 0
+@ CHECK:    (('word-0', 0x4),
+@ CHECK:     ('word-1', 0x8e000001)),
+@ CHECK:    # Relocation 1
+@ CHECK:    (('word-0', 0x10),
+@ CHECK:     ('word-1', 0x16ffffff)),
+@ CHECK:    # Relocation 2
+@ CHECK:    (('word-0', 0x0),
+@ CHECK:     ('word-1', 0x8c000001)),
+@ CHECK:    # Relocation 3
+@ CHECK:    (('word-0', 0x0),
+@ CHECK:     ('word-1', 0x14ffffff)),
+@ CHECK:  ])
diff --git a/test/MC/Mips/elf-bigendian.ll b/test/MC/Mips/elf-bigendian.ll
new file mode 100644
index 0000000..875ba3b
--- /dev/null
+++ b/test/MC/Mips/elf-bigendian.ll
@@ -0,0 +1,45 @@
+; RUN: llc -filetype=obj -mtriple mips-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck %s
+
+; Check that this is big endian.
+; CHECK: ('e_indent[EI_DATA]', 0x02)
+
+; Make sure that a section table (text) entry is correct.
+; CHECK:   (('sh_name', 0x{{[0]*}}5) # '.text'
+; CHECKNEXT:   ('sh_type', 0x{{[0]*}}1)
+; CHECKNEXT:   ('sh_flags', 0x{{[0]*}}6)
+; CHECKNEXT:   ('sh_addr', 0x{{{[0-9,a-f]+}})
+; CHECKNEXT:   ('sh_offset', 0x{{{[0-9,a-f]+}})
+; CHECKNEXT:   ('sh_size', 0x{{{[0-9,a-f]+}})
+; CHECKNEXT:   ('sh_link', 0x{{[0]+}})
+; CHECKNEXT:   ('sh_info', 0x{{[0]+}})
+; CHECKNEXT:   ('sh_addralign', 0x{{[0]*}}4)
+; CHECKNEXT:   ('sh_entsize', 0x{{[0]+}})
+
+; See that at least first 3 instructions are correct: GP prologue
+; CHECKNEXT:   ('_section_data', '3c1c0000 279c0000 0399e021 {{[0-9,a-f]*}}')
+
+; ModuleID = '../br1.c'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32"
+target triple = "mips-unknown-linux"
+
+@x = global i32 1, align 4
+@str = private unnamed_addr constant [4 x i8] c"goo\00"
+@str2 = private unnamed_addr constant [4 x i8] c"foo\00"
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @x, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %if.end, label %foo
+
+if.end:                                           ; preds = %entry
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([4 x i8]* @str, i32 0, i32 0))
+  br label %foo
+
+foo:                                              ; preds = %entry, %if.end
+  %puts2 = tail call i32 @puts(i8* getelementptr inbounds ([4 x i8]* @str2, i32 0, i32 0))
+  ret i32 0
+}
+
+declare i32 @puts(i8* nocapture) nounwind
+
diff --git a/test/MC/Mips/elf_basic.s b/test/MC/Mips/elf_basic.s
index 4621182..f7ed348 100644
--- a/test/MC/Mips/elf_basic.s
+++ b/test/MC/Mips/elf_basic.s
@@ -1,7 +1,7 @@
-; RUN: llc -filetype=obj -mtriple mips-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE %s
-; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-LE %s
+// RUN: llvm-mc -filetype=obj -triple mips-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-BE %s
+// RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data  | FileCheck -check-prefix=CHECK-LE %s
 
-; Check that we produce the correct endian.
+// Check that we produce the correct endian.
 
-; CHECK-BE: ('e_indent[EI_DATA]', 0x02)
-; CHECK-LE: ('e_indent[EI_DATA]', 0x01)
+// CHECK-BE: ('e_indent[EI_DATA]', 0x02)
+// CHECK-LE: ('e_indent[EI_DATA]', 0x01)
diff --git a/test/MC/X86/address-size.s b/test/MC/X86/address-size.s
new file mode 100644
index 0000000..b105b40
--- /dev/null
+++ b/test/MC/X86/address-size.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+	.code64
+	movb	$0x0, (%esi)
+// CHECK: encoding: [0x67,0xc6,0x06,0x00]
+	movb	$0x0, (%rsi)
+// CHECK: encoding: [0xc6,0x06,0x00]
+
+	.code32
+	movb	$0x0, (%si)
+// CHECK: encoding: [0x67,0xc6,0x06,0x00]
+	movb	$0x0, (%esi)
+// CHECK: encoding: [0xc6,0x06,0x00]
diff --git a/test/MC/X86/lit.local.cfg b/test/MC/X86/lit.local.cfg
index 149a9a3..eee568e 100644
--- a/test/MC/X86/lit.local.cfg
+++ b/test/MC/X86/lit.local.cfg
@@ -1 +1,12 @@
 config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+def getRoot(config):
+    if not config.parent:
+        return config
+    return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index d5e1b9c..f53b672 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -340,19 +340,27 @@ rclb	$2, %bl   // CHECK: rclb $2, %bl # encoding: [0xc0,0xd3,0x02]
 
 // rdar://8418316
 // PR12173
-// CHECK: shldw	%cl, %bx, %bx
-// CHECK: shldw	%cl, %bx, %bx
-// CHECK: shldw	$1, %bx, %bx
-// CHECK: shrdw	%cl, %bx, %bx
-// CHECK: shrdw	%cl, %bx, %bx
-// CHECK: shrdw	$1, %bx, %bx
-
-shld  %bx, %bx
-shld  %cl, %bx, %bx
-shld  $1, %bx, %bx
-shrd  %bx, %bx
-shrd  %cl, %bx, %bx
-shrd  $1, %bx, %bx
+// CHECK: shldw	%cl, %bx, %dx
+// CHECK: shldw	%cl, %bx, %dx
+// CHECK: shldw	$1, %bx, %dx
+// CHECK: shldw	%cl, %bx, (%rax)
+// CHECK: shldw	%cl, %bx, (%rax)
+// CHECK: shrdw	%cl, %bx, %dx
+// CHECK: shrdw	%cl, %bx, %dx
+// CHECK: shrdw	$1, %bx, %dx
+// CHECK: shrdw	%cl, %bx, (%rax)
+// CHECK: shrdw	%cl, %bx, (%rax)
+
+shld  %bx, %dx
+shld  %cl, %bx, %dx
+shld  $1, %bx, %dx
+shld  %bx, (%rax)
+shld  %cl, %bx, (%rax)
+shrd  %bx, %dx
+shrd  %cl, %bx, %dx
+shrd  $1, %bx, %dx
+shrd  %bx, (%rax)
+shrd  %cl, %bx, (%rax)
 
 // CHECK: sldtl	%ecx
 // CHECK: encoding: [0x0f,0x00,0xc1]
@@ -1045,6 +1053,9 @@ xsetbv // CHECK: xsetbv # encoding: [0x0f,0x01,0xd1]
 	movsl
 	movsl	%ds:(%rsi), %es:(%rdi)
 	movsl	(%rsi), %es:(%rdi)
+// rdar://10883092
+// CHECK: movsd
+	movsl	(%rsi), (%rdi)
 
 // CHECK: movsq # encoding: [0x48,0xa5]
 // CHECK: movsq
diff --git a/test/MC/X86/x86_errors.s b/test/MC/X86/x86_errors.s
index 8de7444..f161e06 100644
--- a/test/MC/X86/x86_errors.s
+++ b/test/MC/X86/x86_errors.s
@@ -20,3 +20,11 @@ movl 0(%rax), 0(%edx)  // error: invalid operand for instruction
 
 // 32: error: instruction requires a CPU feature not currently enabled
 sysexitq
+
+// rdar://10710167
+// 64: error: expected scale expression
+lea (%rsp, %rbp, $4), %rax
+
+// rdar://10423777
+// 64: error: index register is 32-bit, but base register is 64-bit
+movq (%rsi,%ecx),%xmm0
diff --git a/test/Makefile b/test/Makefile
index a4e53f8..6cd27cc 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -171,6 +171,7 @@ lit.site.cfg: site.exp
 	@$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp
 	@$(ECHOPATH) s=@TARGETS_TO_BUILD@=$(TARGETS_TO_BUILD)=g >> lit.tmp
 	@$(ECHOPATH) s=@LLVM_BINDINGS@=$(BINDINGS_TO_BUILD)=g >> lit.tmp
+	@$(ECHOPATH) s=@HOST_OS@=$(HOST_OS)=g >> lit.tmp
 	@sed -f lit.tmp $(PROJ_SRC_DIR)/lit.site.cfg.in > $@
 	@-rm -f lit.tmp
 
@@ -184,5 +185,6 @@ Unit/lit.site.cfg: $(PROJ_OBJ_DIR)/Unit/.dir FORCE
 	@$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> unit.tmp
 	@$(ECHOPATH) s=@SHLIBDIR@=$(SharedLibDir)=g >> unit.tmp
 	@$(ECHOPATH) s=@SHLIBPATH_VAR@=$(SHLIBPATH_VAR)=g >> unit.tmp
+	@$(ECHOPATH) s=@HOST_OS@=$(HOST_OS)=g >> unit.tmp
 	@sed -f unit.tmp $(PROJ_SRC_DIR)/Unit/lit.site.cfg.in > $@
 	@-rm -f unit.tmp
diff --git a/test/Object/Inputs/elf-versioning-test.i386 b/test/Object/Inputs/elf-versioning-test.i386
new file mode 100755
index 0000000..c7c1eac
--- /dev/null
+++ b/test/Object/Inputs/elf-versioning-test.i386
diff --git a/test/Object/Inputs/elf-versioning-test.x86_64 b/test/Object/Inputs/elf-versioning-test.x86_64
new file mode 100755
index 0000000..cba79ba
--- /dev/null
+++ b/test/Object/Inputs/elf-versioning-test.x86_64
diff --git a/test/Object/Inputs/elfver.S b/test/Object/Inputs/elfver.S
new file mode 100644
index 0000000..ba63279
--- /dev/null
+++ b/test/Object/Inputs/elfver.S
@@ -0,0 +1,31 @@
+# Compile with:
+#   ARGS="-shared -nostdlib -Wl,--version-script=elfver.script"
+#   clang $ARGS -m32 elfver.S -lc -o elf-versioning-test.i386
+#   clang $ARGS -m64 elfver.S -lc -o elf-versioning-test.x86_64
+
+# Also, strip off non-dynamic symbols:
+#   strip elf-versioning-test.i386
+#   strip elf-versioning-test.x86_64
+
+#ifdef __i386__
+.symver _puts, puts@GLIBC_2.0
+#else
+.symver _puts, puts@GLIBC_2.2.5
+#endif
+call _puts@PLT
+
+.symver foo1, foo@VER1
+.globl foo1
+.type foo1, @function
+foo1:
+  ret
+
+.symver foo2, foo@@VER2
+.globl foo2
+.type foo2, @function
+foo2:
+  ret
+
+.globl unversioned_define
+.type unversioned_define, @function
+unversioned_define:
diff --git a/test/Object/Inputs/elfver.script b/test/Object/Inputs/elfver.script
new file mode 100644
index 0000000..1316fcb
--- /dev/null
+++ b/test/Object/Inputs/elfver.script
@@ -0,0 +1,10 @@
+VER1 {
+  global:
+    foo;
+};
+
+VER2 {
+  global:
+    foo;
+} VER1;
+
diff --git a/test/Object/readobj-elf-versioning.test b/test/Object/readobj-elf-versioning.test
new file mode 100644
index 0000000..0906f34
--- /dev/null
+++ b/test/Object/readobj-elf-versioning.test
@@ -0,0 +1,15 @@
+RUN: llvm-readobj %p/Inputs/elf-versioning-test.i386 \
+RUN:         | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj %p/Inputs/elf-versioning-test.i386 \
+RUN:         | FileCheck %s -check-prefix ELF32
+RUN: llvm-readobj %p/Inputs/elf-versioning-test.x86_64 \
+RUN:         | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj %p/Inputs/elf-versioning-test.x86_64 \
+RUN:         | FileCheck %s -check-prefix ELF64
+
+ELF: foo@@VER2          FUNC  {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+ELF: foo@VER1           FUNC  {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+ELF: unversioned_define FUNC  {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+
+ELF32: puts@GLIBC_2.0   FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} undef,global
+ELF64: puts@GLIBC_2.2.5 FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} undef,global
diff --git a/test/TableGen/TwoLevelName.td b/test/TableGen/TwoLevelName.td
new file mode 100644
index 0000000..9c502f4
--- /dev/null
+++ b/test/TableGen/TwoLevelName.td
@@ -0,0 +1,46 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Type<string name, int length, int width> {
+  string Name = name;
+  int Length = length;
+  int Width = width;
+}
+
+multiclass OT1<string ss, int l, int w> {
+  def _#NAME# : Type<ss, l, w>;
+}
+multiclass OT2<string ss, int w> {
+  defm  v1#NAME# : OT1<!strconcat( "v1", ss),  1, w>;
+  defm  v2#NAME# : OT1<!strconcat( "v2", ss),  2, w>;
+  defm  v3#NAME# : OT1<!strconcat( "v3", ss),  3, w>;
+  defm  v4#NAME# : OT1<!strconcat( "v4", ss),  4, w>;
+  defm  v8#NAME# : OT1<!strconcat( "v8", ss),  8, w>;
+  defm v16#NAME# : OT1<!strconcat("v16", ss), 16, w>;
+}
+
+defm i8 : OT2<"i8", 8>;
+
+// CHECK: _v16i8
+// CHECK: Length = 16
+// CHECK: Width = 8
+
+// CHECK: _v1i8
+// CHECK: Length = 1
+// CHECK: Width = 8
+
+// CHECK: _v2i8
+// CHECK: Length = 2
+// CHECK: Width = 8
+
+// CHECK: _v3i8
+// CHECK: Length = 3
+// CHECK: Width = 8
+
+// CHECK: _v4i8
+// CHECK: Length = 4
+// CHECK: Width = 8
+
+// CHECK: _v8i8
+// CHECK: Length = 8
+// CHECK: Width = 8
diff --git a/test/Transforms/CorrelatedValuePropagation/basic.ll b/test/Transforms/CorrelatedValuePropagation/basic.ll
index 270c048..475cd8d 100644
--- a/test/Transforms/CorrelatedValuePropagation/basic.ll
+++ b/test/Transforms/CorrelatedValuePropagation/basic.ll
@@ -79,4 +79,103 @@ Impossible:
 
 LessThanOrEqualToTwo:
   ret i32 0
-}
-\ No newline at end of file
+}
+
+define i32 @switch1(i32 %s) {
+; CHECK: @switch1
+entry:
+  %cmp = icmp slt i32 %s, 0
+  br i1 %cmp, label %negative, label %out
+
+negative:
+  switch i32 %s, label %out [
+; CHECK: switch i32 %s, label %out
+    i32 0, label %out
+; CHECK-NOT: i32 0
+    i32 1, label %out
+; CHECK-NOT: i32 1
+    i32 -1, label %next
+; CHECK: i32 -1, label %next
+    i32 -2, label %next
+; CHECK: i32 -2, label %next
+    i32 2, label %out
+; CHECK-NOT: i32 2
+    i32 3, label %out
+; CHECK-NOT: i32 3
+  ]
+
+out:
+  %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
+  ret i32 %p
+
+next:
+  %q = phi i32 [ 0, %negative ], [ 0, %negative ]
+  ret i32 %q
+}
+
+define i32 @switch2(i32 %s) {
+; CHECK: @switch2
+entry:
+  %cmp = icmp sgt i32 %s, 0
+  br i1 %cmp, label %positive, label %out
+
+positive:
+  switch i32 %s, label %out [
+    i32 0, label %out
+    i32 -1, label %next
+    i32 -2, label %next
+  ]
+; CHECK: br label %out
+
+out:
+  %p = phi i32 [ -1, %entry ], [ 1, %positive ], [ 1, %positive ]
+  ret i32 %p
+
+next:
+  %q = phi i32 [ 0, %positive ], [ 0, %positive ]
+  ret i32 %q
+}
+
+define i32 @switch3(i32 %s) {
+; CHECK: @switch3
+entry:
+  %cmp = icmp sgt i32 %s, 0
+  br i1 %cmp, label %positive, label %out
+
+positive:
+  switch i32 %s, label %out [
+    i32 -1, label %out
+    i32 -2, label %next
+    i32 -3, label %next
+  ]
+; CHECK: br label %out
+
+out:
+  %p = phi i32 [ -1, %entry ], [ 1, %positive ], [ 1, %positive ]
+  ret i32 %p
+
+next:
+  %q = phi i32 [ 0, %positive ], [ 0, %positive ]
+  ret i32 %q
+}
+
+define void @switch4(i32 %s) {
+; CHECK: @switch4
+entry:
+  %cmp = icmp eq i32 %s, 0
+  br i1 %cmp, label %zero, label %out
+
+zero:
+  switch i32 %s, label %out [
+    i32 0, label %next
+    i32 1, label %out
+    i32 -1, label %out
+  ]
+; CHECK: br label %next
+
+out:
+  ret void
+
+next:
+  ret void
+}
diff --git a/test/Transforms/EarlyCSE/instsimplify-dom.ll b/test/Transforms/EarlyCSE/instsimplify-dom.ll
new file mode 100644
index 0000000..36dffec
--- /dev/null
+++ b/test/Transforms/EarlyCSE/instsimplify-dom.ll
@@ -0,0 +1,19 @@
+; RUN: opt -early-cse -S < %s | FileCheck %s
+; PR12231
+
+declare i32 @f()
+
+define i32 @fn() {
+entry:
+  br label %lbl_1215
+
+lbl_1215:
+  %ins34 = phi i32 [ %ins35, %xxx ], [ undef, %entry ]
+  ret i32 %ins34
+
+xxx:
+  %ins35 = call i32 @f()
+  br label %lbl_1215
+}
+
+; CHECK: define i32 @fn
diff --git a/test/Transforms/GlobalOpt/invoke.ll b/test/Transforms/GlobalOpt/invoke.ll
new file mode 100644
index 0000000..c1f499c
--- /dev/null
+++ b/test/Transforms/GlobalOpt/invoke.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -globalopt < %s | FileCheck %s
+; rdar://11022897
+
+; Globalopt should be able to evaluate an invoke.
+; CHECK: @tmp = global i32 1
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+@tmp = global i32 0
+
+define i32 @one() {
+  ret i32 1
+}
+
+define void @_GLOBAL__I_a() {
+bb:
+  %tmp1 = invoke i32 @one()
+          to label %bb2 unwind label %bb4
+
+bb2:                                              ; preds = %bb
+  store i32 %tmp1, i32* @tmp
+  ret void
+
+bb4:                                              ; preds = %bb
+  %tmp5 = landingpad { i8*, i32 } personality i8* undef
+          filter [0 x i8*] zeroinitializer
+  unreachable
+}
diff --git a/test/Transforms/Inline/2007-06-06-NoInline.ll b/test/Transforms/Inline/2007-06-06-NoInline.ll
deleted file mode 100644
index d5a7953..0000000
--- a/test/Transforms/Inline/2007-06-06-NoInline.ll
+++ /dev/null
@@ -1,46 +0,0 @@
-; RUN: opt < %s -inline -S | grep "define internal i32 @bar"
-@llvm.noinline = appending global [1 x i8*] [ i8* bitcast (i32 (i32, i32)* @bar to i8*) ], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
-
-define internal i32 @bar(i32 %x, i32 %y) {
-entry:
-	%x_addr = alloca i32		; <i32*> [#uses=2]
-	%y_addr = alloca i32		; <i32*> [#uses=2]
-	%retval = alloca i32, align 4		; <i32*> [#uses=2]
-	%tmp = alloca i32, align 4		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	store i32 %x, i32* %x_addr
-	store i32 %y, i32* %y_addr
-	%tmp1 = load i32* %x_addr		; <i32> [#uses=1]
-	%tmp2 = load i32* %y_addr		; <i32> [#uses=1]
-	%tmp3 = add i32 %tmp1, %tmp2		; <i32> [#uses=1]
-	store i32 %tmp3, i32* %tmp
-	%tmp4 = load i32* %tmp		; <i32> [#uses=1]
-	store i32 %tmp4, i32* %retval
-	br label %return
-
-return:		; preds = %entry
-	%retval5 = load i32* %retval		; <i32> [#uses=1]
-	ret i32 %retval5
-}
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
-	%a_addr = alloca i32		; <i32*> [#uses=2]
-	%b_addr = alloca i32		; <i32*> [#uses=2]
-	%retval = alloca i32, align 4		; <i32*> [#uses=2]
-	%tmp = alloca i32, align 4		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	store i32 %a, i32* %a_addr
-	store i32 %b, i32* %b_addr
-	%tmp1 = load i32* %b_addr		; <i32> [#uses=1]
-	%tmp2 = load i32* %a_addr		; <i32> [#uses=1]
-	%tmp3 = call i32 @bar( i32 %tmp1, i32 %tmp2 )		; <i32> [#uses=1]
-	store i32 %tmp3, i32* %tmp
-	%tmp4 = load i32* %tmp		; <i32> [#uses=1]
-	store i32 %tmp4, i32* %retval
-	br label %return
-
-return:		; preds = %entry
-	%retval5 = load i32* %retval		; <i32> [#uses=1]
-	ret i32 %retval5
-}
diff --git a/test/Transforms/Inline/alloca-bonus.ll b/test/Transforms/Inline/alloca-bonus.ll
index 91ab40a..fb4062f 100644
--- a/test/Transforms/Inline/alloca-bonus.ll
+++ b/test/Transforms/Inline/alloca-bonus.ll
@@ -81,3 +81,44 @@ bb.true:
 bb.false:
   ret void
 }
+
+define void @outer4(i32 %A) {
+; CHECK: @outer4
+; CHECK-NOT: call void @inner4
+  %ptr = alloca i32
+  call void @inner4(i32* %ptr, i32 %A)
+  ret void
+}
+
+; %D poisons this call, scalar-repl can't handle that instruction. However, we
+; still want to detect that the icmp and branch *can* be handled.
+define void @inner4(i32 *%ptr, i32 %A) {
+  %B = getelementptr i32* %ptr, i32 %A
+  %E = icmp eq i32* %ptr, null
+  br i1 %E, label %bb.true, label %bb.false
+bb.true:
+  ; This block musn't be counted in the inline cost.
+  %t1 = load i32* %ptr
+  %t2 = add i32 %t1, 1
+  %t3 = add i32 %t2, 1
+  %t4 = add i32 %t3, 1
+  %t5 = add i32 %t4, 1
+  %t6 = add i32 %t5, 1
+  %t7 = add i32 %t6, 1
+  %t8 = add i32 %t7, 1
+  %t9 = add i32 %t8, 1
+  %t10 = add i32 %t9, 1
+  %t11 = add i32 %t10, 1
+  %t12 = add i32 %t11, 1
+  %t13 = add i32 %t12, 1
+  %t14 = add i32 %t13, 1
+  %t15 = add i32 %t14, 1
+  %t16 = add i32 %t15, 1
+  %t17 = add i32 %t16, 1
+  %t18 = add i32 %t17, 1
+  %t19 = add i32 %t18, 1
+  %t20 = add i32 %t19, 1
+  ret void
+bb.false:
+  ret void
+}
diff --git a/test/Transforms/Inline/inline_constprop.ll b/test/Transforms/Inline/inline_constprop.ll
index 537c69b..cc7aaac 100644
--- a/test/Transforms/Inline/inline_constprop.ll
+++ b/test/Transforms/Inline/inline_constprop.ll
@@ -1,14 +1,89 @@
-; RUN: opt < %s -inline -S | not grep callee
-; RUN: opt < %s -inline -S | not grep div
+; RUN: opt < %s -inline -S | FileCheck %s
 
+define internal i32 @callee1(i32 %A, i32 %B) {
+  %C = sdiv i32 %A, %B
+  ret i32 %C
+}
+
+define i32 @caller1() {
+; CHECK: define i32 @caller1
+; CHECK-NEXT: ret i32 3
 
-define internal i32 @callee(i32 %A, i32 %B) {
-        %C = sdiv i32 %A, %B            ; <i32> [#uses=1]
-        ret i32 %C
+  %X = call i32 @callee1( i32 10, i32 3 )
+  ret i32 %X
 }
 
-define i32 @test() {
-        %X = call i32 @callee( i32 10, i32 3 )          ; <i32> [#uses=1]
-        ret i32 %X
+define i32 @caller2() {
+; CHECK: @caller2
+; CHECK-NOT: call void @callee2
+; CHECK: ret
+
+; We contrive to make this hard for *just* the inline pass to do in order to
+; simulate what can actually happen with large, complex functions getting
+; inlined.
+  %a = add i32 42, 0
+  %b = add i32 48, 0
+
+  %x = call i32 @callee21(i32 %a, i32 %b)
+  ret i32 %x
 }
 
+define i32 @callee21(i32 %x, i32 %y) {
+  %sub = sub i32 %y, %x
+  %result = call i32 @callee22(i32 %sub)
+  ret i32 %result
+}
+
+declare i8* @getptr()
+
+define i32 @callee22(i32 %x) {
+  %icmp = icmp ugt i32 %x, 42
+  br i1 %icmp, label %bb.true, label %bb.false
+bb.true:
+  ; This block musn't be counted in the inline cost.
+  %ptr = call i8* @getptr()
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+
+  ret i32 %x
+bb.false:
+  ret i32 %x
+}
diff --git a/test/Transforms/Inline/ptr-diff.ll b/test/Transforms/Inline/ptr-diff.ll
new file mode 100644
index 0000000..0b431d6
--- /dev/null
+++ b/test/Transforms/Inline/ptr-diff.ll
@@ -0,0 +1,56 @@
+; RUN: opt -inline < %s -S -o - -inline-threshold=10 | FileCheck %s
+
+define i32 @outer1() {
+; CHECK: @outer1
+; CHECK-NOT: call
+; CHECK: ret i32
+
+  %ptr = alloca i32
+  %ptr1 = getelementptr inbounds i32* %ptr, i32 0
+  %ptr2 = getelementptr inbounds i32* %ptr, i32 42
+  %result = call i32 @inner1(i32* %ptr1, i32* %ptr2)
+  ret i32 %result
+}
+
+define i32 @inner1(i32* %begin, i32* %end) {
+  %begin.i = ptrtoint i32* %begin to i32
+  %end.i = ptrtoint i32* %end to i32
+  %distance = sub i32 %end.i, %begin.i
+  %icmp = icmp sle i32 %distance, 42
+  br i1 %icmp, label %then, label %else
+
+then:
+  ret i32 3
+
+else:
+  %t = load i32* %begin
+  ret i32 %t
+}
+
+define i32 @outer2(i32* %ptr) {
+; Test that an inbounds GEP disables this -- it isn't safe in general as
+; wrapping changes the behavior of lessthan and greaterthan comparisions.
+; CHECK: @outer2
+; CHECK: call i32 @inner2
+; CHECK: ret i32
+
+  %ptr1 = getelementptr i32* %ptr, i32 0
+  %ptr2 = getelementptr i32* %ptr, i32 42
+  %result = call i32 @inner2(i32* %ptr1, i32* %ptr2)
+  ret i32 %result
+}
+
+define i32 @inner2(i32* %begin, i32* %end) {
+  %begin.i = ptrtoint i32* %begin to i32
+  %end.i = ptrtoint i32* %end to i32
+  %distance = sub i32 %end.i, %begin.i
+  %icmp = icmp sle i32 %distance, 42
+  br i1 %icmp, label %then, label %else
+
+then:
+  ret i32 3
+
+else:
+  %t = load i32* %begin
+  ret i32 %t
+}
diff --git a/test/Transforms/InstCombine/2012-03-10-InstCombine.ll b/test/Transforms/InstCombine/2012-03-10-InstCombine.ll
new file mode 100644
index 0000000..58ccf12
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-03-10-InstCombine.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+
+; Derived from gcc.c-torture/execute/frame-address.c
+
+; CHECK:     @func
+; CHECK:     return:
+; CHECK-NOT: ret i32 0
+; CHECK:     ret i32 %retval
+
+define i32 @func(i8* %c, i8* %f) nounwind uwtable readnone noinline ssp {
+entry:
+  %d = alloca i8, align 1
+  store i8 0, i8* %d, align 1
+  %cmp = icmp ugt i8* %d, %c
+  br i1 %cmp, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %cmp2 = icmp ule i8* %d, %f
+  %not.cmp1 = icmp uge i8* %c, %f
+  %.cmp2 = and i1 %cmp2, %not.cmp1
+  %land.ext = zext i1 %.cmp2 to i32
+  br label %return
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp uge i8* %d, %f
+  %not.cmp3 = icmp ule i8* %c, %f
+  %.cmp5 = and i1 %cmp5, %not.cmp3
+  %land.ext7 = zext i1 %.cmp5 to i32
+  br label %return
+
+return:                                           ; preds = %if.else, %if.then
+  %retval.0 = phi i32 [ %land.ext, %if.then ], [ %land.ext7, %if.else ]
+  ret i32 %retval.0
+}
+
diff --git a/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll b/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll
new file mode 100644
index 0000000..c1602da
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; PR12234
+
+@g = extern_weak global i32
+define i32 @function(i32 %x) nounwind {
+entry:
+  %xor = xor i32 %x, 1
+  store volatile i32 %xor, i32* inttoptr (i64 1 to i32*), align 4
+  %or4 = or i32 or (i32 zext (i1 icmp eq (i32* @g, i32* null) to i32), i32 1), %xor
+  ret i32 %or4
+}
+; CHECK: define i32 @function
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 53a5643..edb5305 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -1,116 +1,184 @@
 ; This test makes sure that mul instructions are properly eliminated.
-; RUN: opt < %s -instcombine -S | not grep mul
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i32 @test1(i32 %A) {
+; CHECK: @test1
         %B = mul i32 %A, 1              ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: ret i32 %A
 }
 
 define i32 @test2(i32 %A) {
+; CHECK: @test2
         ; Should convert to an add instruction
         %B = mul i32 %A, 2              ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: shl i32 %A, 1
 }
 
 define i32 @test3(i32 %A) {
+; CHECK: @test3
         ; This should disappear entirely
         %B = mul i32 %A, 0              ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: ret i32 0
 }
 
 define double @test4(double %A) {
+; CHECK: @test4
         ; This is safe for FP
         %B = fmul double 1.000000e+00, %A                ; <double> [#uses=1]
         ret double %B
+; CHECK: ret double %A
 }
 
 define i32 @test5(i32 %A) {
+; CHECK: @test5
         %B = mul i32 %A, 8              ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: shl i32 %A, 3
 }
 
 define i8 @test6(i8 %A) {
+; CHECK: @test6
         %B = mul i8 %A, 8               ; <i8> [#uses=1]
         %C = mul i8 %B, 8               ; <i8> [#uses=1]
         ret i8 %C
+; CHECK: shl i8 %A, 6
 }
 
 define i32 @test7(i32 %i) {
+; CHECK: @test7
         %tmp = mul i32 %i, -1           ; <i32> [#uses=1]
         ret i32 %tmp
+; CHECK: sub i32 0, %i
 }
 
 define i64 @test8(i64 %i) {
-       ; tmp = sub 0, %i
+; CHECK: @test8
         %j = mul i64 %i, -1             ; <i64> [#uses=1]
         ret i64 %j
+; CHECK: sub i64 0, %i
 }
 
 define i32 @test9(i32 %i) {
-        ; %j = sub 0, %i
+; CHECK: @test9
         %j = mul i32 %i, -1             ; <i32> [#uses=1]
         ret i32 %j
+; CHECJ: sub i32 0, %i
 }
 
 define i32 @test10(i32 %a, i32 %b) {
+; CHECK: @test10
         %c = icmp slt i32 %a, 0         ; <i1> [#uses=1]
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
        ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
+; CHECK: [[TEST10:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST10]], %b
+; CHECK-NEXT: ret i32 %e
 }
 
 define i32 @test11(i32 %a, i32 %b) {
+; CHECK: @test11
         %c = icmp sle i32 %a, -1                ; <i1> [#uses=1]
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
         ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
+; CHECK: [[TEST11:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST11]], %b
+; CHECK-NEXT: ret i32 %e
 }
 
-define i32 @test12(i8 %a, i32 %b) {
-        %c = icmp ugt i8 %a, 127                ; <i1> [#uses=1]
+define i32 @test12(i32 %a, i32 %b) {
+; CHECK: @test12
+        %c = icmp ugt i32 %a, 2147483647                ; <i1> [#uses=1]
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
-        ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
+; CHECK: [[TEST12:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST12]], %b
+; CHECK-NEXT: ret i32 %e
+
 }
 
 ; PR2642
 define internal void @test13(<4 x float>*) {
+; CHECK: @test13
 	load <4 x float>* %0, align 1
 	fmul <4 x float> %2, < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >
 	store <4 x float> %3, <4 x float>* %0, align 1
 	ret void
+; CHECK-NEXT: ret void
 }
 
 define <16 x i8> @test14(<16 x i8> %a) {
+; CHECK: @test14
         %b = mul <16 x i8> %a, zeroinitializer
         ret <16 x i8> %b
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
 }
 
 ; rdar://7293527
 define i32 @test15(i32 %A, i32 %B) {
+; CHECK: @test15
 entry:
   %shl = shl i32 1, %B
   %m = mul i32 %shl, %A
   ret i32 %m
+; CHECK: shl i32 %A, %B
 }
 
 ; X * Y (when Y is 0 or 1) --> x & (0-Y)
 define i32 @test16(i32 %b, i1 %c) {
+; CHECK: @test16
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
         ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
+; CHECK: [[TEST16:%.*]] = sext i1 %c to i32
+; CHECK-NEXT: %e = and i32 [[TEST16]], %b
+; CHECK-NEXT: ret i32 %e
 }
 
 ; X * Y (when Y is 0 or 1) --> x & (0-Y)
 define i32 @test17(i32 %a, i32 %b) {
+; CHECK: @test17
   %a.lobit = lshr i32 %a, 31
   %e = mul i32 %a.lobit, %b
   ret i32 %e
+; CHECK: [[TEST17:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST17]], %b
+; CHECK-NEXT: ret i32 %e
+}
+
+define i32 @test18(i32 %A, i32 %B) {
+; CHECK: @test18
+  %C = and i32 %A, 1
+  %D = and i32 %B, 1
+
+  %E = mul i32 %C, %D
+  %F = and i32 %E, 16
+  ret i32 %F
+; CHECK-NEXT: ret i32 0
 }
 
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
+declare void @use(i1)
 
+define i32 @test19(i32 %A, i32 %B) {
+; CHECK: @test19
+  %C = and i32 %A, 1
+  %D = and i32 %B, 1
 
+; It would be nice if we also started proving that this doesn't overflow.
+  %E = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %C, i32 %D)
+  %F = extractvalue {i32, i1} %E, 0
+  %G = extractvalue {i32, i1} %E, 1
+  call void @use(i1 %G)
+  %H = and i32 %F, 16
+  ret i32 %H
+; CHECK: ret i32 0
+}
diff --git a/test/Transforms/InstSimplify/phi.ll b/test/Transforms/InstSimplify/phi.ll
new file mode 100644
index 0000000..05cd40d
--- /dev/null
+++ b/test/Transforms/InstSimplify/phi.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+; PR12189
+define i1 @test1(i32 %x) {
+; CHECK: @test1
+  br i1 true, label %a, label %b
+
+a:
+  %aa = or i32 %x, 10
+  br label %c
+
+b:
+  %bb = or i32 %x, 10
+  br label %c
+
+c:
+  %cc = phi i32 [ %bb, %b ], [%aa, %a ]
+  %d = urem i32 %cc, 2
+  %e = icmp eq i32 %d, 0
+  ret i1 %e
+; CHECK: ret i1 %e
+}
diff --git a/test/Transforms/InstSimplify/ptr_diff.ll b/test/Transforms/InstSimplify/ptr_diff.ll
new file mode 100644
index 0000000..013964c
--- /dev/null
+++ b/test/Transforms/InstSimplify/ptr_diff.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i64 @ptrdiff1(i8* %ptr) {
+; CHECK: @ptrdiff1
+; CHECK-NEXT: ret i64 42
+
+  %first = getelementptr i8* %ptr, i32 0
+  %last = getelementptr i8* %ptr, i32 42
+  %first.int = ptrtoint i8* %first to i64
+  %last.int = ptrtoint i8* %last to i64
+  %diff = sub i64 %last.int, %first.int
+  ret i64 %diff
+}
+
+define i64 @ptrdiff2(i8* %ptr) {
+; CHECK: @ptrdiff2
+; CHECK-NEXT: ret i64 42
+
+  %first1 = getelementptr i8* %ptr, i32 0
+  %first2 = getelementptr i8* %first1, i32 1
+  %first3 = getelementptr i8* %first2, i32 2
+  %first4 = getelementptr i8* %first3, i32 4
+  %last1 = getelementptr i8* %first2, i32 48
+  %last2 = getelementptr i8* %last1, i32 8
+  %last3 = getelementptr i8* %last2, i32 -4
+  %last4 = getelementptr i8* %last3, i32 -4
+  %first.int = ptrtoint i8* %first4 to i64
+  %last.int = ptrtoint i8* %last4 to i64
+  %diff = sub i64 %last.int, %first.int
+  ret i64 %diff
+}
diff --git a/test/Transforms/InstSimplify/reassociate.ll b/test/Transforms/InstSimplify/reassociate.ll
index 3c8169e..e659e6f 100644
--- a/test/Transforms/InstSimplify/reassociate.ll
+++ b/test/Transforms/InstSimplify/reassociate.ll
@@ -184,3 +184,12 @@ define i32 @udiv5(i32 %x, i32 %y) {
 ; CHECK: ret i32 %x
 }
 
+define i16 @trunc1(i32 %x) {
+; CHECK: @trunc1
+  %y = add i32 %x, 1
+  %tx = trunc i32 %x to i16
+  %ty = trunc i32 %y to i16
+  %d = sub i16 %ty, %tx
+  ret i16 %d
+; CHECK: ret i16 1
+}
diff --git a/test/Transforms/JumpThreading/thread-loads.ll b/test/Transforms/JumpThreading/thread-loads.ll
index cce23ea..78d36e7 100644
--- a/test/Transforms/JumpThreading/thread-loads.ll
+++ b/test/Transforms/JumpThreading/thread-loads.ll
@@ -1,12 +1,12 @@
 ; RUN: opt < %s -jump-threading -S | FileCheck %s
-; rdar://6402033
 
-; Test that we can thread through the block with the partially redundant load (%2).
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin7"
 
-define i32 @foo(i32* %P) nounwind {
-; CHECK: foo
+; Test that we can thread through the block with the partially redundant load (%2).
+; rdar://6402033
+define i32 @test1(i32* %P) nounwind {
+; CHECK: @test1
 entry:
 	%0 = tail call i32 (...)* @f1() nounwind		; <i32> [#uses=1]
 	%1 = icmp eq i32 %0, 0		; <i1> [#uses=1]
@@ -39,3 +39,43 @@ bb3:		; preds = %bb1
 declare i32 @f1(...)
 
 declare i32 @f2(...)
+
+
+;; Check that we preserve TBAA information.
+; rdar://11039258
+
+define i32 @test2(i32* %P) nounwind {
+; CHECK: @test2
+entry:
+	%0 = tail call i32 (...)* @f1() nounwind		; <i32> [#uses=1]
+	%1 = icmp eq i32 %0, 0		; <i1> [#uses=1]
+	br i1 %1, label %bb1, label %bb
+
+bb:		; preds = %entry
+; CHECK: bb1.thread:
+; CHECK: store{{.*}}, !tbaa !0
+; CHECK: br label %bb3
+	store i32 42, i32* %P, align 4, !tbaa !0
+	br label %bb1
+
+bb1:		; preds = %entry, %bb
+	%res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+	%2 = load i32* %P, align 4, !tbaa !0
+	%3 = icmp sgt i32 %2, 36
+	br i1 %3, label %bb3, label %bb2
+
+bb2:		; preds = %bb1
+	%4 = tail call i32 (...)* @f2() nounwind
+	ret i32 %res.0
+
+bb3:		; preds = %bb1
+; CHECK: bb3:
+; CHECK: %res.01 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ]
+; CHECK: ret i32 %res.01
+	ret i32 %res.0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+
diff --git a/test/Transforms/LoopSimplify/2012-03-20-indirectbr.ll b/test/Transforms/LoopSimplify/2012-03-20-indirectbr.ll
new file mode 100644
index 0000000..9c805da
--- /dev/null
+++ b/test/Transforms/LoopSimplify/2012-03-20-indirectbr.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -loop-simplify -S | FileCheck %s
+
+; Make sure the preheader exists.
+; CHECK: sw.bb103:
+; CHECK: indirectbr {{.*}}label %while.cond112
+; CHECK: while.cond112:
+; But the tail is not split.
+; CHECK: for.body:
+; CHECK: indirectbr {{.*}}label %while.cond112
+define fastcc void @build_regex_nfa() nounwind uwtable ssp {
+entry:
+  indirectbr i8* blockaddress(@build_regex_nfa, %while.cond), [label %while.cond]
+
+while.cond:                                       ; preds = %if.then439, %entry
+  indirectbr i8* blockaddress(@build_regex_nfa, %sw.bb103), [label %do.body785, label %sw.bb103]
+
+sw.bb103:                                         ; preds = %while.body
+  indirectbr i8* blockaddress(@build_regex_nfa, %while.cond112), [label %while.cond112]
+
+while.cond112:                                    ; preds = %for.body, %for.cond.preheader, %sw.bb103
+  %pc.0 = phi i8 [ -1, %sw.bb103 ], [ 0, %for.body ], [ %pc.0, %for.cond.preheader ]
+  indirectbr i8* blockaddress(@build_regex_nfa, %Lsetdone), [label %sw.bb118, label %Lsetdone]
+
+sw.bb118:                                         ; preds = %while.cond112
+  indirectbr i8* blockaddress(@build_regex_nfa, %for.cond.preheader), [label %Lerror.loopexit, label %for.cond.preheader]
+
+for.cond.preheader:                               ; preds = %sw.bb118
+  indirectbr i8* blockaddress(@build_regex_nfa, %for.body), [label %while.cond112, label %for.body]
+
+for.body:                                         ; preds = %for.body, %for.cond.preheader
+  indirectbr i8* blockaddress(@build_regex_nfa, %for.body), [label %while.cond112, label %for.body]
+
+Lsetdone:                                         ; preds = %while.cond112
+  unreachable
+
+do.body785:                                       ; preds = %while.cond, %while.body
+  ret void
+
+Lerror.loopexit:                                  ; preds = %sw.bb118
+  unreachable
+}
diff --git a/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll
new file mode 100644
index 0000000..0172492
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll
@@ -0,0 +1,155 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; <rdar://problem/11049788> Segmentation fault: 11 in LoopStrengthReduce
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; IVUsers should not consider tmp128 a valid user because it is not in a
+; simplified loop nest.
+; CHECK: @nopreheader
+; CHECK: for.cond:
+; CHECK: %tmp128 = add i64 %0, %indvar65
+define void @nopreheader(i8* %cmd) nounwind ssp {
+entry:
+  indirectbr i8* undef, [label %while.cond]
+
+while.cond:                                       ; preds = %while.body, %entry
+  %0 = phi i64 [ %indvar.next48, %while.body ], [ 0, %entry ]
+  indirectbr i8* undef, [label %while.end, label %while.body]
+
+while.body:                                       ; preds = %lor.rhs, %lor.lhs.false17, %lor.lhs.false11, %lor.lhs.false, %land.rhs
+  %indvar.next48 = add i64 %0, 1
+  indirectbr i8* undef, [label %while.cond]
+
+while.end:                                        ; preds = %lor.rhs, %while.cond
+  indirectbr i8* undef, [label %if.end152]
+
+if.end152:                                        ; preds = %lor.lhs.false144, %if.end110
+  indirectbr i8* undef, [label %lor.lhs.false184, label %for.cond]
+
+lor.lhs.false184:                                 ; preds = %lor.lhs.false177
+  indirectbr i8* undef, [label %return, label %for.cond]
+
+for.cond:                                         ; preds = %for.inc, %lor.lhs.false184, %if.end152
+  %indvar65 = phi i64 [ %indvar.next66, %for.inc ], [ 0, %lor.lhs.false184 ], [ 0, %if.end152 ]
+  %tmp128 = add i64 %0, %indvar65
+  %s.4 = getelementptr i8* %cmd, i64 %tmp128
+  %tmp195 = load i8* %s.4, align 1
+  indirectbr i8* undef, [label %return, label %land.rhs198]
+
+land.rhs198:                                      ; preds = %for.cond
+  indirectbr i8* undef, [label %return, label %for.inc]
+
+for.inc:                                          ; preds = %lor.rhs234, %land.lhs.true228, %land.lhs.true216, %land.lhs.true204
+  %indvar.next66 = add i64 %indvar65, 1
+  indirectbr i8* undef, [label %for.cond]
+
+return:                                           ; preds = %if.end677, %doshell, %if.then96
+  ret void
+}
+
+; Another case with a dominating loop that does not contain the IV
+; User. Just make sure it doesn't assert.
+define void @nopreheader2() nounwind ssp {
+entry:
+  indirectbr i8* undef, [label %while.cond, label %return]
+
+while.cond:                                       ; preds = %while.cond.backedge, %entry
+  indirectbr i8* undef, [label %while.cond.backedge, label %lor.rhs]
+
+lor.rhs:                                          ; preds = %while.cond
+  indirectbr i8* undef, [label %while.cond.backedge, label %while.end]
+
+while.cond.backedge:                              ; preds = %lor.rhs, %while.cond
+  indirectbr i8* undef, [label %while.cond]
+
+while.end:                                        ; preds = %lor.rhs
+  indirectbr i8* undef, [label %if.then18, label %return]
+
+if.then18:                                        ; preds = %while.end
+  indirectbr i8* undef, [label %if.end35, label %lor.lhs.false]
+
+lor.lhs.false:                                    ; preds = %if.then18
+  indirectbr i8* undef, [label %if.end35, label %return]
+
+if.end35:                                         ; preds = %lor.lhs.false, %if.then18
+  indirectbr i8* undef, [label %while.cond36]
+
+while.cond36:                                     ; preds = %while.body49, %if.end35
+  %0 = phi i64 [ %indvar.next13, %while.body49 ], [ 0, %if.end35 ]
+  indirectbr i8* undef, [label %while.body49, label %lor.rhs42]
+
+lor.rhs42:                                        ; preds = %while.cond36
+  indirectbr i8* undef, [label %while.body49, label %while.end52]
+
+while.body49:                                     ; preds = %lor.rhs42, %while.cond36
+  %indvar.next13 = add i64 %0, 1
+  indirectbr i8* undef, [label %while.cond36]
+
+while.end52:                                      ; preds = %lor.rhs42
+  indirectbr i8* undef, [label %land.lhs.true, label %return]
+
+land.lhs.true:                                    ; preds = %while.end52
+  indirectbr i8* undef, [label %while.cond66.preheader, label %return]
+
+while.cond66.preheader:                           ; preds = %land.lhs.true
+  indirectbr i8* undef, [label %while.cond66]
+
+while.cond66:                                     ; preds = %while.body77, %while.cond66.preheader
+  indirectbr i8* undef, [label %land.rhs, label %while.cond81.preheader]
+
+land.rhs:                                         ; preds = %while.cond66
+  indirectbr i8* undef, [label %while.body77, label %while.cond81.preheader]
+
+while.cond81.preheader:                           ; preds = %land.rhs, %while.cond66
+  %tmp45 = add i64 undef, %0
+  %tmp46 = add i64 %tmp45, undef
+  indirectbr i8* undef, [label %while.cond81]
+
+while.body77:                                     ; preds = %land.rhs
+  indirectbr i8* undef, [label %while.cond66]
+
+while.cond81:                                     ; preds = %while.body94, %while.cond81.preheader
+  %tmp25 = add i64 %tmp46, undef
+  indirectbr i8* undef, [label %while.body94, label %lor.rhs87]
+
+lor.rhs87:                                        ; preds = %while.cond81
+  indirectbr i8* undef, [label %while.body94, label %return]
+
+while.body94:                                     ; preds = %lor.rhs87, %while.cond81
+  indirectbr i8* undef, [label %while.cond81]
+
+return:                                           ; preds = %if.end216, %land.lhs.true183, %land.lhs.true, %while.end52, %lor.lhs.false, %while.end, %entry
+  ret void
+}
+
+; Test a phi operand IV User dominated by a no-preheader loop.
+define void @nopreheader3() nounwind uwtable ssp align 2 {
+entry:
+  indirectbr i8* blockaddress(@nopreheader3, %if.end10), [label %if.end22, label %if.end10]
+
+if.end10:                                         ; preds = %entry
+  indirectbr i8* blockaddress(@nopreheader3, %if.end6.i), [label %if.end22, label %if.end6.i]
+
+if.end6.i:                                        ; preds = %if.end10
+  indirectbr i8* blockaddress(@nopreheader3, %while.cond2.preheader.i.i), [label %if.then12, label %while.cond2.preheader.i.i]
+
+while.cond2.preheader.i.i:                        ; preds = %while.end.i18.i, %if.end6.i
+  indirectbr i8* blockaddress(@nopreheader3, %while.cond2.i.i), [label %while.cond2.i.i]
+
+while.cond2.i.i:                                  ; preds = %while.cond2.i.i, %while.cond2.preheader.i.i
+  %i1.1.i14.i = phi i32 [ %add.i15.i, %while.cond2.i.i ], [ undef, %while.cond2.preheader.i.i ]
+  %add.i15.i = add nsw i32 %i1.1.i14.i, undef
+  indirectbr i8* blockaddress(@nopreheader3, %while.end.i18.i), [label %while.cond2.i.i, label %while.end.i18.i]
+
+while.end.i18.i:                                  ; preds = %while.cond2.i.i
+  indirectbr i8* blockaddress(@nopreheader3, %while.cond2.preheader.i.i), [label %if.then12, label %while.cond2.preheader.i.i]
+
+if.then12:                                        ; preds = %while.end.i18.i, %if.end6.i
+  %i1.0.lcssa.i.i = phi i32 [ undef, %if.end6.i ], [ %i1.1.i14.i, %while.end.i18.i ]
+  indirectbr i8* blockaddress(@nopreheader3, %if.end22), [label %if.end22]
+
+if.end22:                                         ; preds = %if.then12, %if.end10, %entry
+  ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/2011-11-29-postincphi.ll b/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
index cb23ad0..cb23ad0 100644
--- a/test/Transforms/LoopStrengthReduce/2011-11-29-postincphi.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
diff --git a/test/Transforms/LoopStrengthReduce/2011-12-04-loserreg.ll b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
index 5108650..5108650 100644
--- a/test/Transforms/LoopStrengthReduce/2011-12-04-loserreg.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 552f4e0..08bd8c0 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -1721,6 +1721,82 @@ define void @test61() {
   ret void
 }
 
+; Delete a retain matched by releases when one is inside the loop and the
+; other is outside the loop.
+
+; CHECK: define void @test62(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test62(i8* %x, i1* %p) nounwind {
+entry:
+  br label %loop
+
+loop:
+  call i8* @objc_retain(i8* %x)
+  %q = load i1* %p
+  br i1 %q, label %loop.more, label %exit
+
+loop.more:
+  call void @objc_release(i8* %x)
+  br label %loop
+
+exit:
+  call void @objc_release(i8* %x)
+  ret void
+}
+
+; Like test62 but with no release in exit.
+; Don't delete anything!
+
+; CHECK: define void @test63(
+; CHECK: loop:
+; CHECK:   tail call i8* @objc_retain(i8* %x)
+; CHECK: loop.more:
+; CHECK:   call void @objc_release(i8* %x)
+; CHECK: }
+define void @test63(i8* %x, i1* %p) nounwind {
+entry:
+  br label %loop
+
+loop:
+  call i8* @objc_retain(i8* %x)
+  %q = load i1* %p
+  br i1 %q, label %loop.more, label %exit
+
+loop.more:
+  call void @objc_release(i8* %x)
+  br label %loop
+
+exit:
+  ret void
+}
+
+; Like test62 but with no release in loop.more.
+; Don't delete anything!
+
+; CHECK: define void @test64(
+; CHECK: loop:
+; CHECK:   tail call i8* @objc_retain(i8* %x)
+; CHECK: exit:
+; CHECK:   call void @objc_release(i8* %x)
+; CHECK: }
+define void @test64(i8* %x, i1* %p) nounwind {
+entry:
+  br label %loop
+
+loop:
+  call i8* @objc_retain(i8* %x)
+  %q = load i1* %p
+  br i1 %q, label %loop.more, label %exit
+
+loop.more:
+  br label %loop
+
+exit:
+  call void @objc_release(i8* %x)
+  ret void
+}
+
 declare void @bar(i32 ()*)
 
 ; A few real-world testcases.
diff --git a/test/Transforms/ObjCARC/invoke.ll b/test/Transforms/ObjCARC/invoke.ll
index cf971e4..9e26209 100644
--- a/test/Transforms/ObjCARC/invoke.ll
+++ b/test/Transforms/ObjCARC/invoke.ll
@@ -2,6 +2,7 @@
 
 declare i8* @objc_retain(i8*)
 declare void @objc_release(i8*)
+declare i8* @objc_retainAutoreleasedReturnValue(i8*)
 declare i8* @objc_msgSend(i8*, i8*, ...)
 declare void @use_pointer(i8*)
 declare void @callee()
@@ -68,6 +69,41 @@ done:
   ret void
 }
 
+; The optimizer should ignore invoke unwind paths consistently.
+; PR12265
+
+; CHECK: define void @test2() {
+; CHECK: invoke.cont:
+; CHECK-NEXT: call i8* @objc_retain
+; CHEK-NOT: @objc
+; CHECK: finally.cont:
+; CHECK-NEXT: call void @objc_release
+; CHEK-NOT: @objc
+; CHECK: finally.rethrow:
+; CHEK-NOT: @objc
+; CHECK: }
+define void @test2() {
+entry:
+  %call = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* ()*)()
+          to label %invoke.cont unwind label %finally.rethrow, !clang.arc.no_objc_arc_exceptions !0
+
+invoke.cont:                                      ; preds = %entry
+  %tmp1 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+  call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void ()*)(), !clang.arc.no_objc_arc_exceptions !0
+  invoke void @use_pointer(i8* %call)
+          to label %finally.cont unwind label %finally.rethrow, !clang.arc.no_objc_arc_exceptions !0
+
+finally.cont:                                     ; preds = %invoke.cont
+  tail call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
+  ret void
+
+finally.rethrow:                                  ; preds = %invoke.cont, %entry
+  %tmp2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* null
+  unreachable
+}
+
 declare i32 @__gxx_personality_v0(...)
+declare i32 @__objc_personality_v0(...)
 
 !0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/nested.ll b/test/Transforms/ObjCARC/nested.ll
index 9eada8a..a618a21 100644
--- a/test/Transforms/ObjCARC/nested.ll
+++ b/test/Transforms/ObjCARC/nested.ll
@@ -484,12 +484,14 @@ forcoll.empty:
   ret void
 }
 
-; Delete a nested retain+release pair.
+; TODO: Delete a nested retain+release pair.
+; The optimizer currently can't do this, because of a split loop backedge.
+; See test9b for the same testcase without a split backedge.
 
 ; CHECK: define void @test9(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call i8* @objc_retain
-; CHECK-NOT: @objc_retain
+; CHECK: call i8* @objc_retain
 ; CHECK: }
 define void @test9() nounwind {
 entry:
@@ -551,13 +553,79 @@ forcoll.empty:
   ret void
 }
 
-; Delete a nested retain+release pair.
+; Like test9, but without a split backedge. This we can optimize.
 
-; CHECK: define void @test10(
+; CHECK: define void @test9b(
 ; CHECK: call i8* @objc_retain
 ; CHECK: call i8* @objc_retain
 ; CHECK-NOT: @objc_retain
 ; CHECK: }
+define void @test9b() nounwind {
+entry:
+  %state.ptr = alloca %struct.__objcFastEnumerationState, align 8
+  %items.ptr = alloca [16 x i8*], align 8
+  %call = call i8* @returner()
+  %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+  %call1 = call i8* @returner()
+  %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
+  %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+  %2 = call i8* @objc_retain(i8* %0) nounwind
+  %tmp3 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+  %iszero = icmp eq i64 %call4, 0
+  br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
+
+forcoll.loopinit:
+  %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
+  %mutationsptr = load i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  br label %forcoll.loopbody.outer
+
+forcoll.loopbody.outer:
+  %forcoll.count.ph = phi i64 [ %call4, %forcoll.loopinit ], [ %call7, %forcoll.refetch ]
+  %tmp9 = icmp ugt i64 %forcoll.count.ph, 1
+  %umax = select i1 %tmp9, i64 %forcoll.count.ph, i64 1
+  br label %forcoll.loopbody
+
+forcoll.loopbody:
+  %forcoll.index = phi i64 [ %phitmp, %forcoll.notmutated ], [ 0, %forcoll.loopbody.outer ]
+  %mutationsptr5 = load i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64* %mutationsptr5, align 8
+  %3 = icmp eq i64 %statemutations, %forcoll.initial-mutations
+  br i1 %3, label %forcoll.notmutated, label %forcoll.mutated
+
+forcoll.mutated:
+  call void @objc_enumerationMutation(i8* %2)
+  br label %forcoll.notmutated
+
+forcoll.notmutated:
+  %phitmp = add i64 %forcoll.index, 1
+  %exitcond = icmp eq i64 %phitmp, %umax
+  br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
+
+forcoll.refetch:
+  %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+  %4 = icmp eq i64 %call7, 0
+  br i1 %4, label %forcoll.empty, label %forcoll.loopbody.outer
+
+forcoll.empty:
+  call void @objc_release(i8* %2) nounwind
+  call void @objc_release(i8* %1) nounwind, !clang.imprecise_release !0
+  call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
+  ret void
+}
+
+; TODO: Delete a nested retain+release pair.
+; The optimizer currently can't do this, because of a split loop backedge.
+; See test10b for the same testcase without a split backedge.
+
+; CHECK: define void @test10(
+; CHECK: call i8* @objc_retain
+; CHECK: call i8* @objc_retain
+; CHECK: call i8* @objc_retain
+; CHECK: }
 define void @test10() nounwind {
 entry:
   %state.ptr = alloca %struct.__objcFastEnumerationState, align 8
@@ -618,3 +686,68 @@ forcoll.empty:
   call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
   ret void
 }
+
+; Like test10, but without a split backedge. This we can optimize.
+
+; CHECK: define void @test10b(
+; CHECK: call i8* @objc_retain
+; CHECK: call i8* @objc_retain
+; CHECK-NOT: @objc_retain
+; CHECK: }
+define void @test10b() nounwind {
+entry:
+  %state.ptr = alloca %struct.__objcFastEnumerationState, align 8
+  %items.ptr = alloca [16 x i8*], align 8
+  %call = call i8* @returner()
+  %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+  %call1 = call i8* @returner()
+  %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
+  call void @callee()
+  %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
+  call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+  %2 = call i8* @objc_retain(i8* %0) nounwind
+  %tmp3 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+  %iszero = icmp eq i64 %call4, 0
+  br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
+
+forcoll.loopinit:
+  %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
+  %mutationsptr = load i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  br label %forcoll.loopbody.outer
+
+forcoll.loopbody.outer:
+  %forcoll.count.ph = phi i64 [ %call4, %forcoll.loopinit ], [ %call7, %forcoll.refetch ]
+  %tmp9 = icmp ugt i64 %forcoll.count.ph, 1
+  %umax = select i1 %tmp9, i64 %forcoll.count.ph, i64 1
+  br label %forcoll.loopbody
+
+forcoll.loopbody:
+  %forcoll.index = phi i64 [ %phitmp, %forcoll.notmutated ], [ 0, %forcoll.loopbody.outer ]
+  %mutationsptr5 = load i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64* %mutationsptr5, align 8
+  %3 = icmp eq i64 %statemutations, %forcoll.initial-mutations
+  br i1 %3, label %forcoll.notmutated, label %forcoll.mutated
+
+forcoll.mutated:
+  call void @objc_enumerationMutation(i8* %2)
+  br label %forcoll.notmutated
+
+forcoll.notmutated:
+  %phitmp = add i64 %forcoll.index, 1
+  %exitcond = icmp eq i64 %phitmp, %umax
+  br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
+
+forcoll.refetch:
+  %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+  %4 = icmp eq i64 %call7, 0
+  br i1 %4, label %forcoll.empty, label %forcoll.loopbody.outer
+
+forcoll.empty:
+  call void @objc_release(i8* %2) nounwind
+  call void @objc_release(i8* %1) nounwind, !clang.imprecise_release !0
+  call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
+  ret void
+}
diff --git a/test/Transforms/ObjCARC/pr12270.ll b/test/Transforms/ObjCARC/pr12270.ll
new file mode 100644
index 0000000..30610f8
--- /dev/null
+++ b/test/Transforms/ObjCARC/pr12270.ll
@@ -0,0 +1,15 @@
+; RUN: opt -disable-output -objc-arc-contract %s
+; test that we don't crash on unreachable code
+%2 = type opaque
+
+define void @_i_Test__foo(%2 *%x) {
+entry:
+  unreachable
+
+return:                                           ; No predecessors!
+  %bar = bitcast %2* %x to i8*
+  %foo = call i8* @objc_autoreleaseReturnValue(i8* %bar) nounwind
+  ret void
+}
+
+declare i8* @objc_autoreleaseReturnValue(i8*)
diff --git a/test/Transforms/ScalarRepl/negative-memset.ll b/test/Transforms/ScalarRepl/negative-memset.ll
new file mode 100644
index 0000000..e52ab46
--- /dev/null
+++ b/test/Transforms/ScalarRepl/negative-memset.ll
@@ -0,0 +1,20 @@
+; PR12202
+; RUN: opt < %s -scalarrepl -S
+; Ensure that we do not hang or crash when feeding a negative value to memset
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
+target triple = "i686-pc-win32"
+
+define i32 @test() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %buff = alloca [1 x i8], align 1
+  store i32 0, i32* %retval
+  %0 = bitcast [1 x i8]* %buff to i8*
+  call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false)
+  %arraydecay = getelementptr inbounds [1 x i8]* %buff, i32 0, i32 0
+  call void @llvm.memset.p0i8.i32(i8* %arraydecay, i8 -1, i32 -8, i32 1, i1 false)	; Negative 8!
+  ret i32 0
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index 8b81186..4fdfb04 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -9,6 +9,7 @@ config.enable_shared = @ENABLE_SHARED@
 config.enable_assertions = @ENABLE_ASSERTIONS@
 config.targets_to_build = "@TARGETS_TO_BUILD@"
 config.llvm_bindings = "@LLVM_BINDINGS@"
+config.host_os = "@HOST_OS@"
 
 # Support substitution of the tools_dir with user parameters. This is
 # used when we can't determine the tool dir at configuration time.
diff --git a/tools/bugpoint/OptimizerDriver.cpp b/tools/bugpoint/OptimizerDriver.cpp
index 336c83d..fb090ee 100644
--- a/tools/bugpoint/OptimizerDriver.cpp
+++ b/tools/bugpoint/OptimizerDriver.cpp
@@ -85,8 +85,11 @@ void BugDriver::EmitProgressBitcode(const Module *M,
   if (NoFlyer || PassesToRun.empty()) return;
   outs() << "\n*** You can reproduce the problem with: ";
   if (UseValgrind) outs() << "valgrind ";
-  outs() << "opt " << Filename << " ";
-  outs() << getPassesString(PassesToRun) << "\n";
+  outs() << "opt " << Filename;
+  for (unsigned i = 0, e = PluginLoader::getNumPlugins(); i != e; ++i) {
+    outs() << " -load " << PluginLoader::getPlugin(i);
+  }
+  outs() << " " << getPassesString(PassesToRun) << "\n";
 }
 
 cl::opt<bool> SilencePasses("silence-passes",
@@ -145,10 +148,9 @@ bool BugDriver::runPasses(Module *Program,
     return 1;
   }
 
-  sys::Path tool = PrependMainExecutablePath("opt", getToolName(),
-                                             (void*)"opt");
+  sys::Path tool = sys::Program::FindProgramByName("opt");
   if (tool.empty()) {
-    errs() << "Cannot find `opt' in executable directory!\n";
+    errs() << "Cannot find `opt' in PATH!\n";
     return 1;
   }
 
diff --git a/tools/lli/CMakeLists.txt b/tools/lli/CMakeLists.txt
index 9378ef2..a5d2e61 100644
--- a/tools/lli/CMakeLists.txt
+++ b/tools/lli/CMakeLists.txt
@@ -1,5 +1,22 @@
+
+link_directories( ${LLVM_INTEL_JITEVENTS_LIBDIR} )
+
 set(LLVM_LINK_COMPONENTS mcjit jit interpreter nativecodegen bitreader asmparser selectiondag)
 
+if( LLVM_USE_OPROFILE )
+  set(LLVM_LINK_COMPONENTS
+    ${LLVM_LINK_COMPONENTS}
+    OProfileJIT
+    )
+endif( LLVM_USE_OPROFILE )
+
+if( LLVM_USE_INTEL_JITEVENTS )
+  set(LLVM_LINK_COMPONENTS
+    ${LLVM_LINK_COMPONENTS}
+    IntelJITEvents
+    )
+endif( LLVM_USE_INTEL_JITEVENTS )
+
 add_llvm_tool(lli
   lli.cpp
   )
diff --git a/tools/lli/Makefile b/tools/lli/Makefile
index 292f608..100fc2e 100644
--- a/tools/lli/Makefile
+++ b/tools/lli/Makefile
@@ -9,6 +9,21 @@
 
 LEVEL := ../..
 TOOLNAME := lli
+
+include $(LEVEL)/Makefile.config
+
 LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser selectiondag
 
-include $(LEVEL)/Makefile.common
+# If Intel JIT Events support is confiured, link against the LLVM Intel JIT
+# Events interface library
+ifeq ($(USE_INTEL_JITEVENTS), 1)
+  LINK_COMPONENTS += inteljitevents
+endif
+
+# If oprofile support is confiured, link against the LLVM oprofile interface
+# library
+ifeq ($(USE_OPROFILE), 1)
+  LINK_COMPONENTS += oprofilejit
+endif
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 0e8d1d8..efcc1f5 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -238,7 +238,12 @@ int main(int argc, char **argv, char * const *envp) {
     exit(1);
   }
 
-  EE->RegisterJITEventListener(createOProfileJITEventListener());
+  // The following functions have no effect if their respective profiling
+  // support wasn't enabled in the build configuration.
+  EE->RegisterJITEventListener(
+                JITEventListener::createOProfileJITEventListener());
+  EE->RegisterJITEventListener(
+                JITEventListener::createIntelJITEventListener());
 
   EE->DisableLazyCompilation(NoLazyCompilation);
 
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index bf068c4..d630087 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -331,7 +331,7 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
 
   // BLOCKINFO is a special part of the stream.
   if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
-    if (Dump) errs() << Indent << "<BLOCKINFO_BLOCK/>\n";
+    if (Dump) outs() << Indent << "<BLOCKINFO_BLOCK/>\n";
     if (Stream.ReadBlockInfoBlock())
       return Error("Malformed BlockInfoBlock");
     uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
@@ -345,16 +345,16 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
 
   const char *BlockName = 0;
   if (Dump) {
-    errs() << Indent << "<";
+    outs() << Indent << "<";
     if ((BlockName = GetBlockName(BlockID, *Stream.getBitStreamReader())))
-      errs() << BlockName;
+      outs() << BlockName;
     else
-      errs() << "UnknownBlock" << BlockID;
+      outs() << "UnknownBlock" << BlockID;
 
     if (NonSymbolic && BlockName)
-      errs() << " BlockID=" << BlockID;
+      outs() << " BlockID=" << BlockID;
 
-    errs() << " NumWords=" << NumWords
+    outs() << " NumWords=" << NumWords
            << " BlockCodeSize=" << Stream.GetAbbrevIDWidth() << ">\n";
   }
 
@@ -376,11 +376,11 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
       uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
       BlockStats.NumBits += BlockBitEnd-BlockBitStart;
       if (Dump) {
-        errs() << Indent << "</";
+        outs() << Indent << "</";
         if (BlockName)
-          errs() << BlockName << ">\n";
+          outs() << BlockName << ">\n";
         else
-          errs() << "UnknownBlock" << BlockID << ">\n";
+          outs() << "UnknownBlock" << BlockID << ">\n";
       }
       return false;
     }
@@ -422,25 +422,25 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
         BlockStats.CodeFreq[Code].NumAbbrev++;
 
       if (Dump) {
-        errs() << Indent << "  <";
+        outs() << Indent << "  <";
         if (const char *CodeName =
               GetCodeName(Code, BlockID, *Stream.getBitStreamReader()))
-          errs() << CodeName;
+          outs() << CodeName;
         else
-          errs() << "UnknownCode" << Code;
+          outs() << "UnknownCode" << Code;
         if (NonSymbolic &&
             GetCodeName(Code, BlockID, *Stream.getBitStreamReader()))
-          errs() << " codeid=" << Code;
+          outs() << " codeid=" << Code;
         if (AbbrevID != bitc::UNABBREV_RECORD)
-          errs() << " abbrevid=" << AbbrevID;
+          outs() << " abbrevid=" << AbbrevID;
 
         for (unsigned i = 0, e = Record.size(); i != e; ++i)
-          errs() << " op" << i << "=" << (int64_t)Record[i];
+          outs() << " op" << i << "=" << (int64_t)Record[i];
 
-        errs() << "/>";
+        outs() << "/>";
 
         if (BlobStart) {
-          errs() << " blob data = ";
+          outs() << " blob data = ";
           bool BlobIsPrintable = true;
           for (unsigned i = 0; i != BlobLen; ++i)
             if (!isprint(BlobStart[i])) {
@@ -449,12 +449,12 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
             }
 
           if (BlobIsPrintable)
-            errs() << "'" << std::string(BlobStart, BlobStart+BlobLen) <<"'";
+            outs() << "'" << std::string(BlobStart, BlobStart+BlobLen) <<"'";
           else
-            errs() << "unprintable, " << BlobLen << " bytes.";
+            outs() << "unprintable, " << BlobLen << " bytes.";
         }
 
-        errs() << "\n";
+        outs() << "\n";
       }
 
       break;
@@ -525,58 +525,58 @@ static int AnalyzeBitcode() {
     ++NumTopBlocks;
   }
 
-  if (Dump) errs() << "\n\n";
+  if (Dump) outs() << "\n\n";
 
   uint64_t BufferSizeBits = (EndBufPtr-BufPtr)*CHAR_BIT;
   // Print a summary of the read file.
-  errs() << "Summary of " << InputFilename << ":\n";
-  errs() << "         Total size: ";
+  outs() << "Summary of " << InputFilename << ":\n";
+  outs() << "         Total size: ";
   PrintSize(BufferSizeBits);
-  errs() << "\n";
-  errs() << "        Stream type: ";
+  outs() << "\n";
+  outs() << "        Stream type: ";
   switch (CurStreamType) {
-  case UnknownBitstream: errs() << "unknown\n"; break;
-  case LLVMIRBitstream:  errs() << "LLVM IR\n"; break;
+  case UnknownBitstream: outs() << "unknown\n"; break;
+  case LLVMIRBitstream:  outs() << "LLVM IR\n"; break;
   }
-  errs() << "  # Toplevel Blocks: " << NumTopBlocks << "\n";
-  errs() << "\n";
+  outs() << "  # Toplevel Blocks: " << NumTopBlocks << "\n";
+  outs() << "\n";
 
   // Emit per-block stats.
-  errs() << "Per-block Summary:\n";
+  outs() << "Per-block Summary:\n";
   for (std::map<unsigned, PerBlockIDStats>::iterator I = BlockIDStats.begin(),
        E = BlockIDStats.end(); I != E; ++I) {
-    errs() << "  Block ID #" << I->first;
+    outs() << "  Block ID #" << I->first;
     if (const char *BlockName = GetBlockName(I->first, StreamFile))
-      errs() << " (" << BlockName << ")";
-    errs() << ":\n";
+      outs() << " (" << BlockName << ")";
+    outs() << ":\n";
 
     const PerBlockIDStats &Stats = I->second;
-    errs() << "      Num Instances: " << Stats.NumInstances << "\n";
-    errs() << "         Total Size: ";
+    outs() << "      Num Instances: " << Stats.NumInstances << "\n";
+    outs() << "         Total Size: ";
     PrintSize(Stats.NumBits);
-    errs() << "\n";
+    outs() << "\n";
     double pct = (Stats.NumBits * 100.0) / BufferSizeBits;
     errs() << "    Percent of file: " << format("%2.4f%%", pct) << "\n";
     if (Stats.NumInstances > 1) {
-      errs() << "       Average Size: ";
+      outs() << "       Average Size: ";
       PrintSize(Stats.NumBits/(double)Stats.NumInstances);
-      errs() << "\n";
-      errs() << "  Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/"
+      outs() << "\n";
+      outs() << "  Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/"
              << Stats.NumSubBlocks/(double)Stats.NumInstances << "\n";
-      errs() << "    Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/"
+      outs() << "    Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/"
              << Stats.NumAbbrevs/(double)Stats.NumInstances << "\n";
-      errs() << "    Tot/Avg Records: " << Stats.NumRecords << "/"
+      outs() << "    Tot/Avg Records: " << Stats.NumRecords << "/"
              << Stats.NumRecords/(double)Stats.NumInstances << "\n";
     } else {
-      errs() << "      Num SubBlocks: " << Stats.NumSubBlocks << "\n";
-      errs() << "        Num Abbrevs: " << Stats.NumAbbrevs << "\n";
-      errs() << "        Num Records: " << Stats.NumRecords << "\n";
+      outs() << "      Num SubBlocks: " << Stats.NumSubBlocks << "\n";
+      outs() << "        Num Abbrevs: " << Stats.NumAbbrevs << "\n";
+      outs() << "        Num Records: " << Stats.NumRecords << "\n";
     }
     if (Stats.NumRecords) {
       double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords;
-      errs() << "    Percent Abbrevs: " << format("%2.4f%%", pct) << "\n";
+      outs() << "    Percent Abbrevs: " << format("%2.4f%%", pct) << "\n";
     }
-    errs() << "\n";
+    outs() << "\n";
 
     // Print a histogram of the codes we see.
     if (!NoHistogram && !Stats.CodeFreq.empty()) {
@@ -587,7 +587,7 @@ static int AnalyzeBitcode() {
       std::stable_sort(FreqPairs.begin(), FreqPairs.end());
       std::reverse(FreqPairs.begin(), FreqPairs.end());
 
-      errs() << "\tRecord Histogram:\n";
+      outs() << "\tRecord Histogram:\n";
       fprintf(stderr, "\t\t  Count    # Bits   %% Abv  Record Kind\n");
       for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) {
         const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second];
@@ -607,7 +607,7 @@ static int AnalyzeBitcode() {
         else
           fprintf(stderr, "UnknownCode%d\n", FreqPairs[i].second);
       }
-      errs() << "\n";
+      outs() << "\n";
 
     }
   }
diff --git a/tools/llvm-diff/DifferenceEngine.cpp b/tools/llvm-diff/DifferenceEngine.cpp
index 8113fd4..a5a99f5 100644
--- a/tools/llvm-diff/DifferenceEngine.cpp
+++ b/tools/llvm-diff/DifferenceEngine.cpp
@@ -319,13 +319,17 @@ class FunctionDifferenceEngine {
       bool Difference = false;
 
       DenseMap<ConstantInt*,BasicBlock*> LCases;
-      for (unsigned I = 0, E = LI->getNumCases(); I != E; ++I)
-        LCases[LI->getCaseValue(I)] = LI->getCaseSuccessor(I);
-      for (unsigned I = 0, E = RI->getNumCases(); I != E; ++I) {
-        ConstantInt *CaseValue = RI->getCaseValue(I);
+      
+      for (SwitchInst::CaseIt I = LI->case_begin(), E = LI->case_end();
+           I != E; ++I)
+        LCases[I.getCaseValue()] = I.getCaseSuccessor();
+        
+      for (SwitchInst::CaseIt I = RI->case_begin(), E = RI->case_end();
+           I != E; ++I) {
+        ConstantInt *CaseValue = I.getCaseValue();
         BasicBlock *LCase = LCases[CaseValue];
         if (LCase) {
-          if (TryUnify) tryUnify(LCase, RI->getCaseSuccessor(I));
+          if (TryUnify) tryUnify(LCase, I.getCaseSuccessor());
           LCases.erase(CaseValue);
         } else if (Complain || !Difference) {
           if (Complain)
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index dc72974..ceed2d6 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -512,6 +512,9 @@ int main(int argc, char **argv) {
   llvm::InitializeAllAsmParsers();
   llvm::InitializeAllDisassemblers();
 
+  // Register the target printer for --version.
+  cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
+
   cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n");
   TripleName = Triple::normalize(TripleName);
   setDwarfDebugFlags(argc, argv);
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index 9cf83b6..8d9e51e 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -204,9 +204,10 @@ static void SortAndPrintSymbolList() {
       strcpy(SymbolSizeStr, "        ");
 
     if (i->Address != object::UnknownAddressOrSize)
-      format("%08"PRIx64, i->Address).print(SymbolAddrStr, sizeof(SymbolAddrStr));
+      format("%08" PRIx64, i->Address).print(SymbolAddrStr,
+                                             sizeof(SymbolAddrStr));
     if (i->Size != object::UnknownAddressOrSize)
-      format("%08"PRIx64, i->Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
+      format("%08" PRIx64, i->Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
 
     if (OutputFormat == posix) {
       outs() << i->Name << " " << i->TypeChar << " "
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index deb4dd1..94becae 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -300,7 +300,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
 
         if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
                                    DebugOut, nulls())) {
-          outs() << format("%8"PRIx64":\t", SectionAddr + Index);
+          outs() << format("%8" PRIx64 ":\t", SectionAddr + Index);
           DumpBytes(StringRef(Bytes.data() + Index, Size));
           IP->printInst(&Inst, outs(), "");
           outs() << "\n";
@@ -327,8 +327,8 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
           if (error(rel_cur->getTypeName(name))) goto skip_print_rel;
           if (error(rel_cur->getValueString(val))) goto skip_print_rel;
 
-          outs() << format("\t\t\t%8"PRIx64": ", SectionAddr + addr) << name << "\t"
-                 << val << "\n";
+          outs() << format("\t\t\t%8" PRIx64 ": ", SectionAddr + addr) << name
+                 << "\t" << val << "\n";
 
         skip_print_rel:
           ++rel_cur;
@@ -388,8 +388,8 @@ static void PrintSectionHeaders(const ObjectFile *o) {
     if (error(si->isBSS(BSS))) return;
     std::string Type = (std::string(Text ? "TEXT " : "") +
                         (Data ? "DATA " : "") + (BSS ? "BSS" : ""));
-    outs() << format("%3d %-13s %09"PRIx64" %017"PRIx64" %s\n", i, Name.str().c_str(), Size,
-                     Address, Type.c_str());
+    outs() << format("%3d %-13s %09" PRIx64 " %017" PRIx64 " %s\n",
+                     i, Name.str().c_str(), Size, Address, Type.c_str());
     ++i;
   }
 }
@@ -411,7 +411,7 @@ static void PrintSectionContents(const ObjectFile *o) {
 
     // Dump out the content as hex and printable ascii characters.
     for (std::size_t addr = 0, end = Contents.size(); addr < end; addr += 16) {
-      outs() << format(" %04"PRIx64" ", BaseAddr + addr);
+      outs() << format(" %04" PRIx64 " ", BaseAddr + addr);
       // Dump line of hex.
       for (std::size_t i = 0; i < 16; ++i) {
         if (i != 0 && i % 4 == 0)
@@ -519,7 +519,7 @@ static void PrintSymbolTable(const ObjectFile *o) {
       else if (Type == SymbolRef::ST_Function)
         FileFunc = 'F';
 
-      outs() << format("%08"PRIx64, Address) << " "
+      outs() << format("%08" PRIx64, Address) << " "
              << GlobLoc // Local -> 'l', Global -> 'g', Neither -> ' '
              << (Weak ? 'w' : ' ') // Weak?
              << ' ' // Constructor. Not supported yet.
@@ -539,7 +539,7 @@ static void PrintSymbolTable(const ObjectFile *o) {
         outs() << SectionName;
       }
       outs() << '\t'
-             << format("%08"PRIx64" ", Size)
+             << format("%08" PRIx64 " ", Size)
              << Name
              << '\n';
     }
diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp
index 3f8789d..3be1289 100644
--- a/tools/llvm-readobj/llvm-readobj.cpp
+++ b/tools/llvm-readobj/llvm-readobj.cpp
@@ -17,6 +17,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/ELF.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Support/Format.h"
@@ -78,26 +79,39 @@ std::string GetFlagStr(uint32_t Flags) {
   return result;
 }
 
-void DumpSymbol(const SymbolRef &sym) {
+void DumpSymbol(const SymbolRef &Sym, const ObjectFile *obj, bool IsDynamic) {
     StringRef Name;
     SymbolRef::Type Type;
     uint32_t Flags;
     uint64_t Address;
     uint64_t Size;
     uint64_t FileOffset;
-    sym.getName(Name);
-    sym.getAddress(Address);
-    sym.getSize(Size);
-    sym.getFileOffset(FileOffset);
-    sym.getType(Type);
-    sym.getFlags(Flags);
+    Sym.getName(Name);
+    Sym.getAddress(Address);
+    Sym.getSize(Size);
+    Sym.getFileOffset(FileOffset);
+    Sym.getType(Type);
+    Sym.getFlags(Flags);
+    std::string FullName = Name;
+
+    // If this is a dynamic symbol from an ELF object, append
+    // the symbol's version to the name.
+    if (IsDynamic && obj->isELF()) {
+      StringRef Version;
+      bool IsDefault;
+      GetELFSymbolVersion(obj, Sym, Version, IsDefault);
+      if (!Version.empty()) {
+        FullName += (IsDefault ? "@@" : "@");
+        FullName += Version;
+      }
+    }
 
     // format() can't handle StringRefs
-    outs() << format("  %-32s", Name.str().c_str())
+    outs() << format("  %-32s", FullName.c_str())
            << format("  %-4s", GetTypeStr(Type))
-           << format("  %16"PRIx64, Address)
-           << format("  %16"PRIx64, Size)
-           << format("  %16"PRIx64, FileOffset)
+           << format("  %16" PRIx64, Address)
+           << format("  %16" PRIx64, Size)
+           << format("  %16" PRIx64, FileOffset)
            << "  " << GetFlagStr(Flags)
            << "\n";
 }
@@ -111,7 +125,7 @@ void DumpSymbols(const ObjectFile *obj) {
   symbol_iterator it = obj->begin_symbols();
   symbol_iterator ie = obj->end_symbols();
   while (it != ie) {
-    DumpSymbol(*it);
+    DumpSymbol(*it, obj, false);
     it.increment(ec);
     if (ec)
       report_fatal_error("Symbol iteration failed");
@@ -128,7 +142,7 @@ void DumpDynamicSymbols(const ObjectFile *obj) {
   symbol_iterator it = obj->begin_dynamic_symbols();
   symbol_iterator ie = obj->end_dynamic_symbols();
   while (it != ie) {
-    DumpSymbol(*it);
+    DumpSymbol(*it, obj, true);
     it.increment(ec);
     if (ec)
       report_fatal_error("Symbol iteration failed");
diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp
index 990582d..01a7d15 100644
--- a/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -58,9 +58,11 @@ public:
   uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
                                unsigned SectionID);
 
-  uint8_t *startFunctionBody(const char *Name, uintptr_t &Size);
-  void endFunctionBody(const char *Name, uint8_t *FunctionStart,
-                       uint8_t *FunctionEnd);
+  virtual void *getPointerToNamedFunction(const std::string &Name,
+                                          bool AbortOnFailure = true) {
+    return 0;
+  }
+
 };
 
 uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
@@ -75,18 +77,6 @@ uint8_t *TrivialMemoryManager::allocateDataSection(uintptr_t Size,
   return (uint8_t*)sys::Memory::AllocateRWX(Size, 0, 0).base();
 }
 
-uint8_t *TrivialMemoryManager::startFunctionBody(const char *Name,
-                                                 uintptr_t &Size) {
-  return (uint8_t*)sys::Memory::AllocateRWX(Size, 0, 0).base();
-}
-
-void TrivialMemoryManager::endFunctionBody(const char *Name,
-                                           uint8_t *FunctionStart,
-                                           uint8_t *FunctionEnd) {
-  uintptr_t Size = FunctionEnd - FunctionStart + 1;
-  FunctionMemory.push_back(sys::MemoryBlock(FunctionStart, Size));
-}
-
 static const char *ProgramName;
 
 static void Message(const char *Type, const Twine &Msg) {
diff --git a/unittests/ADT/HashingTest.cpp b/unittests/ADT/HashingTest.cpp
index f5d6aed..b148f14 100644
--- a/unittests/ADT/HashingTest.cpp
+++ b/unittests/ADT/HashingTest.cpp
@@ -51,6 +51,10 @@ using namespace llvm;
 
 namespace {
 
+enum TestEnumeration {
+  TE_Foo = 42,
+  TE_Bar = 43
+};
 
 TEST(HashingTest, HashValueBasicTest) {
   int x = 42, y = 43, c = 'x';
@@ -61,7 +65,9 @@ TEST(HashingTest, HashValueBasicTest) {
   const volatile int cvi = 71;
   uintptr_t addr = reinterpret_cast<uintptr_t>(&y);
   EXPECT_EQ(hash_value(42), hash_value(x));
+  EXPECT_EQ(hash_value(42), hash_value(TE_Foo));
   EXPECT_NE(hash_value(42), hash_value(y));
+  EXPECT_NE(hash_value(42), hash_value(TE_Bar));
   EXPECT_NE(hash_value(42), hash_value(p));
   EXPECT_EQ(hash_value(71), hash_value(i));
   EXPECT_EQ(hash_value(71), hash_value(ci));
diff --git a/unittests/ADT/SmallPtrSetTest.cpp b/unittests/ADT/SmallPtrSetTest.cpp
new file mode 100644
index 0000000..9114875
--- /dev/null
+++ b/unittests/ADT/SmallPtrSetTest.cpp
@@ -0,0 +1,72 @@
+//===- llvm/unittest/ADT/SmallPtrSetTest.cpp ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SmallPtrSet unit tests.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+
+// SmallPtrSet swapping test.
+TEST(SmallPtrSetTest, SwapTest) {
+  int buf[10];
+
+  SmallPtrSet<int *, 2> a;
+  SmallPtrSet<int *, 2> b;
+
+  a.insert(&buf[0]);
+  a.insert(&buf[1]);
+  b.insert(&buf[2]);
+
+  std::swap(a, b);
+
+  EXPECT_EQ(1U, a.size());
+  EXPECT_EQ(2U, b.size());
+  EXPECT_TRUE(a.count(&buf[2]));
+  EXPECT_TRUE(b.count(&buf[0]));
+  EXPECT_TRUE(b.count(&buf[1]));
+
+  b.insert(&buf[3]);
+  std::swap(a, b);
+
+  EXPECT_EQ(3U, a.size());
+  EXPECT_EQ(1U, b.size());
+  EXPECT_TRUE(a.count(&buf[0]));
+  EXPECT_TRUE(a.count(&buf[1]));
+  EXPECT_TRUE(a.count(&buf[3]));
+  EXPECT_TRUE(b.count(&buf[2]));
+
+  std::swap(a, b);
+
+  EXPECT_EQ(1U, a.size());
+  EXPECT_EQ(3U, b.size());
+  EXPECT_TRUE(a.count(&buf[2]));
+  EXPECT_TRUE(b.count(&buf[0]));
+  EXPECT_TRUE(b.count(&buf[1]));
+  EXPECT_TRUE(b.count(&buf[3]));
+
+  a.insert(&buf[4]);
+  a.insert(&buf[5]);
+  a.insert(&buf[6]);
+
+  std::swap(b, a);
+
+  EXPECT_EQ(3U, a.size());
+  EXPECT_EQ(4U, b.size());
+  EXPECT_TRUE(b.count(&buf[2]));
+  EXPECT_TRUE(b.count(&buf[4]));
+  EXPECT_TRUE(b.count(&buf[5]));
+  EXPECT_TRUE(b.count(&buf[6]));
+  EXPECT_TRUE(a.count(&buf[0]));
+  EXPECT_TRUE(a.count(&buf[1]));
+  EXPECT_TRUE(a.count(&buf[3]));
+}
diff --git a/unittests/ADT/StringRefTest.cpp b/unittests/ADT/StringRefTest.cpp
index b2f6fdc..cc7a7fb 100644
--- a/unittests/ADT/StringRefTest.cpp
+++ b/unittests/ADT/StringRefTest.cpp
@@ -310,4 +310,122 @@ TEST(StringRefTest, Hashing) {
             hash_value(StringRef("hello world").slice(1, -1)));
 }
 
+struct UnsignedPair {
+  const char *Str;
+  uint64_t Expected;
+} Unsigned[] =
+  { {"0", 0}
+  , {"255", 255}
+  , {"256", 256}
+  , {"65535", 65535}
+  , {"65536", 65536}
+  , {"4294967295", 4294967295ULL}
+  , {"4294967296", 4294967296ULL}
+  , {"18446744073709551615", 18446744073709551615ULL}
+  , {"042", 34}
+  , {"0x42", 66}
+  , {"0b101010", 42}
+  };
+
+struct SignedPair {
+  const char *Str;
+  int64_t Expected;
+} Signed[] =
+  { {"0", 0}
+  , {"-0", 0}
+  , {"127", 127}
+  , {"128", 128}
+  , {"-128", -128}
+  , {"-129", -129}
+  , {"32767", 32767}
+  , {"32768", 32768}
+  , {"-32768", -32768}
+  , {"-32769", -32769}
+  , {"2147483647", 2147483647LL}
+  , {"2147483648", 2147483648LL}
+  , {"-2147483648", -2147483648LL}
+  , {"-2147483649", -2147483649LL}
+  , {"-9223372036854775808", -(9223372036854775807LL) - 1}
+  , {"042", 34}
+  , {"0x42", 66}
+  , {"0b101010", 42}
+  , {"-042", -34}
+  , {"-0x42", -66}
+  , {"-0b101010", -42}
+  };
+
+TEST(StringRefTest, getAsInteger) {
+  uint8_t U8;
+  uint16_t U16;
+  uint32_t U32;
+  uint64_t U64;
+
+  for (size_t i = 0; i < array_lengthof(Unsigned); ++i) {
+    bool U8Success = StringRef(Unsigned[i].Str).getAsInteger(0, U8);
+    if (static_cast<uint8_t>(Unsigned[i].Expected) == Unsigned[i].Expected) {
+      ASSERT_FALSE(U8Success);
+      EXPECT_EQ(U8, Unsigned[i].Expected);
+    } else {
+      ASSERT_TRUE(U8Success);
+    }
+    bool U16Success = StringRef(Unsigned[i].Str).getAsInteger(0, U16);
+    if (static_cast<uint16_t>(Unsigned[i].Expected) == Unsigned[i].Expected) {
+      ASSERT_FALSE(U16Success);
+      EXPECT_EQ(U16, Unsigned[i].Expected);
+    } else {
+      ASSERT_TRUE(U16Success);
+    }
+    bool U32Success = StringRef(Unsigned[i].Str).getAsInteger(0, U32);
+    if (static_cast<uint32_t>(Unsigned[i].Expected) == Unsigned[i].Expected) {
+      ASSERT_FALSE(U32Success);
+      EXPECT_EQ(U32, Unsigned[i].Expected);
+    } else {
+      ASSERT_TRUE(U32Success);
+    }
+    bool U64Success = StringRef(Unsigned[i].Str).getAsInteger(0, U64);
+    if (static_cast<uint64_t>(Unsigned[i].Expected) == Unsigned[i].Expected) {
+      ASSERT_FALSE(U64Success);
+      EXPECT_EQ(U64, Unsigned[i].Expected);
+    } else {
+      ASSERT_TRUE(U64Success);
+    }
+  }
+
+  int8_t S8;
+  int16_t S16;
+  int32_t S32;
+  int64_t S64;
+
+  for (size_t i = 0; i < array_lengthof(Signed); ++i) {
+    bool S8Success = StringRef(Signed[i].Str).getAsInteger(0, S8);
+    if (static_cast<int8_t>(Signed[i].Expected) == Signed[i].Expected) {
+      ASSERT_FALSE(S8Success);
+      EXPECT_EQ(S8, Signed[i].Expected);
+    } else {
+      ASSERT_TRUE(S8Success);
+    }
+    bool S16Success = StringRef(Signed[i].Str).getAsInteger(0, S16);
+    if (static_cast<int16_t>(Signed[i].Expected) == Signed[i].Expected) {
+      ASSERT_FALSE(S16Success);
+      EXPECT_EQ(S16, Signed[i].Expected);
+    } else {
+      ASSERT_TRUE(S16Success);
+    }
+    bool S32Success = StringRef(Signed[i].Str).getAsInteger(0, S32);
+    if (static_cast<int32_t>(Signed[i].Expected) == Signed[i].Expected) {
+      ASSERT_FALSE(S32Success);
+      EXPECT_EQ(S32, Signed[i].Expected);
+    } else {
+      ASSERT_TRUE(S32Success);
+    }
+    bool S64Success = StringRef(Signed[i].Str).getAsInteger(0, S64);
+    if (static_cast<int64_t>(Signed[i].Expected) == Signed[i].Expected) {
+      ASSERT_FALSE(S64Success);
+      EXPECT_EQ(S64, Signed[i].Expected);
+    } else {
+      ASSERT_TRUE(S64Success);
+    }
+  }
+}
+
 } // end anonymous namespace
diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt
index c6a904e..60423f2 100644
--- a/unittests/CMakeLists.txt
+++ b/unittests/CMakeLists.txt
@@ -87,11 +87,35 @@ add_llvm_unittest(ExecutionEngine
   ExecutionEngine/ExecutionEngineTest.cpp
   )
 
+if( LLVM_USE_INTEL_JITEVENTS )
+  include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} )
+  link_directories( ${LLVM_INTEL_JITEVENTS_LIBDIR} )
+  set(ProfileTestSources
+    ExecutionEngine/JIT/IntelJITEventListenerTest.cpp
+    )
+  set(LLVM_LINK_COMPONENTS
+    ${LLVM_LINK_COMPONENTS}
+    IntelJITEvents
+    ) 
+endif( LLVM_USE_INTEL_JITEVENTS )
+
+if( LLVM_USE_OPROFILE )
+  set(ProfileTestSources
+    ${ProfileTestSources}
+    ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp
+    )
+  set(LLVM_LINK_COMPONENTS
+    ${LLVM_LINK_COMPONENTS}
+    OProfileJIT
+    )
+endif( LLVM_USE_OPROFILE )
+
 set(JITTestsSources
   ExecutionEngine/JIT/JITEventListenerTest.cpp
   ExecutionEngine/JIT/JITMemoryManagerTest.cpp
   ExecutionEngine/JIT/JITTest.cpp
   ExecutionEngine/JIT/MultiJITTest.cpp
+  ${ProfileTestSources}
   )
 
 if(MSVC)
diff --git a/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp b/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp
new file mode 100644
index 0000000..8ed7a15
--- /dev/null
+++ b/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp
@@ -0,0 +1,110 @@
+//===- JITEventListenerTest.cpp - Tests for Intel JITEventListener --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JITEventListenerTestCommon.h"
+
+using namespace llvm;
+
+#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h"
+
+#include <map>
+#include <list>
+
+namespace {
+
+// map of function ("method") IDs to source locations
+NativeCodeMap ReportedDebugFuncs;
+
+} // namespace
+
+/// Mock implementaion of Intel JIT API jitprofiling library
+namespace test_jitprofiling {
+
+int NotifyEvent(iJIT_JVM_EVENT EventType, void *EventSpecificData) {
+  switch (EventType) {
+    case iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED: {
+      EXPECT_TRUE(0 != EventSpecificData);
+      iJIT_Method_Load* msg = static_cast<iJIT_Method_Load*>(EventSpecificData);
+
+      ReportedDebugFuncs[msg->method_id];
+
+      for(unsigned int i = 0; i < msg->line_number_size; ++i) {
+        EXPECT_TRUE(0 != msg->line_number_table);
+        std::pair<std::string, unsigned int> loc(
+          std::string(msg->source_file_name),
+          msg->line_number_table[i].LineNumber);
+        ReportedDebugFuncs[msg->method_id].push_back(loc);
+      }
+    }
+    break;
+    case iJVM_EVENT_TYPE_METHOD_UNLOAD_START: {
+      EXPECT_TRUE(0 != EventSpecificData);
+      unsigned int UnloadId
+        = *reinterpret_cast<unsigned int*>(EventSpecificData);
+      EXPECT_TRUE(1 == ReportedDebugFuncs.erase(UnloadId));
+    }
+    default:
+      break;
+  }
+  return 0;
+}
+
+iJIT_IsProfilingActiveFlags IsProfilingActive(void) {
+  // for testing, pretend we have an Intel Parallel Amplifier XE 2011
+  // instance attached
+  return iJIT_SAMPLING_ON;
+}
+
+unsigned int GetNewMethodID(void) {
+  static unsigned int id = 0;
+  return ++id;
+}
+
+} //namespace test_jitprofiling
+
+class IntelJITEventListenerTest
+  : public JITEventListenerTestBase<IntelJITEventsWrapper> {
+public:
+  IntelJITEventListenerTest()
+  : JITEventListenerTestBase<IntelJITEventsWrapper>(
+      new IntelJITEventsWrapper(test_jitprofiling::NotifyEvent, 0,
+        test_jitprofiling::IsProfilingActive, 0, 0,
+        test_jitprofiling::GetNewMethodID))
+  {
+    EXPECT_TRUE(0 != MockWrapper);
+
+    Listener.reset(JITEventListener::createIntelJITEventListener(
+      MockWrapper.get()));
+    EXPECT_TRUE(0 != Listener);
+    EE->RegisterJITEventListener(Listener.get());
+  }
+};
+
+TEST_F(IntelJITEventListenerTest, NoDebugInfo) {
+  TestNoDebugInfo(ReportedDebugFuncs);
+}
+
+TEST_F(IntelJITEventListenerTest, SingleLine) {
+  TestSingleLine(ReportedDebugFuncs);
+}
+
+TEST_F(IntelJITEventListenerTest, MultipleLines) {
+  TestMultipleLines(ReportedDebugFuncs);
+}
+
+// This testcase is disabled because the Intel JIT API does not support a single
+// JITted function with source lines associated with multiple files
+/*
+TEST_F(IntelJITEventListenerTest, MultipleFiles) {
+  TestMultipleFiles(ReportedDebugFuncs);
+}
+*/
+
+testing::Environment* const jit_env =
+  testing::AddGlobalTestEnvironment(new JITEnvironment);
diff --git a/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h b/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h
new file mode 100644
index 0000000..53608cb
--- /dev/null
+++ b/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h
@@ -0,0 +1,209 @@
+//===- JITEventListenerTestCommon.h - Helper for JITEventListener tests ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-------------------------------------------------------------------------------===//
+
+#ifndef JIT_EVENT_LISTENER_TEST_COMMON_H
+#define JIT_EVENT_LISTENER_TEST_COMMON_H
+
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/TypeBuilder.h"
+#include "llvm/Support/TargetSelect.h"
+
+#include "gtest/gtest.h"
+
+#include <vector>
+#include <string>
+#include <utility>
+
+typedef std::vector<std::pair<std::string, unsigned int> > SourceLocations;
+typedef std::map<uint64_t, SourceLocations> NativeCodeMap;
+
+class JITEnvironment : public testing::Environment {
+  virtual void SetUp() {
+    // Required to create a JIT.
+    llvm::InitializeNativeTarget();
+  }
+};
+
+inline unsigned int getLine() {
+  return 12;
+}
+
+inline unsigned int getCol() {
+  return 0;
+}
+
+inline const char* getFilename() {
+  return "mock_source_file.cpp";
+}
+
+// Test fixture shared by tests for listener implementations
+template<typename WrapperT>
+class JITEventListenerTestBase : public testing::Test {
+protected:
+  llvm::OwningPtr<WrapperT> MockWrapper;
+  llvm::OwningPtr<llvm::JITEventListener> Listener;
+
+public:
+  llvm::Module* M;
+  llvm::MDNode* Scope;
+  llvm::ExecutionEngine* EE;
+  llvm::DIBuilder* DebugBuilder;
+  llvm::IRBuilder<> Builder;
+
+  JITEventListenerTestBase(WrapperT* w)
+  : MockWrapper(w)
+  , M(new llvm::Module("module", llvm::getGlobalContext()))
+  , EE(llvm::EngineBuilder(M)
+    .setEngineKind(llvm::EngineKind::JIT)
+    .setOptLevel(llvm::CodeGenOpt::None)
+    .create())
+  , DebugBuilder(new llvm::DIBuilder(*M))
+  , Builder(llvm::getGlobalContext())
+  {
+    DebugBuilder->createCompileUnit(llvm::dwarf::DW_LANG_C_plus_plus,
+                                    "JIT",
+                                    "JIT",
+                                    "JIT",
+                                    true,
+                                    "",
+                                    1);
+
+    Scope = DebugBuilder->createFile(getFilename(), ".");
+  }
+
+  llvm::Function *buildFunction(const SourceLocations& DebugLocations) {
+    using namespace llvm;
+
+    LLVMContext& GlobalContext = getGlobalContext();
+
+    SourceLocations::const_iterator CurrentDebugLocation
+      = DebugLocations.begin();
+
+    if (CurrentDebugLocation != DebugLocations.end()) {
+      DebugLoc DebugLocation = DebugLoc::get(getLine(), getCol(),
+          DebugBuilder->createFile(CurrentDebugLocation->first, "."));
+      Builder.SetCurrentDebugLocation(DebugLocation);
+      CurrentDebugLocation++;
+    }
+
+    Function *Result = Function::Create(
+        TypeBuilder<int32_t(int32_t), false>::get(GlobalContext),
+        GlobalValue::ExternalLinkage, "id", M);
+    Value *Arg = Result->arg_begin();
+    BasicBlock *BB = BasicBlock::Create(M->getContext(), "entry", Result);
+    Builder.SetInsertPoint(BB);
+    Value* one = ConstantInt::get(GlobalContext, APInt(32, 1));
+    for(; CurrentDebugLocation != DebugLocations.end();
+        ++CurrentDebugLocation) {
+      Arg = Builder.CreateMul(Arg, Builder.CreateAdd(Arg, one));
+      Builder.SetCurrentDebugLocation(
+        DebugLoc::get(CurrentDebugLocation->second, 0,
+                      DebugBuilder->createFile(CurrentDebugLocation->first, ".")));
+    }
+    Builder.CreateRet(Arg);
+    return Result;
+  }
+
+  void TestNoDebugInfo(NativeCodeMap& ReportedDebugFuncs) {
+    SourceLocations DebugLocations;
+    llvm::Function* f = buildFunction(DebugLocations);
+    EXPECT_TRUE(0 != f);
+
+    //Cause JITting and callbacks to our listener
+    EXPECT_TRUE(0 != EE->getPointerToFunction(f));
+    EXPECT_TRUE(1 == ReportedDebugFuncs.size());
+
+    EE->freeMachineCodeForFunction(f);
+    EXPECT_TRUE(ReportedDebugFuncs.size() == 0);
+  }
+
+  void TestSingleLine(NativeCodeMap& ReportedDebugFuncs) {
+    SourceLocations DebugLocations;
+    DebugLocations.push_back(std::make_pair(std::string(getFilename()),
+                                            getLine()));
+    llvm::Function* f = buildFunction(DebugLocations);
+    EXPECT_TRUE(0 != f);
+
+    EXPECT_TRUE(0 != EE->getPointerToFunction(f));
+    EXPECT_TRUE(1 == ReportedDebugFuncs.size());
+    EXPECT_STREQ(ReportedDebugFuncs.begin()->second.begin()->first.c_str(),
+                 getFilename());
+    EXPECT_EQ(ReportedDebugFuncs.begin()->second.begin()->second, getLine());
+
+    EE->freeMachineCodeForFunction(f);
+    EXPECT_TRUE(ReportedDebugFuncs.size() == 0);
+  }
+
+  void TestMultipleLines(NativeCodeMap& ReportedDebugFuncs) {
+    using namespace std;
+
+    SourceLocations DebugLocations;
+    unsigned int c = 5;
+    for(unsigned int i = 0; i < c; ++i) {
+      DebugLocations.push_back(make_pair(string(getFilename()), getLine() + i));
+    }
+
+    llvm::Function* f = buildFunction(DebugLocations);
+    EXPECT_TRUE(0 != f);
+
+    EXPECT_TRUE(0 != EE->getPointerToFunction(f));
+    EXPECT_TRUE(1 == ReportedDebugFuncs.size());
+    SourceLocations& FunctionInfo = ReportedDebugFuncs.begin()->second;
+    EXPECT_EQ(c, FunctionInfo.size());
+
+    int VerifyCount = 0;
+    for(SourceLocations::iterator i = FunctionInfo.begin();
+        i != FunctionInfo.end();
+        ++i) {
+      EXPECT_STREQ(i->first.c_str(), getFilename());
+      EXPECT_EQ(i->second, getLine() + VerifyCount);
+      VerifyCount++;
+    }
+
+    EE->freeMachineCodeForFunction(f);
+    EXPECT_TRUE(ReportedDebugFuncs.size() == 0);
+  }
+
+  void TestMultipleFiles(NativeCodeMap& ReportedDebugFuncs) {
+
+    std::string secondFilename("another_file.cpp");
+
+    SourceLocations DebugLocations;
+    DebugLocations.push_back(std::make_pair(std::string(getFilename()),
+                                            getLine()));
+    DebugLocations.push_back(std::make_pair(secondFilename, getLine()));
+    llvm::Function* f = buildFunction(DebugLocations);
+    EXPECT_TRUE(0 != f);
+
+    EXPECT_TRUE(0 != EE->getPointerToFunction(f));
+    EXPECT_TRUE(1 == ReportedDebugFuncs.size());
+    SourceLocations& FunctionInfo = ReportedDebugFuncs.begin()->second;
+    EXPECT_TRUE(2 == FunctionInfo.size());
+
+    EXPECT_STREQ(FunctionInfo.at(0).first.c_str(), getFilename());
+    EXPECT_STREQ(FunctionInfo.at(1).first.c_str(), secondFilename.c_str());
+
+    EXPECT_EQ(FunctionInfo.at(0).second, getLine());
+    EXPECT_EQ(FunctionInfo.at(1).second, getLine());
+
+    EE->freeMachineCodeForFunction(f);
+    EXPECT_TRUE(ReportedDebugFuncs.size() == 0);
+  }
+};
+
+#endif //JIT_EVENT_LISTENER_TEST_COMMON_H
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index 4d10ee6..fa52321 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -64,6 +64,10 @@ public:
     : Base(JITMemoryManager::CreateDefaultMemManager()) {
     stubsAllocated = 0;
   }
+  virtual void *getPointerToNamedFunction(const std::string &Name,
+                                          bool AbortOnFailure = true) {
+    return Base->getPointerToNamedFunction(Name, AbortOnFailure);
+  }
 
   virtual void setMemoryWritable() { Base->setMemoryWritable(); }
   virtual void setMemoryExecutable() { Base->setMemoryExecutable(); }
diff --git a/unittests/ExecutionEngine/JIT/Makefile b/unittests/ExecutionEngine/JIT/Makefile
index f5abe75..b429033 100644
--- a/unittests/ExecutionEngine/JIT/Makefile
+++ b/unittests/ExecutionEngine/JIT/Makefile
@@ -12,6 +12,29 @@ TESTNAME = JIT
 LINK_COMPONENTS := asmparser bitreader bitwriter core jit native support
 
 include $(LEVEL)/Makefile.config
+
+SOURCES := JITEventListenerTest.cpp
+
+ifeq ($(USE_INTEL_JITEVENTS), 1)
+  # Build the Intel JIT Events interface tests
+  SOURCES += IntelJITEventListenerTest.cpp
+
+  # Add the Intel JIT Events include directory
+  CPPFLAGS += -I$(INTEL_JITEVENTS_INCDIR)
+
+  # Link against the LLVM Intel JIT Evens interface library
+  LINK_COMPONENTS += inteljitevents
+endif
+
+ifeq ($(USE_OPROFILE), 1)
+  # Build the OProfile JIT interface tests
+  SOURCES += OProfileJITEventListenerTest.cpp
+
+  # Link against the LLVM oprofile interface library
+  LINK_COMPONENTS += oprofilejit
+endif
+
+
 include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
 
 # Permit these tests to use the JIT's symbolic lookup.
diff --git a/unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp b/unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp
new file mode 100644
index 0000000..9b0ee60
--- /dev/null
+++ b/unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp
@@ -0,0 +1,166 @@
+//===- OProfileJITEventListenerTest.cpp - Unit tests for OProfileJITEventsListener --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--------------------------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/OProfileWrapper.h"
+#include "JITEventListenerTestCommon.h"
+
+#include <map>
+#include <list>
+
+using namespace llvm;
+
+namespace {
+
+struct OprofileNativeFunction {
+  const char* Name;
+  uint64_t Addr;
+  const void* CodePtr;
+  unsigned int CodeSize;
+
+  OprofileNativeFunction(const char* name,
+                         uint64_t addr,
+                         const void* code,
+                         unsigned int size)
+  : Name(name)
+  , Addr(addr)
+  , CodePtr(code)
+  , CodeSize(size) {
+  }
+};
+
+typedef std::list<OprofileNativeFunction> NativeFunctionList;
+typedef std::list<debug_line_info> NativeDebugList;
+NativeFunctionList NativeFunctions;
+
+NativeCodeMap ReportedDebugFuncs;
+
+} // namespace
+
+/// Mock implementaion of opagent library
+namespace test_opagent {
+
+op_agent_t globalAgent = reinterpret_cast<op_agent_t>(42);
+
+op_agent_t open_agent()
+{
+  // return non-null op_agent_t
+  return globalAgent;
+}
+
+int close_agent(op_agent_t agent)
+{
+  EXPECT_EQ(globalAgent, agent);
+  return 0;
+}
+
+int write_native_code(op_agent_t agent,
+                      const char* name,
+                      uint64_t addr,
+                      void const* code,
+                      unsigned int size)
+{
+  EXPECT_EQ(globalAgent, agent);
+  OprofileNativeFunction func(name, addr, code, size);
+  NativeFunctions.push_back(func);
+
+  // Verify no other registration has take place for the same address
+  EXPECT_TRUE(ReportedDebugFuncs.find(addr) == ReportedDebugFuncs.end());
+
+  ReportedDebugFuncs[addr];
+  return 0;
+}
+
+int write_debug_line_info(op_agent_t agent,
+                          void const* code,
+                          size_t num_entries,
+                          struct debug_line_info const* info)
+{
+  EXPECT_EQ(globalAgent, agent);
+
+  //verify code has been loaded first
+  uint64_t addr = reinterpret_cast<uint64_t>(code);
+  NativeCodeMap::iterator i = ReportedDebugFuncs.find(addr);
+  EXPECT_TRUE(i != ReportedDebugFuncs.end());
+
+  NativeDebugList NativeInfo(info, info + num_entries);
+
+  SourceLocations locs;
+  for(NativeDebugList::iterator i = NativeInfo.begin();
+      i != NativeInfo.end();
+      ++i) {
+    locs.push_back(std::make_pair(std::string(i->filename), i->lineno));
+  }
+  ReportedDebugFuncs[addr] = locs;
+
+  return 0;
+}
+
+int unload_native_code(op_agent_t agent, uint64_t addr) {
+  EXPECT_EQ(globalAgent, agent);
+
+  //verify that something for the given JIT addr has been loaded first
+  NativeCodeMap::iterator i = ReportedDebugFuncs.find(addr);
+  EXPECT_TRUE(i != ReportedDebugFuncs.end());
+  ReportedDebugFuncs.erase(i);
+  return 0;
+}
+
+int version() {
+  return 1;
+}
+
+bool is_oprofile_running() {
+  return true;
+}
+
+} //namespace test_opagent
+
+class OProfileJITEventListenerTest
+: public JITEventListenerTestBase<OProfileWrapper>
+{
+public:
+  OProfileJITEventListenerTest()
+  : JITEventListenerTestBase<OProfileWrapper>(
+    new OProfileWrapper(test_opagent::open_agent,
+      test_opagent::close_agent,
+      test_opagent::write_native_code,
+      test_opagent::write_debug_line_info,
+      test_opagent::unload_native_code,
+      test_opagent::version,
+      test_opagent::version,
+      test_opagent::is_oprofile_running))
+  {
+    EXPECT_TRUE(0 != MockWrapper);
+
+    Listener.reset(JITEventListener::createOProfileJITEventListener(
+      MockWrapper.get()));
+    EXPECT_TRUE(0 != Listener);
+    EE->RegisterJITEventListener(Listener.get());
+  }
+};
+
+TEST_F(OProfileJITEventListenerTest, NoDebugInfo) {
+  TestNoDebugInfo(ReportedDebugFuncs);
+}
+
+TEST_F(OProfileJITEventListenerTest, SingleLine) {
+  TestSingleLine(ReportedDebugFuncs);
+}
+
+TEST_F(OProfileJITEventListenerTest, MultipleLines) {
+  TestMultipleLines(ReportedDebugFuncs);
+}
+
+TEST_F(OProfileJITEventListenerTest, MultipleFiles) {
+  TestMultipleFiles(ReportedDebugFuncs);
+}
+
+testing::Environment* const jit_env =
+  testing::AddGlobalTestEnvironment(new JITEnvironment);
diff --git a/unittests/ExecutionEngine/Makefile b/unittests/ExecutionEngine/Makefile
index d4ef92f..a0395cd 100644
--- a/unittests/ExecutionEngine/Makefile
+++ b/unittests/ExecutionEngine/Makefile
@@ -10,9 +10,7 @@
 LEVEL = ../..
 TESTNAME = ExecutionEngine
 LINK_COMPONENTS := engine interpreter
-
-include $(LEVEL)/Makefile.config
-
 PARALLEL_DIRS = JIT
 
+include $(LEVEL)/Makefile.config
 include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index cdcc496..c8d90aa 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -2023,9 +2023,9 @@ static void EmitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
   // Emit the static custom operand parsing table;
   OS << "namespace {\n";
   OS << "  struct OperandMatchEntry {\n";
-  OS << "    static const char *MnemonicTable;\n";
+  OS << "    static const char *const MnemonicTable;\n";
   OS << "    unsigned OperandMask;\n";
-  OS << "    uint16_t Mnemonic;\n";
+  OS << "    unsigned Mnemonic;\n";
   OS << "    " << getMinimalTypeForRange(Info.Classes.size())
                << " Class;\n";
   OS << "    " << getMinimalTypeForRange(1ULL << Info.SubtargetFeatures.size())
@@ -2097,7 +2097,7 @@ static void EmitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
   }
   OS << "};\n\n";
 
-  OS << "const char *OperandMatchEntry::MnemonicTable =\n";
+  OS << "const char *const OperandMatchEntry::MnemonicTable =\n";
   StringTable.EmitString(OS);
   OS << ";\n\n";
 
@@ -2320,9 +2320,9 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   // following the mnemonic.
   OS << "namespace {\n";
   OS << "  struct MatchEntry {\n";
-  OS << "    static const char *MnemonicTable;\n";
+  OS << "    static const char *const MnemonicTable;\n";
   OS << "    uint16_t Opcode;\n";
-  OS << "    uint16_t Mnemonic;\n";
+  OS << "    unsigned Mnemonic;\n";
   OS << "    " << getMinimalTypeForRange(Info.Matchables.size())
                << " ConvertFn;\n";
   OS << "    " << getMinimalTypeForRange(Info.Classes.size())
@@ -2390,7 +2390,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
 
   OS << "};\n\n";
 
-  OS << "const char *MatchEntry::MnemonicTable =\n";
+  OS << "const char *const MatchEntry::MnemonicTable =\n";
   StringTable.EmitString(OS);
   OS << ";\n\n";
 
diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp
index fcdaa08..afbb3a8 100644
--- a/utils/TableGen/CallingConvEmitter.cpp
+++ b/utils/TableGen/CallingConvEmitter.cpp
@@ -96,7 +96,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
         O << IndentStr << "if (unsigned Reg = State.AllocateReg(";
         O << getQualifiedName(RegList->getElementAsRecord(0)) << ")) {\n";
       } else {
-        O << IndentStr << "static const unsigned RegList" << ++Counter
+        O << IndentStr << "static const uint16_t RegList" << ++Counter
           << "[] = {\n";
         O << IndentStr << "  ";
         for (unsigned i = 0, e = RegList->getSize(); i != e; ++i) {
@@ -127,7 +127,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
         unsigned RegListNumber = ++Counter;
         unsigned ShadowRegListNumber = ++Counter;
 
-        O << IndentStr << "static const unsigned RegList" << RegListNumber
+        O << IndentStr << "static const uint16_t RegList" << RegListNumber
           << "[] = {\n";
         O << IndentStr << "  ";
         for (unsigned i = 0, e = RegList->getSize(); i != e; ++i) {
@@ -136,7 +136,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
         }
         O << "\n" << IndentStr << "};\n";
 
-        O << IndentStr << "static const unsigned RegList"
+        O << IndentStr << "static const uint16_t RegList"
           << ShadowRegListNumber << "[] = {\n";
         O << IndentStr << "  ";
         for (unsigned i = 0, e = ShadowRegList->getSize(); i != e; ++i) {
diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp
index 2a1bcab..ed8ab79 100644
--- a/utils/TableGen/CodeEmitterGen.cpp
+++ b/utils/TableGen/CodeEmitterGen.cpp
@@ -163,7 +163,7 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName,
       --bit;
     }
      
-    unsigned opMask = ~0U >> (32-N);
+    uint64_t opMask = ~(uint64_t)0 >> (64-N);
     int opShift = beginVarBit - N + 1;
     opMask <<= opShift;
     opShift = beginInstBit - beginVarBit;
@@ -228,7 +228,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
     o << "CodeEmitter::getBinaryCodeForInstr(const MachineInstr &MI) const {\n";
 
   // Emit instruction base values
-  o << "  static const unsigned InstBits[] = {\n";
+  o << "  static const uint64_t InstBits[] = {\n";
   for (std::vector<const CodeGenInstruction*>::const_iterator
           IN = NumberedInstructions.begin(),
           EN = NumberedInstructions.end();
@@ -245,10 +245,10 @@ void CodeEmitterGen::run(raw_ostream &o) {
     BitsInit *BI = R->getValueAsBitsInit("Inst");
 
     // Start by filling in fixed values.
-    unsigned Value = 0;
+    uint64_t Value = 0;
     for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
       if (BitInit *B = dynamic_cast<BitInit*>(BI->getBit(e-i-1)))
-        Value |= B->getValue() << (e-i-1);
+        Value |= (uint64_t)B->getValue() << (e-i-1);
     }
     o << "    UINT64_C(" << Value << ")," << '\t' << "// " << R->getName() << "\n";
   }
@@ -276,8 +276,8 @@ void CodeEmitterGen::run(raw_ostream &o) {
 
   // Emit initial function code
   o << "  const unsigned opcode = MI.getOpcode();\n"
-    << "  unsigned Value = InstBits[opcode];\n"
-    << "  unsigned op = 0;\n"
+    << "  uint64_t Value = InstBits[opcode];\n"
+    << "  uint64_t op = 0;\n"
     << "  (void)op;  // suppress warning\n"
     << "  switch (opcode) {\n";
 
diff --git a/utils/TableGen/DFAPacketizerEmitter.cpp b/utils/TableGen/DFAPacketizerEmitter.cpp
index 5721121..4abf54e 100644
--- a/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -15,8 +15,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/TableGen/Record.h"
 #include "CodeGenTarget.h"
 #include "DFAPacketizerEmitter.h"
diff --git a/utils/TableGen/DFAPacketizerEmitter.h b/utils/TableGen/DFAPacketizerEmitter.h
index 8f47dde..1727150 100644
--- a/utils/TableGen/DFAPacketizerEmitter.h
+++ b/utils/TableGen/DFAPacketizerEmitter.h
@@ -16,8 +16,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/TableGen/TableGenBackend.h"
 #include <map>
 #include <string>
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp
index 70d07bc..1b473d3 100644
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@@ -579,8 +579,8 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
   REG("VecListThreeD");
   REG("VecListFourD");
   REG("VecListOneDAllLanes");
-  REG("VecListTwoDAllLanes");
-  REG("VecListTwoQAllLanes");
+  REG("VecListDPairAllLanes");
+  REG("VecListDPairSpacedAllLanes");
 
   IMM("i32imm");
   IMM("fbits16");
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
index 19e86db..10e04a6 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -49,7 +49,7 @@ static bool ValueNotSet(bit_value_t V) {
 static int Value(bit_value_t V) {
   return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1);
 }
-static bit_value_t bitFromBits(BitsInit &bits, unsigned index) {
+static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) {
   if (BitInit *bit = dynamic_cast<BitInit*>(bits.getBit(index)))
     return bit->getValue() ? BIT_TRUE : BIT_FALSE;
 
@@ -57,7 +57,7 @@ static bit_value_t bitFromBits(BitsInit &bits, unsigned index) {
   return BIT_UNSET;
 }
 // Prints the bit value for each position.
-static void dumpBits(raw_ostream &o, BitsInit &bits) {
+static void dumpBits(raw_ostream &o, const BitsInit &bits) {
   unsigned index;
 
   for (index = bits.getNumBits(); index > 0; index--) {
@@ -126,7 +126,7 @@ typedef std::vector<bit_value_t> insn_t;
 /// version and return the Opcode since the two have the same Asm format string.
 class Filter {
 protected:
-  FilterChooser *Owner; // points to the FilterChooser who owns this filter
+  const FilterChooser *Owner;// points to the FilterChooser who owns this filter
   unsigned StartBit; // the starting bit position
   unsigned NumBits; // number of bits to filter
   bool Mixed; // a mixed region contains both set and unset bits
@@ -138,7 +138,7 @@ protected:
   std::vector<unsigned> VariableInstructions;
 
   // Map of well-known segment value to its delegate.
-  std::map<unsigned, FilterChooser*> FilterChooserMap;
+  std::map<unsigned, const FilterChooser*> FilterChooserMap;
 
   // Number of instructions which fall under FilteredInstructions category.
   unsigned NumFiltered;
@@ -146,19 +146,15 @@ protected:
   // Keeps track of the last opcode in the filtered bucket.
   unsigned LastOpcFiltered;
 
-  // Number of instructions which fall under VariableInstructions category.
-  unsigned NumVariable;
-
 public:
-  unsigned getNumFiltered() { return NumFiltered; }
-  unsigned getNumVariable() { return NumVariable; }
-  unsigned getSingletonOpc() {
+  unsigned getNumFiltered() const { return NumFiltered; }
+  unsigned getSingletonOpc() const {
     assert(NumFiltered == 1);
     return LastOpcFiltered;
   }
   // Return the filter chooser for the group of instructions without constant
   // segment values.
-  FilterChooser &getVariableFC() {
+  const FilterChooser &getVariableFC() const {
     assert(NumFiltered == 1);
     assert(FilterChooserMap.size() == 1);
     return *(FilterChooserMap.find((unsigned)-1)->second);
@@ -178,7 +174,7 @@ public:
   void recurse();
 
   // Emit code to decode instructions given a segment or segments of bits.
-  void emit(raw_ostream &o, unsigned &Indentation);
+  void emit(raw_ostream &o, unsigned &Indentation) const;
 
   // Returns the number of fanout produced by the filter.  More fanout implies
   // the filter distinguishes more categories of instructions.
@@ -218,10 +214,10 @@ protected:
   const std::vector<const CodeGenInstruction*> &AllInstructions;
 
   // Vector of uid's for this filter chooser to work on.
-  const std::vector<unsigned> Opcodes;
+  const std::vector<unsigned> &Opcodes;
 
   // Lookup table for the operand decoding of instructions.
-  std::map<unsigned, std::vector<OperandInfo> > &Operands;
+  const std::map<unsigned, std::vector<OperandInfo> > &Operands;
 
   // Vector of candidate filters.
   std::vector<Filter> Filters;
@@ -231,7 +227,7 @@ protected:
   std::vector<bit_value_t> FilterBitValues;
 
   // Links to the FilterChooser above us in the decoding tree.
-  FilterChooser *Parent;
+  const FilterChooser *Parent;
 
   // Index of the best filter from Filters.
   int BestIndex;
@@ -243,19 +239,19 @@ protected:
   const FixedLenDecoderEmitter *Emitter;
 
 public:
-  FilterChooser(const FilterChooser &FC) :
-    AllInstructions(FC.AllInstructions), Opcodes(FC.Opcodes),
+  FilterChooser(const FilterChooser &FC)
+    : AllInstructions(FC.AllInstructions), Opcodes(FC.Opcodes),
       Operands(FC.Operands), Filters(FC.Filters),
       FilterBitValues(FC.FilterBitValues), Parent(FC.Parent),
-    BestIndex(FC.BestIndex), BitWidth(FC.BitWidth),
-    Emitter(FC.Emitter) { }
+      BestIndex(FC.BestIndex), BitWidth(FC.BitWidth),
+      Emitter(FC.Emitter) { }
 
   FilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
                 const std::vector<unsigned> &IDs,
-    std::map<unsigned, std::vector<OperandInfo> > &Ops,
+                const std::map<unsigned, std::vector<OperandInfo> > &Ops,
                 unsigned BW,
-                const FixedLenDecoderEmitter *E) :
-      AllInstructions(Insts), Opcodes(IDs), Operands(Ops), Filters(),
+                const FixedLenDecoderEmitter *E)
+    : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), Filters(),
       Parent(NULL), BestIndex(-1), BitWidth(BW), Emitter(E) {
     for (unsigned i = 0; i < BitWidth; ++i)
       FilterBitValues.push_back(BIT_UNFILTERED);
@@ -265,10 +261,10 @@ public:
 
   FilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
                 const std::vector<unsigned> &IDs,
-        std::map<unsigned, std::vector<OperandInfo> > &Ops,
-                std::vector<bit_value_t> &ParentFilterBitValues,
-                FilterChooser &parent) :
-      AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
+                const std::map<unsigned, std::vector<OperandInfo> > &Ops,
+                const std::vector<bit_value_t> &ParentFilterBitValues,
+                const FilterChooser &parent)
+    : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
       Filters(), FilterBitValues(ParentFilterBitValues),
       Parent(&parent), BestIndex(-1), BitWidth(parent.BitWidth),
       Emitter(parent.Emitter) {
@@ -276,10 +272,11 @@ public:
   }
 
   // The top level filter chooser has NULL as its parent.
-  bool isTopLevel() { return Parent == NULL; }
+  bool isTopLevel() const { return Parent == NULL; }
 
   // Emit the top level typedef and decodeInstruction() function.
-  void emitTop(raw_ostream &o, unsigned Indentation, std::string Namespace);
+  void emitTop(raw_ostream &o, unsigned Indentation,
+               const std::string &Namespace) const;
 
 protected:
   // Populates the insn given the uid.
@@ -313,15 +310,16 @@ protected:
   // Returns false if there exists any uninitialized bit value in the range.
   // Returns true, otherwise.
   bool fieldFromInsn(uint64_t &Field, insn_t &Insn, unsigned StartBit,
-      unsigned NumBits) const;
+                     unsigned NumBits) const;
 
   /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
   /// filter array as a series of chars.
-  void dumpFilterArray(raw_ostream &o, std::vector<bit_value_t> & filter);
+  void dumpFilterArray(raw_ostream &o,
+                       const std::vector<bit_value_t> & filter) const;
 
   /// dumpStack - dumpStack traverses the filter chooser chain and calls
   /// dumpFilterArray on each filter chooser up to the top level one.
-  void dumpStack(raw_ostream &o, const char *prefix);
+  void dumpStack(raw_ostream &o, const char *prefix) const;
 
   Filter &bestFilter() {
     assert(BestIndex != -1 && "BestIndex not set");
@@ -329,9 +327,9 @@ protected:
   }
 
   // Called from Filter::recurse() when singleton exists.  For debug purpose.
-  void SingletonExists(unsigned Opc);
+  void SingletonExists(unsigned Opc) const;
 
-  bool PositionFiltered(unsigned i) {
+  bool PositionFiltered(unsigned i) const {
     return ValueSet(FilterBitValues[i]);
   }
 
@@ -340,33 +338,37 @@ protected:
   // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
   // decoded bits in order to verify that the instruction matches the Opcode.
   unsigned getIslands(std::vector<unsigned> &StartBits,
-      std::vector<unsigned> &EndBits, std::vector<uint64_t> &FieldVals,
-      insn_t &Insn);
+                      std::vector<unsigned> &EndBits,
+                      std::vector<uint64_t> &FieldVals,
+                      const insn_t &Insn) const;
 
   // Emits code to check the Predicates member of an instruction are true.
   // Returns true if predicate matches were emitted, false otherwise.
-  bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation,unsigned Opc);
+  bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
+                          unsigned Opc) const;
 
-  void emitSoftFailCheck(raw_ostream &o, unsigned Indentation, unsigned Opc);
+  void emitSoftFailCheck(raw_ostream &o, unsigned Indentation,
+                         unsigned Opc) const;
 
   // Emits code to decode the singleton.  Return true if we have matched all the
   // well-known bits.
-  bool emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,unsigned Opc);
+  bool emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
+                            unsigned Opc) const;
 
   // Emits code to decode the singleton, and then to decode the rest.
-  void emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,Filter &Best);
+  void emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
+                            const Filter &Best) const;
 
   void emitBinaryParser(raw_ostream &o , unsigned &Indentation,
-                        OperandInfo &OpInfo);
+                        const OperandInfo &OpInfo) const;
 
   // Assign a single filter and run with it.
-  void runSingleFilter(FilterChooser &owner, unsigned startBit, unsigned numBit,
-      bool mixed);
+  void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed);
 
   // reportRegion is a helper function for filterProcessor to mark a region as
   // eligible for use as a filter region.
   void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex,
-      bool AllowMixed);
+                    bool AllowMixed);
 
   // FilterProcessor scans the well-known encoding bits of the instructions and
   // builds up a list of candidate filters.  It chooses the best filter and
@@ -381,31 +383,30 @@ protected:
   // Emits code to decode our share of instructions.  Returns true if the
   // emitted code causes a return, which occurs if we know how to decode
   // the instruction at this level or the instruction is not decodeable.
-  bool emit(raw_ostream &o, unsigned &Indentation);
+  bool emit(raw_ostream &o, unsigned &Indentation) const;
 };
 
 ///////////////////////////
 //                       //
-// Filter Implmenetation //
+// Filter Implementation //
 //                       //
 ///////////////////////////
 
-Filter::Filter(const Filter &f) :
-  Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
-  FilteredInstructions(f.FilteredInstructions),
-  VariableInstructions(f.VariableInstructions),
-  FilterChooserMap(f.FilterChooserMap), NumFiltered(f.NumFiltered),
-  LastOpcFiltered(f.LastOpcFiltered), NumVariable(f.NumVariable) {
+Filter::Filter(const Filter &f)
+  : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
+    FilteredInstructions(f.FilteredInstructions),
+    VariableInstructions(f.VariableInstructions),
+    FilterChooserMap(f.FilterChooserMap), NumFiltered(f.NumFiltered),
+    LastOpcFiltered(f.LastOpcFiltered) {
 }
 
 Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
-    bool mixed) : Owner(&owner), StartBit(startBit), NumBits(numBits),
-                  Mixed(mixed) {
+               bool mixed)
+  : Owner(&owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) {
   assert(StartBit + NumBits - 1 < Owner->BitWidth);
 
   NumFiltered = 0;
   LastOpcFiltered = 0;
-  NumVariable = 0;
 
   for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) {
     insn_t Insn;
@@ -424,10 +425,9 @@ Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
       FilteredInstructions[Field].push_back(LastOpcFiltered);
       ++NumFiltered;
     } else {
-      // Some of the encoding bit(s) are unspecfied.  This contributes to
+      // Some of the encoding bit(s) are unspecified.  This contributes to
       // one additional member of "Variable" instructions.
       VariableInstructions.push_back(Owner->Opcodes[i]);
-      ++NumVariable;
     }
   }
 
@@ -436,7 +436,7 @@ Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
 }
 
 Filter::~Filter() {
-  std::map<unsigned, FilterChooser*>::iterator filterIterator;
+  std::map<unsigned, const FilterChooser*>::iterator filterIterator;
   for (filterIterator = FilterChooserMap.begin();
        filterIterator != FilterChooserMap.end();
        filterIterator++) {
@@ -465,7 +465,7 @@ void Filter::recurse() {
 
     // Delegates to an inferior filter chooser for further processing on this
     // group of instructions whose segment values are variable.
-    FilterChooserMap.insert(std::pair<unsigned, FilterChooser*>(
+    FilterChooserMap.insert(std::pair<unsigned, const FilterChooser*>(
                               (unsigned)-1,
                               new FilterChooser(Owner->AllInstructions,
                                                 VariableInstructions,
@@ -498,7 +498,7 @@ void Filter::recurse() {
 
     // Delegates to an inferior filter chooser for further processing on this
     // category of instructions.
-    FilterChooserMap.insert(std::pair<unsigned, FilterChooser*>(
+    FilterChooserMap.insert(std::pair<unsigned, const FilterChooser*>(
                               mapIterator->first,
                               new FilterChooser(Owner->AllInstructions,
                                                 mapIterator->second,
@@ -510,7 +510,7 @@ void Filter::recurse() {
 }
 
 // Emit code to decode instructions given a segment or segments of bits.
-void Filter::emit(raw_ostream &o, unsigned &Indentation) {
+void Filter::emit(raw_ostream &o, unsigned &Indentation) const {
   o.indent(Indentation) << "// Check Inst{";
 
   if (NumBits > 1)
@@ -522,7 +522,7 @@ void Filter::emit(raw_ostream &o, unsigned &Indentation) {
                         << "(insn, " << StartBit << ", "
                         << NumBits << ")) {\n";
 
-  std::map<unsigned, FilterChooser*>::iterator filterIterator;
+  std::map<unsigned, const FilterChooser*>::const_iterator filterIterator;
 
   bool DefaultCase = false;
   for (filterIterator = FilterChooserMap.begin();
@@ -586,7 +586,7 @@ unsigned Filter::usefulness() const {
 
 // Emit the top level typedef and decodeInstruction() function.
 void FilterChooser::emitTop(raw_ostream &o, unsigned Indentation,
-                            std::string Namespace) {
+                            const std::string &Namespace) const {
   o.indent(Indentation) <<
     "static MCDisassembler::DecodeStatus decode" << Namespace << "Instruction"
     << BitWidth << "(MCInst &MI, uint" << BitWidth
@@ -617,7 +617,7 @@ void FilterChooser::emitTop(raw_ostream &o, unsigned Indentation,
 // Returns false if and on the first uninitialized bit value encountered.
 // Returns true, otherwise.
 bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn,
-    unsigned StartBit, unsigned NumBits) const {
+                                  unsigned StartBit, unsigned NumBits) const {
   Field = 0;
 
   for (unsigned i = 0; i < NumBits; ++i) {
@@ -634,7 +634,7 @@ bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn,
 /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
 /// filter array as a series of chars.
 void FilterChooser::dumpFilterArray(raw_ostream &o,
-                                    std::vector<bit_value_t> &filter) {
+                                 const std::vector<bit_value_t> &filter) const {
   unsigned bitIndex;
 
   for (bitIndex = BitWidth; bitIndex > 0; bitIndex--) {
@@ -657,8 +657,8 @@ void FilterChooser::dumpFilterArray(raw_ostream &o,
 
 /// dumpStack - dumpStack traverses the filter chooser chain and calls
 /// dumpFilterArray on each filter chooser up to the top level one.
-void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) {
-  FilterChooser *current = this;
+void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) const {
+  const FilterChooser *current = this;
 
   while (current) {
     o << prefix;
@@ -669,7 +669,7 @@ void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) {
 }
 
 // Called from Filter::recurse() when singleton exists.  For debug purpose.
-void FilterChooser::SingletonExists(unsigned Opc) {
+void FilterChooser::SingletonExists(unsigned Opc) const {
   insn_t Insn0;
   insnWithID(Insn0, Opc);
 
@@ -682,7 +682,7 @@ void FilterChooser::SingletonExists(unsigned Opc) {
   errs() << '\n';
 
   dumpStack(errs(), "\t\t");
-  for (unsigned i = 0; i < Opcodes.size(); i++) {
+  for (unsigned i = 0; i < Opcodes.size(); ++i) {
     const std::string &Name = nameWithID(Opcodes[i]);
 
     errs() << '\t' << Name << " ";
@@ -697,8 +697,9 @@ void FilterChooser::SingletonExists(unsigned Opc) {
 // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
 // decoded bits in order to verify that the instruction matches the Opcode.
 unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits,
-    std::vector<unsigned> &EndBits, std::vector<uint64_t> &FieldVals,
-    insn_t &Insn) {
+                                   std::vector<unsigned> &EndBits,
+                                   std::vector<uint64_t> &FieldVals,
+                                   const insn_t &Insn) const {
   unsigned Num, BitNo;
   Num = BitNo = 0;
 
@@ -753,17 +754,17 @@ unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits,
 }
 
 void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation,
-                         OperandInfo &OpInfo) {
-  std::string &Decoder = OpInfo.Decoder;
+                                     const OperandInfo &OpInfo) const {
+  const std::string &Decoder = OpInfo.Decoder;
 
   if (OpInfo.numFields() == 1) {
-    OperandInfo::iterator OI = OpInfo.begin();
+    OperandInfo::const_iterator OI = OpInfo.begin();
     o.indent(Indentation) << "  tmp = fieldFromInstruction" << BitWidth
                             << "(insn, " << OI->Base << ", " << OI->Width
                             << ");\n";
   } else {
     o.indent(Indentation) << "  tmp = 0;\n";
-    for (OperandInfo::iterator OI = OpInfo.begin(), OE = OpInfo.end();
+    for (OperandInfo::const_iterator OI = OpInfo.begin(), OE = OpInfo.end();
          OI != OE; ++OI) {
       o.indent(Indentation) << "  tmp |= (fieldFromInstruction" << BitWidth
                             << "(insn, " << OI->Base << ", " << OI->Width
@@ -781,7 +782,7 @@ void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation,
 }
 
 static void emitSinglePredicateMatch(raw_ostream &o, StringRef str,
-                                     std::string PredicateNamespace) {
+                                     const std::string &PredicateNamespace) {
   if (str[0] == '!')
     o << "!(Bits & " << PredicateNamespace << "::"
       << str.slice(1,str.size()) << ")";
@@ -790,7 +791,7 @@ static void emitSinglePredicateMatch(raw_ostream &o, StringRef str,
 }
 
 bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
-                                           unsigned Opc) {
+                                       unsigned Opc) const {
   ListInit *Predicates =
     AllInstructions[Opc]->TheDef->getValueAsListInit("Predicates");
   for (unsigned i = 0; i < Predicates->getSize(); ++i) {
@@ -819,7 +820,7 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
 }
 
 void FilterChooser::emitSoftFailCheck(raw_ostream &o, unsigned Indentation,
-                                      unsigned Opc) {
+                                      unsigned Opc) const {
   BitsInit *SFBits =
     AllInstructions[Opc]->TheDef->getValueAsBitsInit("SoftFail");
   if (!SFBits) return;
@@ -881,7 +882,7 @@ void FilterChooser::emitSoftFailCheck(raw_ostream &o, unsigned Indentation,
 // Emits code to decode the singleton.  Return true if we have matched all the
 // well-known bits.
 bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
-                                         unsigned Opc) {
+                                         unsigned Opc) const {
   std::vector<unsigned> StartBits;
   std::vector<unsigned> EndBits;
   std::vector<uint64_t> FieldVals;
@@ -902,8 +903,10 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
     o << ") {\n";
     emitSoftFailCheck(o, Indentation+2, Opc);
     o.indent(Indentation) << "  MI.setOpcode(" << Opc << ");\n";
-    std::vector<OperandInfo>& InsnOperands = Operands[Opc];
-    for (std::vector<OperandInfo>::iterator
+    std::map<unsigned, std::vector<OperandInfo> >::const_iterator OpIter =
+      Operands.find(Opc);
+    const std::vector<OperandInfo>& InsnOperands = OpIter->second;
+    for (std::vector<OperandInfo>::const_iterator
          I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
       // If a custom instruction decoder was specified, use that.
       if (I->numFields() == 0 && I->Decoder.size()) {
@@ -953,8 +956,10 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
   }
   emitSoftFailCheck(o, Indentation+2, Opc);
   o.indent(Indentation) << "  MI.setOpcode(" << Opc << ");\n";
-  std::vector<OperandInfo>& InsnOperands = Operands[Opc];
-  for (std::vector<OperandInfo>::iterator
+  std::map<unsigned, std::vector<OperandInfo> >::const_iterator OpIter =
+    Operands.find(Opc);
+  const std::vector<OperandInfo>& InsnOperands = OpIter->second;
+  for (std::vector<OperandInfo>::const_iterator
        I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
     // If a custom instruction decoder was specified, use that.
     if (I->numFields() == 0 && I->Decoder.size()) {
@@ -975,7 +980,7 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
 
 // Emits code to decode the singleton, and then to decode the rest.
 void FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
-    Filter &Best) {
+                                         const Filter &Best) const {
 
   unsigned Opc = Best.getSingletonOpc();
 
@@ -991,8 +996,8 @@ void FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
 
 // Assign a single filter and run with it.  Top level API client can initialize
 // with a single filter to start the filtering process.
-void FilterChooser::runSingleFilter(FilterChooser &owner, unsigned startBit,
-    unsigned numBit, bool mixed) {
+void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit,
+                                    bool mixed) {
   Filters.clear();
   Filter F(*this, startBit, numBit, true);
   Filters.push_back(F);
@@ -1003,7 +1008,7 @@ void FilterChooser::runSingleFilter(FilterChooser &owner, unsigned startBit,
 // reportRegion is a helper function for filterProcessor to mark a region as
 // eligible for use as a filter region.
 void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
-    unsigned BitIndex, bool AllowMixed) {
+                                 unsigned BitIndex, bool AllowMixed) {
   if (RA == ATTR_MIXED && AllowMixed)
     Filters.push_back(Filter(*this, StartBit, BitIndex - StartBit, true));
   else if (RA == ATTR_ALL_SET && !AllowMixed)
@@ -1040,8 +1045,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
       // Look for islands of undecoded bits of any instruction.
       if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) {
         // Found an instruction with island(s).  Now just assign a filter.
-        runSingleFilter(*this, StartBits[0], EndBits[0] - StartBits[0] + 1,
-                        true);
+        runSingleFilter(StartBits[0], EndBits[0] - StartBits[0] + 1, true);
         return true;
       }
     }
@@ -1272,7 +1276,7 @@ void FilterChooser::doFilter() {
 // Emits code to decode our share of instructions.  Returns true if the
 // emitted code causes a return, which occurs if we know how to decode
 // the instruction at this level or the instruction is not decodeable.
-bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
+bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) const {
   if (Opcodes.size() == 1)
     // There is only one instruction in the set, which is great!
     // Call emitSingletonDecoder() to see whether there are any remaining
@@ -1281,11 +1285,11 @@ bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
 
   // Choose the best filter to do the decodings!
   if (BestIndex != -1) {
-    Filter &Best = bestFilter();
+    const Filter &Best = Filters[BestIndex];
     if (Best.getNumFiltered() == 1)
       emitSingletonDecoder(o, Indentation, Best);
     else
-      bestFilter().emit(o, Indentation);
+      Best.emit(o, Indentation);
     return false;
   }
 
@@ -1305,7 +1309,7 @@ bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
 
   dumpStack(errs(), "\t\t");
 
-  for (unsigned i = 0; i < Opcodes.size(); i++) {
+  for (unsigned i = 0; i < Opcodes.size(); ++i) {
     const std::string &Name = nameWithID(Opcodes[i]);
 
     errs() << '\t' << Name << " ";
@@ -1317,9 +1321,8 @@ bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
   return true;
 }
 
-static bool populateInstruction(const CodeGenInstruction &CGI,
-                                unsigned Opc,
-                      std::map<unsigned, std::vector<OperandInfo> >& Operands){
+static bool populateInstruction(const CodeGenInstruction &CGI, unsigned Opc,
+                       std::map<unsigned, std::vector<OperandInfo> > &Operands){
   const Record &Def = *CGI.TheDef;
   // If all the bit positions are not specified; do not decode this instruction.
   // We are bound to fail!  For proper disassembly, the well-known encoding bits
@@ -1373,7 +1376,7 @@ static bool populateInstruction(const CodeGenInstruction &CGI,
   }
 
   // For each operand, see if we can figure out where it is encoded.
-  for (std::vector<std::pair<Init*, std::string> >::iterator
+  for (std::vector<std::pair<Init*, std::string> >::const_iterator
        NI = InOutOperands.begin(), NE = InOutOperands.end(); NI != NE; ++NI) {
     std::string Decoder = "";
 
@@ -1518,8 +1521,7 @@ static void emitHelper(llvm::raw_ostream &o, unsigned BitWidth) {
 }
 
 // Emits disassembler code for instruction decoding.
-void FixedLenDecoderEmitter::run(raw_ostream &o)
-{
+void FixedLenDecoderEmitter::run(raw_ostream &o) {
   o << "#include \"llvm/MC/MCInst.h\"\n";
   o << "#include \"llvm/Support/DataTypes.h\"\n";
   o << "#include <assert.h>\n";
@@ -1527,14 +1529,15 @@ void FixedLenDecoderEmitter::run(raw_ostream &o)
   o << "namespace llvm {\n\n";
 
   // Parameterize the decoders based on namespace and instruction width.
-  NumberedInstructions = Target.getInstructionsByEnumValue();
+  const std::vector<const CodeGenInstruction*> &NumberedInstructions =
+    Target.getInstructionsByEnumValue();
   std::map<std::pair<std::string, unsigned>,
            std::vector<unsigned> > OpcMap;
   std::map<unsigned, std::vector<OperandInfo> > Operands;
 
   for (unsigned i = 0; i < NumberedInstructions.size(); ++i) {
     const CodeGenInstruction *Inst = NumberedInstructions[i];
-    Record *Def = Inst->TheDef;
+    const Record *Def = Inst->TheDef;
     unsigned Size = Def->getValueAsInt("Size");
     if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
         Def->getValueAsBit("isPseudo") ||
@@ -1553,7 +1556,7 @@ void FixedLenDecoderEmitter::run(raw_ostream &o)
 
   std::set<unsigned> Sizes;
   for (std::map<std::pair<std::string, unsigned>,
-                std::vector<unsigned> >::iterator
+                std::vector<unsigned> >::const_iterator
        I = OpcMap.begin(), E = OpcMap.end(); I != E; ++I) {
     // If we haven't visited this instruction width before, emit the
     // helper method to extract fields.
diff --git a/utils/TableGen/FixedLenDecoderEmitter.h b/utils/TableGen/FixedLenDecoderEmitter.h
index bd5111f..195297c 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.h
+++ b/utils/TableGen/FixedLenDecoderEmitter.h
@@ -39,12 +39,12 @@ struct OperandInfo {
     Fields.push_back(EncodingField(Base, Width, Offset));
   }
 
-  unsigned numFields() { return Fields.size(); }
+  unsigned numFields() const { return Fields.size(); }
 
-  typedef std::vector<EncodingField>::iterator iterator;
+  typedef std::vector<EncodingField>::const_iterator const_iterator;
 
-  iterator begin() { return Fields.begin(); }
-  iterator end()   { return Fields.end();   }
+  const_iterator begin() const { return Fields.begin(); }
+  const_iterator end() const   { return Fields.end();   }
 };
 
 class FixedLenDecoderEmitter : public TableGenBackend {
@@ -57,8 +57,7 @@ public:
                          std::string ROK      = "MCDisassembler::Success",
                          std::string RFail    = "MCDisassembler::Fail",
                          std::string L        = "") :
-    Records(R), Target(R),
-    NumberedInstructions(Target.getInstructionsByEnumValue()),
+    Target(R),
     PredicateNamespace(PredicateNamespace),
     GuardPrefix(GPrefix), GuardPostfix(GPostfix),
     ReturnOK(ROK), ReturnFail(RFail), Locals(L) {}
@@ -67,11 +66,7 @@ public:
   void run(raw_ostream &o);
 
 private:
-  RecordKeeper &Records;
   CodeGenTarget Target;
-  std::vector<const CodeGenInstruction*> NumberedInstructions;
-  std::vector<unsigned> Opcodes;
-  std::map<unsigned, std::vector<OperandInfo> > Operands;
 public:
   std::string PredicateNamespace;
   std::string GuardPrefix, GuardPostfix;
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 4c4cd18..0d27828 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
 
 static void PrintDefList(const std::vector<Record*> &Uses,
                          unsigned Num, raw_ostream &OS) {
-  OS << "static const unsigned ImplicitList" << Num << "[] = { ";
+  OS << "static const uint16_t ImplicitList" << Num << "[] = { ";
   for (unsigned i = 0, e = Uses.size(); i != e; ++i)
     OS << getQualifiedName(Uses[i]) << ", ";
   OS << "0 };\n";
@@ -107,6 +107,11 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
       if (Inst.Operands[i].Rec->isSubClassOf("OptionalDefOperand"))
         Res += "|(1<<MCOI::OptionalDef)";
 
+      // Fill in operand type.
+      Res += ", MCOI::";
+      assert(!Inst.Operands[i].OperandType.empty() && "Invalid operand type.");
+      Res += Inst.Operands[i].OperandType;
+
       // Fill in constraint info.
       Res += ", ";
 
@@ -122,11 +127,6 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
                     " << 16) | (1 << MCOI::TIED_TO))";
       }
 
-      // Fill in operand type.
-      Res += ", MCOI::";
-      assert(!Inst.Operands[i].OperandType.empty() && "Invalid operand type.");
-      Res += Inst.Operands[i].OperandType;
-
       Result.push_back(Res);
     }
   }
diff --git a/utils/TableGen/PseudoLoweringEmitter.cpp b/utils/TableGen/PseudoLoweringEmitter.cpp
index d3e2389..802d112 100644
--- a/utils/TableGen/PseudoLoweringEmitter.cpp
+++ b/utils/TableGen/PseudoLoweringEmitter.cpp
@@ -100,8 +100,11 @@ void PseudoLoweringEmitter::evaluateExpansion(Record *Rec) {
     throw TGError(Rec->getLoc(), "Pseudo result '" + Operator->getName() +
                                  "' operand count mismatch");
 
+  unsigned NumMIOperands = 0;
+  for (unsigned i = 0, e = Insn.Operands.size(); i != e; ++i)
+    NumMIOperands += Insn.Operands[i].MINumOperands;
   IndexedMap<OpData> OperandMap;
-  OperandMap.grow(Insn.Operands.size());
+  OperandMap.grow(NumMIOperands);
 
   addDagOperandMapping(Rec, Dag, Insn, OperandMap, 0);
 
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index c949a25..2380e23 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -31,6 +31,9 @@ RegisterInfoEmitter::runEnums(raw_ostream &OS,
                               CodeGenTarget &Target, CodeGenRegBank &Bank) {
   const std::vector<CodeGenRegister*> &Registers = Bank.getRegisters();
 
+  // Register enums are stored as uint16_t in the tables. Make sure we'll fit
+  assert(Registers.size() <= 0xffff && "Too many regs to fit in tables");
+
   std::string Namespace = Registers[0]->TheDef->getValueAsString("Namespace");
 
   EmitSourceFileHeader("Target Register Enum Values", OS);
@@ -60,6 +63,11 @@ RegisterInfoEmitter::runEnums(raw_ostream &OS,
 
   ArrayRef<CodeGenRegisterClass*> RegisterClasses = Bank.getRegClasses();
   if (!RegisterClasses.empty()) {
+
+    // RegisterClass enums are stored as uint16_t in the tables.
+    assert(RegisterClasses.size() <= 0xffff &&
+           "Too many register classes to fit in tables");
+
     OS << "\n// Register classes\n";
     if (!Namespace.empty())
       OS << "namespace " << Namespace << " {\n";
@@ -399,6 +407,13 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
 
   for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) {
     const CodeGenRegisterClass &RC = *RegisterClasses[rc];
+
+    // Asserts to make sure values will fit in table assuming types from
+    // MCRegisterInfo.h
+    assert((RC.SpillSize/8) <= 0xffff && "SpillSize too large.");
+    assert((RC.SpillAlignment/8) <= 0xffff && "SpillAlignment too large.");
+    assert(RC.CopyCost >= -128 && RC.CopyCost <= 127 && "Copy cost too large.");
+
     OS << "  { " << '\"' << RC.getName() << "\", "
        << RC.getName() << ", " << RC.getName() << "Bits, "
        << RC.getOrder().size() << ", sizeof(" << RC.getName() << "Bits), "
@@ -415,7 +430,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
   ArrayRef<CodeGenSubRegIndex*> SubRegIndices = RegBank.getSubRegIndices();
   if (SubRegIndices.size()) {
     OS << "const uint16_t " << TargetName << "SubRegTable[]["
-      << SubRegIndices.size() << "] = {\n";
+       << SubRegIndices.size() << "] = {\n";
     for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
       const CodeGenRegister::SubRegMap &SRM = Regs[i]->getSubRegs();
       OS << "  /* " << Regs[i]->TheDef->getName() << " */\n";
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index 5bf473d..6bbc929 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -405,13 +405,13 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const {
     return FILTER_STRONG;
     
     
-  // Filter out artificial instructions
+  // Filter out artificial instructions but leave in the LOCK_PREFIX so it is
+  // printed as a separate "instruction".
     
   if (Name.find("_Int") != Name.npos       ||
       Name.find("Int_") != Name.npos       ||
       Name.find("_NOREX") != Name.npos     ||
-      Name.find("2SDL") != Name.npos       ||
-      Name == "LOCK_PREFIX")
+      Name.find("2SDL") != Name.npos)
     return FILTER_STRONG;
 
   // Filter out instructions with segment override prefixes.
diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index 74220b8..b5f7986 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -23,42 +23,55 @@ kUseCloseFDs = not kIsWindows
 # Use temporary files to replace /dev/null on Windows.
 kAvoidDevNull = kIsWindows
 
+# Negate if win32file is not found.
+kHaveWin32File = kIsWindows
+
 def RemoveForce(f):
     try:
         os.remove(f)
     except OSError:
         pass
 
-def WinRename(f_o, f_n):
-    import time
-    retry_cnt = 256
-    while (True):
-        try:
-            os.rename(f_o, f_n)
-            break
-        except WindowsError, (winerror, strerror):
-            retry_cnt = retry_cnt - 1
-            if retry_cnt <= 0:
-                raise
-            elif winerror == 32: # ERROR_SHARING_VIOLATION
-                time.sleep(0.01)
-            else:
-                raise
-
 def WinWaitReleased(f):
-    import random
-    t = "%s%06d" % (f, random.randint(0, 999999))
-    RemoveForce(t)
+    global kHaveWin32File
+    if not kHaveWin32File:
+        return
     try:
-        WinRename(f, t) # rename
-        WinRename(t, f) # restore
-    except WindowsError, (winerror, strerror):
-        if winerror in (2, 3):
-            # 2: ERROR_FILE_NOT_FOUND
-            # 3: ERROR_PATH_NOT_FOUND
-            pass
-        else:
-            raise
+        import time
+        import win32file, pywintypes
+        retry_cnt = 256
+        while True:
+            try:
+                h = win32file.CreateFile(
+                    f,
+                    win32file.GENERIC_READ,
+                    0, # Exclusive
+                    None,
+                    win32file.OPEN_EXISTING,
+                    win32file.FILE_ATTRIBUTE_NORMAL,
+                    None)
+                h.close()
+                return
+            except WindowsError, (winerror, strerror):
+                retry_cnt = retry_cnt - 1
+                if retry_cnt <= 0:
+                    raise
+                elif winerror == 32: # ERROR_SHARING_VIOLATION
+                    pass
+                else:
+                    raise
+            except pywintypes.error, e:
+                retry_cnt = retry_cnt - 1
+                if retry_cnt <= 0:
+                    raise
+                elif e[0]== 32: # ERROR_SHARING_VIOLATION
+                    pass
+                else:
+                    raise
+            time.sleep(0.01)
+    except ImportError, e:
+        kHaveWin32File = False
+        return
 
 def executeCommand(command, cwd=None, env=None):
     p = subprocess.Popen(command, cwd=cwd,
@@ -105,7 +118,6 @@ def executeShCmd(cmd, cfg, cwd, results):
     input = subprocess.PIPE
     stderrTempFiles = []
     opened_files = []
-    written_files = []
     named_temp_files = []
     # To avoid deadlock, we use a single stderr stream for piped
     # output. This is null until we have seen some output using
@@ -153,6 +165,7 @@ def executeShCmd(cmd, cfg, cwd, results):
             else:
                 if r[2] is None:
                     if kAvoidDevNull and r[0] == '/dev/null':
+                        r[0] = None
                         r[2] = tempfile.TemporaryFile(mode=r[1])
                     else:
                         r[2] = open(r[0], r[1])
@@ -161,9 +174,7 @@ def executeShCmd(cmd, cfg, cwd, results):
                     # FIXME: Actually, this is probably an instance of PR6753.
                     if r[1] == 'a':
                         r[2].seek(0, 2)
-                    opened_files.append(r[2])
-                    if r[1] in 'aw':
-                        written_files.append(r[0])
+                    opened_files.append(r)
                 result = r[2]
             final_redirects.append(result)
 
@@ -225,7 +236,7 @@ def executeShCmd(cmd, cfg, cwd, results):
     # on Win32, for example). Since we have already spawned the subprocess, our
     # handles have already been transferred so we do not need them anymore.
     for f in opened_files:
-        f.close()
+        f[2].close()
 
     # FIXME: There is probably still deadlock potential here. Yawn.
     procData = [None] * len(procs)
@@ -264,10 +275,11 @@ def executeShCmd(cmd, cfg, cwd, results):
         else:
             exitCode = res
 
-    # Make sure written_files is released by other (child) processes.
-    if (kIsWindows):
-        for f in written_files:
-            WinWaitReleased(f)
+    # Make sure opened_files is released by other (child) processes.
+    if kIsWindows:
+        for f in opened_files:
+            if f[0] is not None:
+                WinWaitReleased(f[0])
 
     # Remove any named temporary files we created.
     for f in named_temp_files:
@@ -459,6 +471,7 @@ def parseIntegratedTestScript(test, normalize_slashes=False,
     substitutions.extend([('%s', sourcepath),
                           ('%S', sourcedir),
                           ('%p', sourcedir),
+                          ('%{pathsep}', os.pathsep),
                           ('%t', tmpBase + '.tmp'),
                           ('%T', tmpDir),
                           # FIXME: Remove this once we kill DejaGNU.
diff --git a/utils/lit/lit/TestingConfig.py b/utils/lit/lit/TestingConfig.py
index 7f511d1..78bf268 100644
--- a/utils/lit/lit/TestingConfig.py
+++ b/utils/lit/lit/TestingConfig.py
@@ -21,6 +21,7 @@ class TestingConfig:
 
             if sys.platform == 'win32':
                 environment.update({
+                        'INCLUDE' : os.environ.get('INCLUDE',''),
                         'PATHEXT' : os.environ.get('PATHEXT',''),
                         'PYTHONUNBUFFERED' : '1',
                         'TEMP' : os.environ.get('TEMP',''),