diff options
| author | Stephen Hines <srhines@google.com> | 2012-08-23 19:08:53 -0700 |
|---|---|---|
| committer | Stephen Hines <srhines@google.com> | 2012-08-23 19:08:53 -0700 |
| commit | 31675153bd2d7617db8cb6aeb58054934c7b9f73 (patch) | |
| tree | c1970fcebc736d4f731db0559a79a7ac5cb0f8bf | |
| parent | 416bb6a168a9316547db6ce3909c515f70a84f52 (diff) | |
| parent | 75dd7f0c4a2b3fb9e9d4d5a0517591810c57ed92 (diff) | |
| download | external_llvm-31675153bd2d7617db8cb6aeb58054934c7b9f73.zip external_llvm-31675153bd2d7617db8cb6aeb58054934c7b9f73.tar.gz external_llvm-31675153bd2d7617db8cb6aeb58054934c7b9f73.tar.bz2 | |
Merge branch 'upstream' into merge_2
Conflicts:
lib/Target/ARM/ARMCodeEmitter.cpp
Change-Id: I6702d340c733e9721499b5d85b13b96ad9c14eb5
594 files changed, 25469 insertions, 15025 deletions
@@ -27,6 +27,7 @@ cscope.files cscope.out autoconf/aclocal.m4 autoconf/autom4te.cache +compile_commands.json #==============================================================================# # Directories to ignore (do not add trailing '/'s, they skip symlinks). diff --git a/CMakeLists.txt b/CMakeLists.txt index bb64db9..df781f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,6 +99,9 @@ else( MSVC ) CACHE STRING "Semicolon-separated list of targets to build, or \"all\".") endif( MSVC ) +set(LLVM_EXPERIMENTAL_TARGETS_TO_BUILD "" + CACHE STRING "Semicolon-separated list of experimental targets to build.") + option(BUILD_SHARED_LIBS "Build all libraries as shared libraries instead of static" OFF) @@ -125,10 +128,15 @@ if( LLVM_TARGETS_TO_BUILD STREQUAL "all" ) set( LLVM_TARGETS_TO_BUILD ${LLVM_ALL_TARGETS} ) endif() +set(LLVM_TARGETS_TO_BUILD + ${LLVM_TARGETS_TO_BUILD} + ${LLVM_EXPERIMENTAL_TARGETS_TO_BUILD}) + set(LLVM_ENUM_TARGETS "") foreach(c ${LLVM_TARGETS_TO_BUILD}) list(FIND LLVM_ALL_TARGETS ${c} idx) - if( idx LESS 0 ) + list(FIND LLVM_EXPERIMENTAL_TARGETS_TO_BUILD ${c} idy) + if( idx LESS 0 AND idy LESS 0 ) message(FATAL_ERROR "The target `${c}' does not exist. It should be one of\n${LLVM_ALL_TARGETS}") else() @@ -241,6 +249,14 @@ option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." ON) # BEFORE this include, otherwise options will not be correctly set on # first cmake run include(config-ix) + +# By default, we target the host, but this can be overridden at CMake +# invocation time. +set(LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_HOST_TRIPLE}" CACHE STRING + "Default target for which LLVM will generate code." ) +set(TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}" CACHE STRING + "Default target for which LLVM will generate code." ) + include(HandleLLVMOptions) # Verify that we can find a Python interpreter, @@ -402,6 +418,7 @@ add_subdirectory(utils/not) add_subdirectory(utils/llvm-lit) add_subdirectory(utils/yaml-bench) add_subdirectory(utils/obj2yaml) +add_subdirectory(utils/yaml2obj) add_subdirectory(projects) diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT new file mode 100644 index 0000000..fd7bcda --- /dev/null +++ b/CODE_OWNERS.TXT @@ -0,0 +1,51 @@ +This file is a list of the people responsible for ensuring that patches for a +particular part of LLVM are reviewed, either by themself or by someone else. +They are also the gatekeepers for their part of LLVM, with the final word on +what goes in or not. + +The list is sorted by surname and formatted to allow easy grepping and +beautification by scripts. The fields are: name (N), email (E), web-address +(W), PGP key ID and fingerprint (P), description (D), and snail-mail address +(S). 
+ +N: Evan Cheng +E: evan.cheng@apple.com +D: Code generator and all targets + +N: Greg Clayton +D: LLDB + +N: Peter Collingbourne +D: libclc + +N: Doug Gregor +D: Clang Frontend Libraries + +N: Tobias Grosser +D: Polly + +N: Howard Hinnant +D: libc++ + +N: Anton Korobeynikov +E: asl@math.spbu.ru +D: Exception handling, debug information, and Windows codegen + +N: Ted Kremenek +D: Clang Static Analyzer + +N: Chris Lattner +E: sabre@nondot.org +W: http://nondot.org/~sabre/ +D: Everything not covered by someone else + +N: John McCall +E: rjmccall@apple.com +D: Clang LLVM IR generation + +N: Jakob Olesen +D: Register allocators and TableGen + +N: Duncan Sands +E: baldrick@free.fr +D: DragonEgg @@ -244,13 +244,13 @@ build-for-llvm-top: SVN = svn SVN-UPDATE-OPTIONS = AWK = awk -SUB-SVN-DIRS = $(AWK) '/\?\ \ \ \ \ \ / {print $$2}' \ +SUB-SVN-DIRS = $(AWK) '/I|\? / {print $$2}' \ | LC_ALL=C xargs $(SVN) info 2>/dev/null \ | $(AWK) '/^Path:\ / {print $$2}' update: $(SVN) $(SVN-UPDATE-OPTIONS) update $(LLVM_SRC_ROOT) - @ $(SVN) status $(LLVM_SRC_ROOT) | $(SUB-SVN-DIRS) | xargs $(SVN) $(SVN-UPDATE-OPTIONS) update + @ $(SVN) status --no-ignore $(LLVM_SRC_ROOT) | $(SUB-SVN-DIRS) | xargs $(SVN) $(SVN-UPDATE-OPTIONS) update happiness: update all check-all diff --git a/Makefile.config.in b/Makefile.config.in index cc538b3..e3bd2a2 100644 --- a/Makefile.config.in +++ b/Makefile.config.in @@ -218,6 +218,12 @@ RDYNAMIC := @RDYNAMIC@ #ENABLE_LIBCPP = 0 ENABLE_LIBCPP = @ENABLE_LIBCPP@ +# When ENABLE_CXX11 is enabled, LLVM uses c++11 mode by default to build. +ENABLE_CXX11 = @ENABLE_CXX11@ + +# When ENABLE_WERROR is enabled, we'll pass -Werror on the command line +ENABLE_WERROR = @ENABLE_WERROR@ + # When ENABLE_OPTIMIZED is enabled, LLVM code is optimized and output is put # into the "Release" directories. Otherwise, LLVM code is not optimized and # output is put in the "Debug" directories. @@ -252,6 +258,11 @@ ENABLE_LIBCPP = @ENABLE_LIBCPP@ #DEBUG_SYMBOLS = 1 @DEBUG_SYMBOLS@ +# When KEEP_SYMBOLS is enabled, installed executables will never have their +# symbols stripped. +#KEEP_SYMBOLS = 1 +@KEEP_SYMBOLS@ + # The compiler flags to use for optimized builds. 
OPTIMIZE_OPTION := @OPTIMIZE_OPTION@ @@ -344,6 +355,10 @@ LLVM_HAS_POLLY = @LLVM_HAS_POLLY@ # bfd ld / gold --version-script=file HAVE_LINK_VERSION_SCRIPT = @HAVE_LINK_VERSION_SCRIPT@ +# Flags to control using libxml2 +LIBXML2_LIBS := @LIBXML2_LIBS@ +LIBXML2_INC := @LIBXML2_INC@ + # Flags to control building support for Intel JIT Events API USE_INTEL_JITEVENTS := @USE_INTEL_JITEVENTS@ INTEL_JITEVENTS_INCDIR := @INTEL_JITEVENTS_INCDIR@ diff --git a/Makefile.rules b/Makefile.rules index 644c356..289adc2 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -317,6 +317,15 @@ ifeq ($(ENABLE_LIBCPP),1) LD.Flags += -stdlib=libc++ endif +ifeq ($(ENABLE_CXX11),1) + CXX.Flags += -std=c++11 +endif + +ifeq ($(ENABLE_WERROR),1) + CXX.Flags += -Werror + C.Flags += -Werror +endif + ifeq ($(ENABLE_PROFILING),1) BuildMode := $(BuildMode)+Profile CXX.Flags := $(filter-out -fomit-frame-pointer,$(CXX.Flags)) -pg -g diff --git a/autoconf/config.guess b/autoconf/config.guess index f7dd69e..dd6dcb3 100755 --- a/autoconf/config.guess +++ b/autoconf/config.guess @@ -206,6 +206,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} exit ;; + *:Bitrig:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE} + exit ;; *:ekkoBSD:*:*) echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} exit ;; diff --git a/autoconf/configure.ac b/autoconf/configure.ac index 7d36a06..7fa883e 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -463,6 +463,18 @@ case "$enableval" in *) AC_MSG_ERROR([Invalid setting for --enable-libcpp. Use "yes" or "no"]) ;; esac +dnl --enable-cxx11 : check whether or not to use -std=c++11 on the command line +AC_ARG_ENABLE(cxx11, + AS_HELP_STRING([--enable-cxx11], + [Use c++11 if available (default is NO)]),, + enableval=default) +case "$enableval" in + yes) AC_SUBST(ENABLE_CXX11,[1]) ;; + no) AC_SUBST(ENABLE_CXX11,[0]) ;; + default) AC_SUBST(ENABLE_CXX11,[0]);; + *) AC_MSG_ERROR([Invalid setting for --enable-cxx11. Use "yes" or "no"]) ;; +esac + dnl --enable-optimized : check whether they want to do an optimized build: AC_ARG_ENABLE(optimized, AS_HELP_STRING( --enable-optimized,[Compile with optimizations enabled (default is NO)]),,enableval=$optimize) @@ -490,6 +502,16 @@ else AC_SUBST(DISABLE_ASSERTIONS,[[DISABLE_ASSERTIONS=1]]) fi +dnl --enable-werror : check whether we want Werror on by default +AC_ARG_ENABLE(werror,AS_HELP_STRING( + --enable-werror,[Compile with -Werror enabled (default is NO)]),, enableval="no") +case "$enableval" in + yes) AC_SUBST(ENABLE_WERROR,[1]) ;; + no) AC_SUBST(ENABLE_WERROR,[0]) ;; + default) AC_SUBST(ENABLE_WERROR,[0]);; + *) AC_MSG_ERROR([Invalid setting for --enable-werror. 
Use "yes" or "no"]) ;; +esac + dnl --enable-expensive-checks : check whether they want to turn on expensive debug checks: AC_ARG_ENABLE(expensive-checks,AS_HELP_STRING( --enable-expensive-checks,[Compile with expensive debug checks enabled (default is NO)]),, enableval="no") @@ -520,6 +542,15 @@ else AC_SUBST(DEBUG_SYMBOLS,[[DEBUG_SYMBOLS=1]]) fi +dnl --enable-keep-symbols : do not strip installed executables +AC_ARG_ENABLE(keep-symbols, + AS_HELP_STRING(--enable-keep-symbols,[Do not strip installed executables)]),,enableval=no) +if test ${enableval} = "no" ; then + AC_SUBST(KEEP_SYMBOLS,[[]]) +else + AC_SUBST(KEEP_SYMBOLS,[[KEEP_SYMBOLS=1]]) +fi + dnl --enable-jit: check whether they want to enable the jit AC_ARG_ENABLE(jit, AS_HELP_STRING(--enable-jit, @@ -695,6 +726,17 @@ case "$enableval" in done ;; esac + +AC_ARG_ENABLE([experimental-targets],AS_HELP_STRING([--enable-experimental-targets], + [Build experimental host targets: disable or target1,target2,... + (default=disable)]),, + enableval=disable) + +if test ${enableval} != "disable" +then + TARGETS_TO_BUILD="$enableval $TARGETS_TO_BUILD" +fi + AC_SUBST(TARGETS_TO_BUILD,$TARGETS_TO_BUILD) dnl Determine whether we are building LLVM support for the native architecture. @@ -1280,6 +1322,23 @@ AC_ARG_WITH(intel-jitevents, AC_DEFINE_UNQUOTED([LLVM_USE_INTEL_JITEVENTS],$USE_INTEL_JITEVENTS, [Define if we have the Intel JIT API runtime support library]) +dnl Check for libxml2 +dnl Right now we're just checking for the existence, we could also check for a +dnl particular version via --version on xml2-config +AC_CHECK_PROGS(XML2CONFIG, xml2-config) + +AC_MSG_CHECKING(for libxml2 includes) +if test "x$XML2CONFIG" = "x"; then + AC_MSG_RESULT(xml2-config not found) +else + LIBXML2_INC=`$XML2CONFIG --cflags` + AC_MSG_RESULT($LIBXML2_INC) + AC_CHECK_LIB(xml2, xmlReadFile,[AC_DEFINE([CLANG_HAVE_LIBXML],1,[Define if we have libxml2]) + LIBXML2_LIBS="-lxml2"]) +fi +AC_SUBST(LIBXML2_LIBS) +AC_SUBST(LIBXML2_INC) + dnl===-----------------------------------------------------------------------=== dnl=== dnl=== SECTION 6: Check for header files diff --git a/autoconf/ltmain.sh b/autoconf/ltmain.sh index 2455278..21ace01 100644 --- a/autoconf/ltmain.sh +++ b/autoconf/ltmain.sh @@ -1560,7 +1560,7 @@ EOF # These systems don't actually have a C library (as such) test "X$arg" = "X-lc" && continue ;; - *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig*) # Do not include libc due to us having libc/libc_r. test "X$arg" = "X-lc" && continue ;; @@ -1580,7 +1580,7 @@ EOF esac elif test "X$arg" = "X-lc_r"; then case $host in - *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | -*-*-bitrig*) # Do not include libc_r directly, use -pthread flag. continue ;; @@ -3464,7 +3464,7 @@ EOF *-*-netbsd*) # Don't link with libc until the a.out ld.so is fixed. ;; - *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig*) # Do not include libc due to us having libc/libc_r. 
;; *-*-sco3.2v5* | *-*-sco5v6*) diff --git a/autoconf/m4/libtool.m4 b/autoconf/m4/libtool.m4 index 36ac3d1..05af7a2 100644 --- a/autoconf/m4/libtool.m4 +++ b/autoconf/m4/libtool.m4 @@ -176,7 +176,7 @@ old_postuninstall_cmds= if test -n "$RANLIB"; then case $host_os in - openbsd*) + openbsd* | bitrig*) old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$oldlib" ;; *) @@ -729,7 +729,7 @@ AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl lt_cv_sys_max_cmd_len=8192; ;; - netbsd* | freebsd* | openbsd* | darwin* | dragonfly*) + netbsd* | freebsd* | openbsd* | darwin* | dragonfly* | bitrig*) # This has been around since 386BSD, at least. Likely further. if test -x /sbin/sysctl; then lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` @@ -1631,7 +1631,7 @@ nto-qnx*) shlibpath_overrides_runpath=yes ;; -openbsd*) +openbsd* | bitrig*) version_type=sunos sys_lib_dlsearch_path_spec="/usr/lib" need_lib_prefix=no @@ -3382,7 +3382,7 @@ case $host_os in # C++ shared libraries are fairly broken _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; - openbsd*) + openbsd* | bitrig*) _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' @@ -6003,7 +6003,7 @@ _LT_EOF _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no ;; - openbsd*) + openbsd* | bitrig*) _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 25d6211..94cc555 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -294,9 +294,7 @@ get_host_triple(LLVM_HOST_TRIPLE) # By default, we target the host, but this can be overridden at CMake # invocation time. -set(LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_HOST_TRIPLE}") set(LLVM_HOSTTRIPLE "${LLVM_HOST_TRIPLE}") -set(TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}") # Determine the native architecture. string(TOLOWER "${LLVM_TARGET_ARCH}" LLVM_NATIVE_ARCH) @@ -324,6 +322,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "xcore") set(LLVM_NATIVE_ARCH XCore) elseif (LLVM_NATIVE_ARCH MATCHES "msp430") set(LLVM_NATIVE_ARCH MSP430) +elseif (LLVM_NATIVE_ARCH MATCHES "hexagon") + set(LLVM_NATIVE_ARCH Hexagon) else () message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}") endif () diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 5f0c1de..f44a27c 100755 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -271,11 +271,6 @@ function(add_lit_target target comment) COMMAND ${LIT_COMMAND} ${ARG_DEFAULT_ARGS} COMMENT "${comment}" ) - - # It would be nice to use the DEPENDS clause in add_custom_target above, but - # that has bugs with the CMake 2.8.0 installed on Ubuntu Lucid when the entry - # in the depends is another custom target. Instead we add them through an - # explicit add_dependencies. 
add_dependencies(${target} ${ARG_DEPENDS}) endfunction() @@ -684,13 +684,16 @@ BUILD_EXEEXT BUILD_CXX CVSBUILD ENABLE_LIBCPP +ENABLE_CXX11 ENABLE_OPTIMIZED ENABLE_PROFILING DISABLE_ASSERTIONS +ENABLE_WERROR ENABLE_EXPENSIVE_CHECKS EXPENSIVE_CHECKS DEBUG_RUNTIME DEBUG_SYMBOLS +KEEP_SYMBOLS JIT TARGET_HAS_JIT ENABLE_DOCS @@ -765,6 +768,9 @@ USE_OPROFILE USE_INTEL_JITEVENTS INTEL_JITEVENTS_INCDIR INTEL_JITEVENTS_LIBDIR +XML2CONFIG +LIBXML2_LIBS +LIBXML2_INC HAVE_PTHREAD HUGE_VAL_SANITY MMAP_FILE @@ -1390,10 +1396,12 @@ Optional Features: --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --enable-polly Use polly if available (default is YES) --enable-libcpp Use libc++ if available (default is NO) + --enable-cxx11 Use c++11 if available (default is NO) --enable-optimized Compile with optimizations enabled (default is NO) --enable-profiling Compile with profiling enabled (default is NO) --enable-assertions Compile with assertion checks enabled (default is YES) + --enable-werror Compile with -Werror enabled (default is NO) --enable-expensive-checks Compile with expensive debug checks enabled (default is NO) @@ -1401,6 +1409,7 @@ Optional Features: NO) --enable-debug-symbols Build compiler with debug symbols (default is NO if optimization is on and YES if it's off) + --enable-keep-symbols Do not strip installed executables) --enable-jit Enable Just In Time Compiling (default is YES) --enable-docs Build documents (default is YES) --enable-doxygen Build doxygen documentation (default is NO) @@ -1418,6 +1427,9 @@ Optional Features: target1,target2,... Valid targets are: host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon, xcore, msp430, nvptx, and cpp (default=all) + --enable-experimental-targets + Build experimental host targets: disable or + target1,target2,... (default=disable) --enable-bindings Build specific language bindings: all,auto,none,{binding-name} (default=auto) --enable-libffi Check for the presence of libffi (default is NO) @@ -5016,6 +5028,25 @@ echo "$as_me: error: Invalid setting for --enable-libcpp. Use \"yes\" or \"no\"" { (exit 1); exit 1; }; } ;; esac +# Check whether --enable-cxx11 was given. +if test "${enable_cxx11+set}" = set; then + enableval=$enable_cxx11; +else + enableval=default +fi + +case "$enableval" in + yes) ENABLE_CXX11=1 + ;; + no) ENABLE_CXX11=0 + ;; + default) ENABLE_CXX11=0 +;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-cxx11. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-cxx11. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; +esac + # Check whether --enable-optimized was given. if test "${enable_optimized+set}" = set; then enableval=$enable_optimized; @@ -5061,6 +5092,25 @@ else fi +# Check whether --enable-werror was given. +if test "${enable_werror+set}" = set; then + enableval=$enable_werror; +else + enableval="no" +fi + +case "$enableval" in + yes) ENABLE_WERROR=1 + ;; + no) ENABLE_WERROR=0 + ;; + default) ENABLE_WERROR=0 +;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-werror. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-werror. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; +esac + # Check whether --enable-expensive-checks was given. if test "${enable_expensive_checks+set}" = set; then enableval=$enable_expensive_checks; @@ -5110,6 +5160,21 @@ else fi +# Check whether --enable-keep-symbols was given. 
+if test "${enable_keep_symbols+set}" = set; then + enableval=$enable_keep_symbols; +else + enableval=no +fi + +if test ${enableval} = "no" ; then + KEEP_SYMBOLS= + +else + KEEP_SYMBOLS=KEEP_SYMBOLS=1 + +fi + # Check whether --enable-jit was given. if test "${enable_jit+set}" = set; then enableval=$enable_jit; @@ -5370,6 +5435,20 @@ echo "$as_me: error: Unrecognized target $a_target" >&2;} done ;; esac + +# Check whether --enable-experimental-targets was given. +if test "${enable_experimental_targets+set}" = set; then + enableval=$enable_experimental_targets; +else + enableval=disable +fi + + +if test ${enableval} != "disable" +then + TARGETS_TO_BUILD="$enableval $TARGETS_TO_BUILD" +fi + TARGETS_TO_BUILD=$TARGETS_TO_BUILD @@ -8902,7 +8981,7 @@ nto-qnx*) shlibpath_overrides_runpath=yes ;; -openbsd*) +openbsd* | bitrig*) version_type=sunos sys_lib_dlsearch_path_spec="/usr/lib" need_lib_prefix=no @@ -10210,7 +10289,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<EOF -#line 10213 "configure" +#line 10292 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -13785,6 +13864,148 @@ cat >>confdefs.h <<_ACEOF _ACEOF +for ac_prog in xml2-config +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_XML2CONFIG+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$XML2CONFIG"; then + ac_cv_prog_XML2CONFIG="$XML2CONFIG" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_XML2CONFIG="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +XML2CONFIG=$ac_cv_prog_XML2CONFIG +if test -n "$XML2CONFIG"; then + { echo "$as_me:$LINENO: result: $XML2CONFIG" >&5 +echo "${ECHO_T}$XML2CONFIG" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$XML2CONFIG" && break +done + + +{ echo "$as_me:$LINENO: checking for libxml2 includes" >&5 +echo $ECHO_N "checking for libxml2 includes... $ECHO_C" >&6; } +if test "x$XML2CONFIG" = "x"; then + { echo "$as_me:$LINENO: result: xml2-config not found" >&5 +echo "${ECHO_T}xml2-config not found" >&6; } +else + LIBXML2_INC=`$XML2CONFIG --cflags` + { echo "$as_me:$LINENO: result: $LIBXML2_INC" >&5 +echo "${ECHO_T}$LIBXML2_INC" >&6; } + { echo "$as_me:$LINENO: checking for xmlReadFile in -lxml2" >&5 +echo $ECHO_N "checking for xmlReadFile in -lxml2... $ECHO_C" >&6; } +if test "${ac_cv_lib_xml2_xmlReadFile+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lxml2 $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char xmlReadFile (); +int +main () +{ +return xmlReadFile (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_xml2_xmlReadFile=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_xml2_xmlReadFile=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_xml2_xmlReadFile" >&5 +echo "${ECHO_T}$ac_cv_lib_xml2_xmlReadFile" >&6; } +if test $ac_cv_lib_xml2_xmlReadFile = yes; then + +cat >>confdefs.h <<\_ACEOF +#define CLANG_HAVE_LIBXML 1 +_ACEOF + + LIBXML2_LIBS="-lxml2" +fi + +fi + + + @@ -21937,21 +22158,21 @@ BUILD_EXEEXT!$BUILD_EXEEXT$ac_delim BUILD_CXX!$BUILD_CXX$ac_delim CVSBUILD!$CVSBUILD$ac_delim ENABLE_LIBCPP!$ENABLE_LIBCPP$ac_delim +ENABLE_CXX11!$ENABLE_CXX11$ac_delim ENABLE_OPTIMIZED!$ENABLE_OPTIMIZED$ac_delim ENABLE_PROFILING!$ENABLE_PROFILING$ac_delim DISABLE_ASSERTIONS!$DISABLE_ASSERTIONS$ac_delim +ENABLE_WERROR!$ENABLE_WERROR$ac_delim ENABLE_EXPENSIVE_CHECKS!$ENABLE_EXPENSIVE_CHECKS$ac_delim EXPENSIVE_CHECKS!$EXPENSIVE_CHECKS$ac_delim DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim +KEEP_SYMBOLS!$KEEP_SYMBOLS$ac_delim JIT!$JIT$ac_delim TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim ENABLE_DOCS!$ENABLE_DOCS$ac_delim ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim LLVM_ENABLE_THREADS!$LLVM_ENABLE_THREADS$ac_delim -ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim -ENABLE_PIC!$ENABLE_PIC$ac_delim -ENABLE_SHARED!$ENABLE_SHARED$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then @@ -21993,6 +22214,9 @@ _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF +ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim +ENABLE_PIC!$ENABLE_PIC$ac_delim +ENABLE_SHARED!$ENABLE_SHARED$ac_delim ENABLE_EMBED_STDCXX!$ENABLE_EMBED_STDCXX$ac_delim ENABLE_TIMESTAMPS!$ENABLE_TIMESTAMPS$ac_delim TARGETS_TO_BUILD!$TARGETS_TO_BUILD$ac_delim @@ -22059,6 +22283,9 @@ USE_OPROFILE!$USE_OPROFILE$ac_delim USE_INTEL_JITEVENTS!$USE_INTEL_JITEVENTS$ac_delim INTEL_JITEVENTS_INCDIR!$INTEL_JITEVENTS_INCDIR$ac_delim INTEL_JITEVENTS_LIBDIR!$INTEL_JITEVENTS_LIBDIR$ac_delim +XML2CONFIG!$XML2CONFIG$ac_delim +LIBXML2_LIBS!$LIBXML2_LIBS$ac_delim +LIBXML2_INC!$LIBXML2_INC$ac_delim HAVE_PTHREAD!$HAVE_PTHREAD$ac_delim HUGE_VAL_SANITY!$HUGE_VAL_SANITY$ac_delim MMAP_FILE!$MMAP_FILE$ac_delim 
@@ -22084,7 +22311,7 @@ LIBOBJS!$LIBOBJS$ac_delim LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 89; then + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 95; then break elif $ac_last_try; then { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html deleted file mode 100644 index 651eb96..0000000 --- a/docs/CodeGenerator.html +++ /dev/null @@ -1,3190 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="content-type" content="text/html; charset=utf-8"> - <title>The LLVM Target-Independent Code Generator</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> - - <style type="text/css"> - .unknown { background-color: #C0C0C0; text-align: center; } - .unknown:before { content: "?" } - .no { background-color: #C11B17 } - .no:before { content: "N" } - .partial { background-color: #F88017 } - .yes { background-color: #0F0; } - .yes:before { content: "Y" } - </style> - -</head> -<body> - -<h1> - The LLVM Target-Independent Code Generator -</h1> - -<ol> - <li><a href="#introduction">Introduction</a> - <ul> - <li><a href="#required">Required components in the code generator</a></li> - <li><a href="#high-level-design">The high-level design of the code - generator</a></li> - <li><a href="#tablegen">Using TableGen for target description</a></li> - </ul> - </li> - <li><a href="#targetdesc">Target description classes</a> - <ul> - <li><a href="#targetmachine">The <tt>TargetMachine</tt> class</a></li> - <li><a href="#targetdata">The <tt>TargetData</tt> class</a></li> - <li><a href="#targetlowering">The <tt>TargetLowering</tt> class</a></li> - <li><a href="#targetregisterinfo">The <tt>TargetRegisterInfo</tt> class</a></li> - <li><a href="#targetinstrinfo">The <tt>TargetInstrInfo</tt> class</a></li> - <li><a href="#targetframeinfo">The <tt>TargetFrameInfo</tt> class</a></li> - <li><a href="#targetsubtarget">The <tt>TargetSubtarget</tt> class</a></li> - <li><a href="#targetjitinfo">The <tt>TargetJITInfo</tt> class</a></li> - </ul> - </li> - <li><a href="#codegendesc">The "Machine" Code Generator classes</a> - <ul> - <li><a href="#machineinstr">The <tt>MachineInstr</tt> class</a></li> - <li><a href="#machinebasicblock">The <tt>MachineBasicBlock</tt> - class</a></li> - <li><a href="#machinefunction">The <tt>MachineFunction</tt> class</a></li> - <li><a href="#machineinstrbundle"><tt>MachineInstr Bundles</tt></a></li> - </ul> - </li> - <li><a href="#mc">The "MC" Layer</a> - <ul> - <li><a href="#mcstreamer">The <tt>MCStreamer</tt> API</a></li> - <li><a href="#mccontext">The <tt>MCContext</tt> class</a> - <li><a href="#mcsymbol">The <tt>MCSymbol</tt> class</a></li> - <li><a href="#mcsection">The <tt>MCSection</tt> class</a></li> - <li><a href="#mcinst">The <tt>MCInst</tt> class</a></li> - </ul> - </li> - <li><a href="#codegenalgs">Target-independent code generation algorithms</a> - <ul> - <li><a href="#instselect">Instruction Selection</a> - <ul> - <li><a href="#selectiondag_intro">Introduction to SelectionDAGs</a></li> - <li><a href="#selectiondag_process">SelectionDAG Code Generation - Process</a></li> - <li><a href="#selectiondag_build">Initial SelectionDAG - Construction</a></li> - <li><a href="#selectiondag_legalize_types">SelectionDAG LegalizeTypes Phase</a></li> - <li><a href="#selectiondag_legalize">SelectionDAG Legalize Phase</a></li> - <li><a 
href="#selectiondag_optimize">SelectionDAG Optimization - Phase: the DAG Combiner</a></li> - <li><a href="#selectiondag_select">SelectionDAG Select Phase</a></li> - <li><a href="#selectiondag_sched">SelectionDAG Scheduling and Formation - Phase</a></li> - <li><a href="#selectiondag_future">Future directions for the - SelectionDAG</a></li> - </ul></li> - <li><a href="#liveintervals">Live Intervals</a> - <ul> - <li><a href="#livevariable_analysis">Live Variable Analysis</a></li> - <li><a href="#liveintervals_analysis">Live Intervals Analysis</a></li> - </ul></li> - <li><a href="#regalloc">Register Allocation</a> - <ul> - <li><a href="#regAlloc_represent">How registers are represented in - LLVM</a></li> - <li><a href="#regAlloc_howTo">Mapping virtual registers to physical - registers</a></li> - <li><a href="#regAlloc_twoAddr">Handling two address instructions</a></li> - <li><a href="#regAlloc_ssaDecon">The SSA deconstruction phase</a></li> - <li><a href="#regAlloc_fold">Instruction folding</a></li> - <li><a href="#regAlloc_builtIn">Built in register allocators</a></li> - </ul></li> - <li><a href="#codeemit">Code Emission</a></li> - <li><a href="#vliw_packetizer">VLIW Packetizer</a> - <ul> - <li><a href="#vliw_mapping">Mapping from instructions to functional - units</a></li> - <li><a href="#vliw_repr">How the packetization tables are - generated and used</a></li> - </ul> - </li> - </ul> - </li> - <li><a href="#nativeassembler">Implementing a Native Assembler</a></li> - - <li><a href="#targetimpls">Target-specific Implementation Notes</a> - <ul> - <li><a href="#targetfeatures">Target Feature Matrix</a></li> - <li><a href="#tailcallopt">Tail call optimization</a></li> - <li><a href="#sibcallopt">Sibling call optimization</a></li> - <li><a href="#x86">The X86 backend</a></li> - <li><a href="#ppc">The PowerPC backend</a> - <ul> - <li><a href="#ppc_abi">LLVM PowerPC ABI</a></li> - <li><a href="#ppc_frame">Frame Layout</a></li> - <li><a href="#ppc_prolog">Prolog/Epilog</a></li> - <li><a href="#ppc_dynamic">Dynamic Allocation</a></li> - </ul></li> - <li><a href="#ptx">The PTX backend</a></li> - </ul></li> - -</ol> - -<div class="doc_author"> - <p>Written by the LLVM Team.</p> -</div> - -<div class="doc_warning"> - <p>Warning: This is a work in progress.</p> -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="introduction">Introduction</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>The LLVM target-independent code generator is a framework that provides a - suite of reusable components for translating the LLVM internal representation - to the machine code for a specified target—either in assembly form - (suitable for a static compiler) or in binary machine code format (usable for - a JIT compiler). The LLVM target-independent code generator consists of six - main components:</p> - -<ol> - <li><a href="#targetdesc">Abstract target description</a> interfaces which - capture important properties about various aspects of the machine, - independently of how they will be used. These interfaces are defined in - <tt>include/llvm/Target/</tt>.</li> - - <li>Classes used to represent the <a href="#codegendesc">code being - generated</a> for a target. These classes are intended to be abstract - enough to represent the machine code for <i>any</i> target machine. These - classes are defined in <tt>include/llvm/CodeGen/</tt>. 
At this level, - concepts like "constant pool entries" and "jump tables" are explicitly - exposed.</li> - - <li>Classes and algorithms used to represent code as the object file level, - the <a href="#mc">MC Layer</a>. These classes represent assembly level - constructs like labels, sections, and instructions. At this level, - concepts like "constant pool entries" and "jump tables" don't exist.</li> - - <li><a href="#codegenalgs">Target-independent algorithms</a> used to implement - various phases of native code generation (register allocation, scheduling, - stack frame representation, etc). This code lives - in <tt>lib/CodeGen/</tt>.</li> - - <li><a href="#targetimpls">Implementations of the abstract target description - interfaces</a> for particular targets. These machine descriptions make - use of the components provided by LLVM, and can optionally provide custom - target-specific passes, to build complete code generators for a specific - target. Target descriptions live in <tt>lib/Target/</tt>.</li> - - <li><a href="#jit">The target-independent JIT components</a>. The LLVM JIT is - completely target independent (it uses the <tt>TargetJITInfo</tt> - structure to interface for target-specific issues. The code for the - target-independent JIT lives in <tt>lib/ExecutionEngine/JIT</tt>.</li> -</ol> - -<p>Depending on which part of the code generator you are interested in working - on, different pieces of this will be useful to you. In any case, you should - be familiar with the <a href="#targetdesc">target description</a> - and <a href="#codegendesc">machine code representation</a> classes. If you - want to add a backend for a new target, you will need - to <a href="#targetimpls">implement the target description</a> classes for - your new target and understand the <a href="LangRef.html">LLVM code - representation</a>. If you are interested in implementing a - new <a href="#codegenalgs">code generation algorithm</a>, it should only - depend on the target-description and machine code representation classes, - ensuring that it is portable.</p> - -<!-- ======================================================================= --> -<h3> - <a name="required">Required components in the code generator</a> -</h3> - -<div> - -<p>The two pieces of the LLVM code generator are the high-level interface to the - code generator and the set of reusable components that can be used to build - target-specific backends. The two most important interfaces - (<a href="#targetmachine"><tt>TargetMachine</tt></a> - and <a href="#targetdata"><tt>TargetData</tt></a>) are the only ones that are - required to be defined for a backend to fit into the LLVM system, but the - others must be defined if the reusable code generator components are going to - be used.</p> - -<p>This design has two important implications. The first is that LLVM can - support completely non-traditional code generation targets. For example, the - C backend does not require register allocation, instruction selection, or any - of the other standard components provided by the system. As such, it only - implements these two interfaces, and does its own thing. Note that C backend - was removed from the trunk since LLVM 3.1 release. 
Another example of - a code generator like this is a (purely hypothetical) backend that converts - LLVM to the GCC RTL form and uses GCC to emit machine code for a target.</p> - -<p>This design also implies that it is possible to design and implement - radically different code generators in the LLVM system that do not make use - of any of the built-in components. Doing so is not recommended at all, but - could be required for radically different targets that do not fit into the - LLVM machine description model: FPGAs for example.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="high-level-design">The high-level design of the code generator</a> -</h3> - -<div> - -<p>The LLVM target-independent code generator is designed to support efficient - and quality code generation for standard register-based microprocessors. - Code generation in this model is divided into the following stages:</p> - -<ol> - <li><b><a href="#instselect">Instruction Selection</a></b> — This phase - determines an efficient way to express the input LLVM code in the target - instruction set. This stage produces the initial code for the program in - the target instruction set, then makes use of virtual registers in SSA - form and physical registers that represent any required register - assignments due to target constraints or calling conventions. This step - turns the LLVM code into a DAG of target instructions.</li> - - <li><b><a href="#selectiondag_sched">Scheduling and Formation</a></b> — - This phase takes the DAG of target instructions produced by the - instruction selection phase, determines an ordering of the instructions, - then emits the instructions - as <tt><a href="#machineinstr">MachineInstr</a></tt>s with that ordering. - Note that we describe this in the <a href="#instselect">instruction - selection section</a> because it operates on - a <a href="#selectiondag_intro">SelectionDAG</a>.</li> - - <li><b><a href="#ssamco">SSA-based Machine Code Optimizations</a></b> — - This optional stage consists of a series of machine-code optimizations - that operate on the SSA-form produced by the instruction selector. - Optimizations like modulo-scheduling or peephole optimization work - here.</li> - - <li><b><a href="#regalloc">Register Allocation</a></b> — The target code - is transformed from an infinite virtual register file in SSA form to the - concrete register file used by the target. This phase introduces spill - code and eliminates all virtual register references from the program.</li> - - <li><b><a href="#proepicode">Prolog/Epilog Code Insertion</a></b> — Once - the machine code has been generated for the function and the amount of - stack space required is known (used for LLVM alloca's and spill slots), - the prolog and epilog code for the function can be inserted and "abstract - stack location references" can be eliminated. 
This stage is responsible - for implementing optimizations like frame-pointer elimination and stack - packing.</li> - - <li><b><a href="#latemco">Late Machine Code Optimizations</a></b> — - Optimizations that operate on "final" machine code can go here, such as - spill code scheduling and peephole optimizations.</li> - - <li><b><a href="#codeemit">Code Emission</a></b> — The final stage - actually puts out the code for the current function, either in the target - assembler format or in machine code.</li> -</ol> - -<p>The code generator is based on the assumption that the instruction selector - will use an optimal pattern matching selector to create high-quality - sequences of native instructions. Alternative code generator designs based - on pattern expansion and aggressive iterative peephole optimization are much - slower. This design permits efficient compilation (important for JIT - environments) and aggressive optimization (used when generating code offline) - by allowing components of varying levels of sophistication to be used for any - step of compilation.</p> - -<p>In addition to these stages, target implementations can insert arbitrary - target-specific passes into the flow. For example, the X86 target uses a - special pass to handle the 80x87 floating point stack architecture. Other - targets with unusual requirements can be supported with custom passes as - needed.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="tablegen">Using TableGen for target description</a> -</h3> - -<div> - -<p>The target description classes require a detailed description of the target - architecture. These target descriptions often have a large amount of common - information (e.g., an <tt>add</tt> instruction is almost identical to a - <tt>sub</tt> instruction). In order to allow the maximum amount of - commonality to be factored out, the LLVM code generator uses - the <a href="TableGenFundamentals.html">TableGen</a> tool to describe big - chunks of the target machine, which allows the use of domain-specific and - target-specific abstractions to reduce the amount of repetition.</p> - -<p>As LLVM continues to be developed and refined, we plan to move more and more - of the target description to the <tt>.td</tt> form. Doing so gives us a - number of advantages. The most important is that it makes it easier to port - LLVM because it reduces the amount of C++ code that has to be written, and - the surface area of the code generator that needs to be understood before - someone can get something working. Second, it makes it easier to change - things. In particular, if tables and other things are all emitted - by <tt>tblgen</tt>, we only need a change in one place (<tt>tblgen</tt>) to - update all of the targets to a new interface.</p> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="targetdesc">Target description classes</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>The LLVM target description classes (located in the - <tt>include/llvm/Target</tt> directory) provide an abstract description of - the target machine independent of any particular client. 
These classes are - designed to capture the <i>abstract</i> properties of the target (such as the - instructions and registers it has), and do not incorporate any particular - pieces of code generation algorithms.</p> - -<p>All of the target description classes (except the - <tt><a href="#targetdata">TargetData</a></tt> class) are designed to be - subclassed by the concrete target implementation, and have virtual methods - implemented. To get to these implementations, the - <tt><a href="#targetmachine">TargetMachine</a></tt> class provides accessors - that should be implemented by the target.</p> - -<!-- ======================================================================= --> -<h3> - <a name="targetmachine">The <tt>TargetMachine</tt> class</a> -</h3> - -<div> - -<p>The <tt>TargetMachine</tt> class provides virtual methods that are used to - access the target-specific implementations of the various target description - classes via the <tt>get*Info</tt> methods (<tt>getInstrInfo</tt>, - <tt>getRegisterInfo</tt>, <tt>getFrameInfo</tt>, etc.). This class is - designed to be specialized by a concrete target implementation - (e.g., <tt>X86TargetMachine</tt>) which implements the various virtual - methods. The only required target description class is - the <a href="#targetdata"><tt>TargetData</tt></a> class, but if the code - generator components are to be used, the other interfaces should be - implemented as well.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="targetdata">The <tt>TargetData</tt> class</a> -</h3> - -<div> - -<p>The <tt>TargetData</tt> class is the only required target description class, - and it is the only class that is not extensible (you cannot derived a new - class from it). <tt>TargetData</tt> specifies information about how the - target lays out memory for structures, the alignment requirements for various - data types, the size of pointers in the target, and whether the target is - little-endian or big-endian.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="targetlowering">The <tt>TargetLowering</tt> class</a> -</h3> - -<div> - -<p>The <tt>TargetLowering</tt> class is used by SelectionDAG based instruction - selectors primarily to describe how LLVM code should be lowered to - SelectionDAG operations. Among other things, this class indicates:</p> - -<ul> - <li>an initial register class to use for various <tt>ValueType</tt>s,</li> - - <li>which operations are natively supported by the target machine,</li> - - <li>the return type of <tt>setcc</tt> operations,</li> - - <li>the type to use for shift amounts, and</li> - - <li>various high-level characteristics, like whether it is profitable to turn - division by a constant into a multiplication sequence</li> -</ul> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="targetregisterinfo">The <tt>TargetRegisterInfo</tt> class</a> -</h3> - -<div> - -<p>The <tt>TargetRegisterInfo</tt> class is used to describe the register file - of the target and any interactions between the registers.</p> - -<p>Registers in the code generator are represented in the code generator by - unsigned integers. Physical registers (those that actually exist in the - target description) are unique small numbers, and virtual registers are - generally large. 
Note that register #0 is reserved as a flag value.</p> - -<p>Each register in the processor description has an associated - <tt>TargetRegisterDesc</tt> entry, which provides a textual name for the - register (used for assembly output and debugging dumps) and a set of aliases - (used to indicate whether one register overlaps with another).</p> - -<p>In addition to the per-register description, the <tt>TargetRegisterInfo</tt> - class exposes a set of processor specific register classes (instances of the - <tt>TargetRegisterClass</tt> class). Each register class contains sets of - registers that have the same properties (for example, they are all 32-bit - integer registers). Each SSA virtual register created by the instruction - selector has an associated register class. When the register allocator runs, - it replaces virtual registers with a physical register in the set.</p> - -<p>The target-specific implementations of these classes is auto-generated from - a <a href="TableGenFundamentals.html">TableGen</a> description of the - register file.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="targetinstrinfo">The <tt>TargetInstrInfo</tt> class</a> -</h3> - -<div> - -<p>The <tt>TargetInstrInfo</tt> class is used to describe the machine - instructions supported by the target. It is essentially an array of - <tt>TargetInstrDescriptor</tt> objects, each of which describes one - instruction the target supports. Descriptors define things like the mnemonic - for the opcode, the number of operands, the list of implicit register uses - and defs, whether the instruction has certain target-independent properties - (accesses memory, is commutable, etc), and holds any target-specific - flags.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="targetframeinfo">The <tt>TargetFrameInfo</tt> class</a> -</h3> - -<div> - -<p>The <tt>TargetFrameInfo</tt> class is used to provide information about the - stack frame layout of the target. It holds the direction of stack growth, the - known stack alignment on entry to each function, and the offset to the local - area. The offset to the local area is the offset from the stack pointer on - function entry to the first location where function data (local variables, - spill locations) can be stored.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="targetsubtarget">The <tt>TargetSubtarget</tt> class</a> -</h3> - -<div> - -<p>The <tt>TargetSubtarget</tt> class is used to provide information about the - specific chip set being targeted. A sub-target informs code generation of - which instructions are supported, instruction latencies and instruction - execution itinerary; i.e., which processing units are used, in what order, - and for how long.</p> - -</div> - - -<!-- ======================================================================= --> -<h3> - <a name="targetjitinfo">The <tt>TargetJITInfo</tt> class</a> -</h3> - -<div> - -<p>The <tt>TargetJITInfo</tt> class exposes an abstract interface used by the - Just-In-Time code generator to perform target-specific activities, such as - emitting stubs. 
If a <tt>TargetMachine</tt> supports JIT code generation, it - should provide one of these objects through the <tt>getJITInfo</tt> - method.</p> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="codegendesc">Machine code description classes</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>At the high-level, LLVM code is translated to a machine specific - representation formed out of - <a href="#machinefunction"><tt>MachineFunction</tt></a>, - <a href="#machinebasicblock"><tt>MachineBasicBlock</tt></a>, - and <a href="#machineinstr"><tt>MachineInstr</tt></a> instances (defined - in <tt>include/llvm/CodeGen</tt>). This representation is completely target - agnostic, representing instructions in their most abstract form: an opcode - and a series of operands. This representation is designed to support both an - SSA representation for machine code, as well as a register allocated, non-SSA - form.</p> - -<!-- ======================================================================= --> -<h3> - <a name="machineinstr">The <tt>MachineInstr</tt> class</a> -</h3> - -<div> - -<p>Target machine instructions are represented as instances of the - <tt>MachineInstr</tt> class. This class is an extremely abstract way of - representing machine instructions. In particular, it only keeps track of an - opcode number and a set of operands.</p> - -<p>The opcode number is a simple unsigned integer that only has meaning to a - specific backend. All of the instructions for a target should be defined in - the <tt>*InstrInfo.td</tt> file for the target. The opcode enum values are - auto-generated from this description. The <tt>MachineInstr</tt> class does - not have any information about how to interpret the instruction (i.e., what - the semantics of the instruction are); for that you must refer to the - <tt><a href="#targetinstrinfo">TargetInstrInfo</a></tt> class.</p> - -<p>The operands of a machine instruction can be of several different types: a - register reference, a constant integer, a basic block reference, etc. In - addition, a machine operand should be marked as a def or a use of the value - (though only registers are allowed to be defs).</p> - -<p>By convention, the LLVM code generator orders instruction operands so that - all register definitions come before the register uses, even on architectures - that are normally printed in other orders. For example, the SPARC add - instruction: "<tt>add %i1, %i2, %i3</tt>" adds the "%i1", and "%i2" registers - and stores the result into the "%i3" register. In the LLVM code generator, - the operands should be stored as "<tt>%i3, %i1, %i2</tt>": with the - destination first.</p> - -<p>Keeping destination (definition) operands at the beginning of the operand - list has several advantages. In particular, the debugging printer will print - the instruction like this:</p> - -<div class="doc_code"> -<pre> -%r3 = add %i1, %i2 -</pre> -</div> - -<p>Also if the first operand is a def, it is easier to <a href="#buildmi">create - instructions</a> whose only def is the first operand.</p> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="buildmi">Using the <tt>MachineInstrBuilder.h</tt> functions</a> -</h4> - -<div> - -<p>Machine instructions are created by using the <tt>BuildMI</tt> functions, - located in the <tt>include/llvm/CodeGen/MachineInstrBuilder.h</tt> file. 
The - <tt>BuildMI</tt> functions make it easy to build arbitrary machine - instructions. Usage of the <tt>BuildMI</tt> functions look like this:</p> - -<div class="doc_code"> -<pre> -// Create a 'DestReg = mov 42' (rendered in X86 assembly as 'mov DestReg, 42') -// instruction. The '1' specifies how many operands will be added. -MachineInstr *MI = BuildMI(X86::MOV32ri, 1, DestReg).addImm(42); - -// Create the same instr, but insert it at the end of a basic block. -MachineBasicBlock &MBB = ... -BuildMI(MBB, X86::MOV32ri, 1, DestReg).addImm(42); - -// Create the same instr, but insert it before a specified iterator point. -MachineBasicBlock::iterator MBBI = ... -BuildMI(MBB, MBBI, X86::MOV32ri, 1, DestReg).addImm(42); - -// Create a 'cmp Reg, 0' instruction, no destination reg. -MI = BuildMI(X86::CMP32ri, 2).addReg(Reg).addImm(0); -// Create an 'sahf' instruction which takes no operands and stores nothing. -MI = BuildMI(X86::SAHF, 0); - -// Create a self looping branch instruction. -BuildMI(MBB, X86::JNE, 1).addMBB(&MBB); -</pre> -</div> - -<p>The key thing to remember with the <tt>BuildMI</tt> functions is that you - have to specify the number of operands that the machine instruction will - take. This allows for efficient memory allocation. You also need to specify - if operands default to be uses of values, not definitions. If you need to - add a definition operand (other than the optional destination register), you - must explicitly mark it as such:</p> - -<div class="doc_code"> -<pre> -MI.addReg(Reg, RegState::Define); -</pre> -</div> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="fixedregs">Fixed (preassigned) registers</a> -</h4> - -<div> - -<p>One important issue that the code generator needs to be aware of is the - presence of fixed registers. In particular, there are often places in the - instruction stream where the register allocator <em>must</em> arrange for a - particular value to be in a particular register. This can occur due to - limitations of the instruction set (e.g., the X86 can only do a 32-bit divide - with the <tt>EAX</tt>/<tt>EDX</tt> registers), or external factors like - calling conventions. In any case, the instruction selector should emit code - that copies a virtual register into or out of a physical register when - needed.</p> - -<p>For example, consider this simple LLVM example:</p> - -<div class="doc_code"> -<pre> -define i32 @test(i32 %X, i32 %Y) { - %Z = udiv i32 %X, %Y - ret i32 %Z -} -</pre> -</div> - -<p>The X86 instruction selector produces this machine code for the <tt>div</tt> - and <tt>ret</tt> (use "<tt>llc X.bc -march=x86 -print-machineinstrs</tt>" to - get this):</p> - -<div class="doc_code"> -<pre> -;; Start of div -%EAX = mov %reg1024 ;; Copy X (in reg1024) into EAX -%reg1027 = sar %reg1024, 31 -%EDX = mov %reg1027 ;; Sign extend X into EDX -idiv %reg1025 ;; Divide by Y (in reg1025) -%reg1026 = mov %EAX ;; Read the result (Z) out of EAX - -;; Start of ret -%EAX = mov %reg1026 ;; 32-bit return value goes in EAX -ret -</pre> -</div> - -<p>By the end of code generation, the register allocator has coalesced the - registers and deleted the resultant identity moves producing the following - code:</p> - -<div class="doc_code"> -<pre> -;; X is in EAX, Y is in ECX -mov %EAX, %EDX -sar %EDX, 31 -idiv %ECX -ret -</pre> -</div> - -<p>This approach is extremely general (if it can handle the X86 architecture, it - can handle anything!) 
and allows all of the target specific knowledge about - the instruction stream to be isolated in the instruction selector. Note that - physical registers should have a short lifetime for good code generation, and - all physical registers are assumed dead on entry to and exit from basic - blocks (before register allocation). Thus, if you need a value to be live - across basic block boundaries, it <em>must</em> live in a virtual - register.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="callclobber">Call-clobbered registers</a> -</h4> - -<div> - -<p>Some machine instructions, like calls, clobber a large number of physical - registers. Rather than adding <code><def,dead></code> operands for - all of them, it is possible to use an <code>MO_RegisterMask</code> operand - instead. The register mask operand holds a bit mask of preserved registers, - and everything else is considered to be clobbered by the instruction. </p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="ssa">Machine code in SSA form</a> -</h4> - -<div> - -<p><tt>MachineInstr</tt>'s are initially selected in SSA-form, and are - maintained in SSA-form until register allocation happens. For the most part, - this is trivially simple since LLVM is already in SSA form; LLVM PHI nodes - become machine code PHI nodes, and virtual registers are only allowed to have - a single definition.</p> - -<p>After register allocation, machine code is no longer in SSA-form because - there are no virtual registers left in the code.</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="machinebasicblock">The <tt>MachineBasicBlock</tt> class</a> -</h3> - -<div> - -<p>The <tt>MachineBasicBlock</tt> class contains a list of machine instructions - (<tt><a href="#machineinstr">MachineInstr</a></tt> instances). It roughly - corresponds to the LLVM code input to the instruction selector, but there can - be a one-to-many mapping (i.e. one LLVM basic block can map to multiple - machine basic blocks). The <tt>MachineBasicBlock</tt> class has a - "<tt>getBasicBlock</tt>" method, which returns the LLVM basic block that it - comes from.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="machinefunction">The <tt>MachineFunction</tt> class</a> -</h3> - -<div> - -<p>The <tt>MachineFunction</tt> class contains a list of machine basic blocks - (<tt><a href="#machinebasicblock">MachineBasicBlock</a></tt> instances). It - corresponds one-to-one with the LLVM function input to the instruction - selector. In addition to a list of basic blocks, - the <tt>MachineFunction</tt> contains a a <tt>MachineConstantPool</tt>, - a <tt>MachineFrameInfo</tt>, a <tt>MachineFunctionInfo</tt>, and a - <tt>MachineRegisterInfo</tt>. See - <tt>include/llvm/CodeGen/MachineFunction.h</tt> for more information.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="machineinstrbundle"><tt>MachineInstr Bundles</tt></a> -</h3> - -<div> - -<p>LLVM code generator can model sequences of instructions as MachineInstr - bundles. A MI bundle can model a VLIW group / pack which contains an - arbitrary number of parallel instructions. It can also be used to model - a sequential list of instructions (potentially with data dependencies) that - cannot be legally separated (e.g. 
ARM Thumb2 IT blocks).</p> - -<p>Conceptually, an MI bundle is an MI with a number of other MIs nested within: -</p> - -<div class="doc_code"> -<pre> --------------- -| Bundle | --------- --------------- \ - | ---------------- - | | MI | - | ---------------- - | | - | ---------------- - | | MI | - | ---------------- - | | - | ---------------- - | | MI | - | ---------------- - | --------------- -| Bundle | -------- --------------- \ - | ---------------- - | | MI | - | ---------------- - | | - | ---------------- - | | MI | - | ---------------- - | | - | ... - | --------------- -| Bundle | -------- --------------- \ - | - ... -</pre> -</div> - -<p> MI bundle support does not change the physical representations of - MachineBasicBlock and MachineInstr. All the MIs (including top level and - nested ones) are stored as a sequential list of MIs. The "bundled" MIs are - marked with the 'InsideBundle' flag. A top level MI with the special BUNDLE - opcode is used to represent the start of a bundle. It's legal to mix BUNDLE - MIs with individual MIs that neither are inside bundles nor represent bundles. -</p> - -<p> MachineInstr passes should operate on an MI bundle as a single unit. Member - methods have been taught to correctly handle bundles and MIs inside bundles. - The MachineBasicBlock iterator has been modified to skip over bundled MIs to - enforce the bundle-as-a-single-unit concept. An alternative iterator - instr_iterator has been added to MachineBasicBlock to allow passes to - iterate over all of the MIs in a MachineBasicBlock, including those which - are nested inside bundles. The top level BUNDLE instruction must have the - correct set of register MachineOperand's that represent the cumulative - inputs and outputs of the bundled MIs.</p> - -<p> Packing / bundling of MachineInstr's should be done as part of the register - allocation super-pass. More specifically, the pass which determines what - MIs should be bundled together must be done after the code generator exits SSA - form (i.e. after two-address pass, PHI elimination, and copy coalescing). - Bundles should only be finalized (i.e. adding BUNDLE MIs and input and - output register MachineOperands) after virtual registers have been - rewritten into physical registers. This requirement eliminates the need to - add virtual register operands to BUNDLE instructions, which would effectively - double the virtual register def and use lists.</p> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="mc">The "MC" Layer</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p> -The MC Layer is used to represent and process code at the raw machine code -level, devoid of "high level" information like "constant pools", "jump tables", -"global variables" or anything like that. At this level, LLVM handles things -like label names, machine instructions, and sections in the object file. The -code in this layer is used for a number of important purposes: the tail end of -the code generator uses it to write a .s or .o file, and it is also used by the -llvm-mc tool to implement standalone machine code assemblers and disassemblers. -</p> - -<p> -This section describes some of the important classes. There are also a number -of important subsystems that interact at this layer; they are described later -in this manual. 
-</p> - -<!-- ======================================================================= --> -<h3> - <a name="mcstreamer">The <tt>MCStreamer</tt> API</a> -</h3> - -<div> - -<p> -MCStreamer is best thought of as an assembler API. It is an abstract API which -is <em>implemented</em> in different ways (e.g. to output a .s file, output an -ELF .o file, etc) but whose API corresponds directly to what you see in a .s -file. MCStreamer has one method per directive, such as EmitLabel, -EmitSymbolAttribute, SwitchSection, EmitValue (for .byte, .word), etc., which -directly correspond to assembly level directives. It also has an -EmitInstruction method, which is used to output an MCInst to the streamer. -</p> - -<p> -This API is most important for two clients: the llvm-mc stand-alone assembler is -effectively a parser that parses a line, then invokes a method on MCStreamer. In -the code generator, the <a href="#codeemit">Code Emission</a> phase of the code -generator lowers higher level LLVM IR and Machine* constructs down to the MC -layer, emitting directives through MCStreamer.</p> - -<p> -On the implementation side of MCStreamer, there are two major implementations: -one for writing out a .s file (MCAsmStreamer), and one for writing out a .o -file (MCObjectStreamer). MCAsmStreamer is a straightforward implementation -that prints out a directive for each method (e.g. EmitValue -> .byte), but -MCObjectStreamer implements a full assembler. -</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="mccontext">The <tt>MCContext</tt> class</a> -</h3> - -<div> - -<p> -The MCContext class is the owner of a variety of uniqued data structures at the -MC layer, including symbols, sections, etc. As such, this is the class that you -interact with to create symbols and sections. This class cannot be subclassed. -</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="mcsymbol">The <tt>MCSymbol</tt> class</a> -</h3> - -<div> - -<p> -The MCSymbol class represents a symbol (aka label) in the assembly file. There -are two interesting kinds of symbols: assembler temporary symbols, and normal -symbols. Assembler temporary symbols are used and processed by the assembler -but are discarded when the object file is produced. The distinction is usually -represented by adding a prefix to the label, for example, "L" labels are -assembler temporary labels in MachO. -</p> - -<p>MCSymbols are created by MCContext and uniqued there. This means that -MCSymbols can be compared for pointer equivalence to find out if they are the -same symbol. Note that pointer inequality does not guarantee the labels will -end up at different addresses though. It's perfectly legal to output something -like this to the .s file:</p> - -<pre> - foo: - bar: - .byte 4 -</pre> - -<p>In this case, both the foo and bar symbols will have the same address.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="mcsection">The <tt>MCSection</tt> class</a> -</h3> - -<div> - -<p> -The MCSection class represents an object-file specific section. It is subclassed -by object file specific implementations (e.g. <tt>MCSectionMachO</tt>, -<tt>MCSectionCOFF</tt>, <tt>MCSectionELF</tt>) and these are created and uniqued -by MCContext. The MCStreamer has a notion of the current section, which can be -changed with the SwitchSection method (which corresponds to a ".section" -directive in a .s file). 
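</p>

<p>As an illustration of how these pieces fit together, here is a minimal,
hedged sketch that emits the equivalent of the small .s fragment shown above.
The <tt>Streamer</tt>, <tt>Ctx</tt>, and <tt>TextSection</tt> variables are
assumed to have been set up elsewhere (e.g. by the target's object-file
lowering); only the MCStreamer/MCContext calls themselves are from the API
described in this section:</p>

<div class="doc_code">
<pre>
// Switch to the text section, as a ".section" directive would.
Streamer.SwitchSection(TextSection);

// Create (and unique) the symbol "foo" in the context, then emit "foo:".
MCSymbol *Foo = Ctx.GetOrCreateSymbol(StringRef("foo"));
Streamer.EmitLabel(Foo);

// Emit ".byte 4": a one-byte integer value.
Streamer.EmitIntValue(4, 1);
</pre>
</div>

<p>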
-</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="mcinst">The <tt>MCInst</tt> class</a> -</h3> - -<div> - -<p> -The MCInst class is a target-independent representation of an instruction. It -is a simple class (much more so than <a href="#machineinstr">MachineInstr</a>) -that holds a target-specific opcode and a vector of MCOperands. MCOperand, in -turn, is a simple discriminated union of three cases: 1) a simple immediate, -2) a target register ID, 3) a symbolic expression (e.g. "Lfoo-Lbar+42") as an -MCExpr. -</p> - -<p>MCInst is the common currency used to represent machine instructions at the -MC layer. It is the type used by the instruction encoder and the instruction -printer, and the type generated by the assembly parser and disassembler. -</p> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="codegenalgs">Target-independent code generation algorithms</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>This section documents the phases described in the - <a href="#high-level-design">high-level design of the code generator</a>. - It explains how they work and some of the rationale behind their design.</p> - -<!-- ======================================================================= --> -<h3> - <a name="instselect">Instruction Selection</a> -</h3> - -<div> - -<p>Instruction Selection is the process of translating LLVM code presented to - the code generator into target-specific machine instructions. There are - several well-known ways to do this in the literature. LLVM uses a - SelectionDAG-based instruction selector.</p> - -<p>Portions of the DAG instruction selector are generated from the target - description (<tt>*.td</tt>) files. Our goal is for the entire instruction - selector to be generated from these <tt>.td</tt> files, though currently - there are still things that require custom C++ code.</p> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="selectiondag_intro">Introduction to SelectionDAGs</a> -</h4> - -<div> - -<p>The SelectionDAG provides an abstraction for code representation in a way - that is amenable to instruction selection using automatic techniques - (e.g. dynamic-programming based optimal pattern matching selectors). It is - also well-suited to other phases of code generation; in particular, - instruction scheduling (SelectionDAG's are very close to scheduling DAGs - post-selection). Additionally, the SelectionDAG provides a host - representation where a large variety of very-low-level (but - target-independent) <a href="#selectiondag_optimize">optimizations</a> may be - performed; ones which require extensive information about the instructions - efficiently supported by the target.</p> - -<p>The SelectionDAG is a Directed-Acyclic-Graph whose nodes are instances of the - <tt>SDNode</tt> class. The primary payload of the <tt>SDNode</tt> is its - operation code (Opcode) that indicates what operation the node performs and - the operands to the operation. The various operation node types are - described at the top of the <tt>include/llvm/CodeGen/SelectionDAGNodes.h</tt> - file.</p> - -<p>Although most operations define a single value, each node in the graph may - define multiple values. For example, a combined div/rem operation will - define both the quotient and the remainder. 
Many other situations require - multiple values as well. Each node also has some number of operands, which - are edges to the node defining the used value. Because nodes may define - multiple values, edges are represented by instances of the <tt>SDValue</tt> - class, which is a <tt><SDNode, unsigned></tt> pair, indicating the node - and result value being used, respectively. Each value produced by - an <tt>SDNode</tt> has an associated <tt>MVT</tt> (Machine Value Type) - indicating what the type of the value is.</p> - -<p>SelectionDAGs contain two different kinds of values: those that represent - data flow and those that represent control flow dependencies. Data values - are simple edges with an integer or floating point value type. Control edges - are represented as "chain" edges which are of type <tt>MVT::Other</tt>. - These edges provide an ordering between nodes that have side effects (such as - loads, stores, calls, returns, etc). All nodes that have side effects should - take a token chain as input and produce a new one as output. By convention, - token chain inputs are always operand #0, and chain results are always the - last value produced by an operation.</p> - -<p>A SelectionDAG has designated "Entry" and "Root" nodes. The Entry node is - always a marker node with an Opcode of <tt>ISD::EntryToken</tt>. The Root - node is the final side-effecting node in the token chain. For example, in a - single basic block function it would be the return node.</p> - -<p>One important concept for SelectionDAGs is the notion of a "legal" vs. - "illegal" DAG. A legal DAG for a target is one that only uses supported - operations and supported types. On a 32-bit PowerPC, for example, a DAG with - a value of type i1, i8, i16, or i64 would be illegal, as would a DAG that - uses an SREM or UREM operation. The - <a href="#selectiondag_legalize_types">legalize types</a> and - <a href="#selectiondag_legalize">legalize operations</a> phases are - responsible for turning an illegal DAG into a legal DAG.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="selectiondag_process">SelectionDAG Instruction Selection Process</a> -</h4> - -<div> - -<p>SelectionDAG-based instruction selection consists of the following steps:</p> - -<ol> - <li><a href="#selectiondag_build">Build initial DAG</a> — This stage - performs a simple translation from the input LLVM code to an illegal - SelectionDAG.</li> - - <li><a href="#selectiondag_optimize">Optimize SelectionDAG</a> — This - stage performs simple optimizations on the SelectionDAG to simplify it, - and recognize meta instructions (like rotates - and <tt>div</tt>/<tt>rem</tt> pairs) for targets that support these meta - operations. 
This makes the resultant code more efficient and - the <a href="#selectiondag_select">select instructions from DAG</a> phase - (below) simpler.</li> - - <li><a href="#selectiondag_legalize_types">Legalize SelectionDAG Types</a> - — This stage transforms SelectionDAG nodes to eliminate any types - that are unsupported on the target.</li> - - <li><a href="#selectiondag_optimize">Optimize SelectionDAG</a> — The - SelectionDAG optimizer is run to clean up redundancies exposed by type - legalization.</li> - - <li><a href="#selectiondag_legalize">Legalize SelectionDAG Ops</a> — - This stage transforms SelectionDAG nodes to eliminate any operations - that are unsupported on the target.</li> - - <li><a href="#selectiondag_optimize">Optimize SelectionDAG</a> — The - SelectionDAG optimizer is run to eliminate inefficiencies introduced by - operation legalization.</li> - - <li><a href="#selectiondag_select">Select instructions from DAG</a> — - Finally, the target instruction selector matches the DAG operations to - target instructions. This process translates the target-independent input - DAG into another DAG of target instructions.</li> - - <li><a href="#selectiondag_sched">SelectionDAG Scheduling and Formation</a> - — The last phase assigns a linear order to the instructions in the - target-instruction DAG and emits them into the MachineFunction being - compiled. This step uses traditional prepass scheduling techniques.</li> -</ol> - -<p>After all of these steps are complete, the SelectionDAG is destroyed and the - rest of the code generation passes are run.</p> - -<p>One great way to visualize what is going on here is to take advantage of a - few LLC command line options. The following options pop up a window - displaying the SelectionDAG at specific times (if you only get errors printed - to the console while using this, you probably - <a href="ProgrammersManual.html#ViewGraph">need to configure your system</a> - to add support for it).</p> - -<ul> - <li><tt>-view-dag-combine1-dags</tt> displays the DAG after being built, - before the first optimization pass.</li> - - <li><tt>-view-legalize-dags</tt> displays the DAG before Legalization.</li> - - <li><tt>-view-dag-combine2-dags</tt> displays the DAG before the second - optimization pass.</li> - - <li><tt>-view-isel-dags</tt> displays the DAG before the Select phase.</li> - - <li><tt>-view-sched-dags</tt> displays the DAG before Scheduling.</li> -</ul> - -<p>The <tt>-view-sunit-dags</tt> displays the Scheduler's dependency graph. - This graph is based on the final SelectionDAG, with nodes that must be - scheduled together bundled into a single scheduling-unit node, and with - immediate operands and other nodes that aren't relevant for scheduling - omitted.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="selectiondag_build">Initial SelectionDAG Construction</a> -</h4> - -<div> - -<p>The initial SelectionDAG is naïvely peephole expanded from the LLVM - input by the <tt>SelectionDAGLowering</tt> class in the - <tt>lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp</tt> file. The intent of - this pass is to expose as much low-level, target-specific details to the - SelectionDAG as possible. This pass is mostly hard-coded (e.g. an - LLVM <tt>add</tt> turns into an <tt>SDNode add</tt> while a - <tt>getelementptr</tt> is expanded into the obvious arithmetic). This pass - requires target-specific hooks to lower calls, returns, varargs, etc. 
For - these features, the <tt><a href="#targetlowering">TargetLowering</a></tt> - interface is used.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="selectiondag_legalize_types">SelectionDAG LegalizeTypes Phase</a> -</h4> - -<div> - -<p>The LegalizeTypes phase is in charge of converting a DAG to use only the types - that are natively supported by the target.</p> - -<p>There are two main ways of converting values of unsupported scalar types to - values of supported types: converting small types to larger types - ("promoting"), and breaking up large integer types into smaller ones - ("expanding"). For example, a target might require that all f32 values be - promoted to f64 and that all i1/i8/i16 values be promoted to i32. The same - target might require that all i64 values be expanded into pairs of i32 - values. These changes can insert sign and zero extensions as needed to make - sure that the final code has the same behavior as the input.</p> - -<p>There are two main ways of converting values of unsupported vector types to - values of supported types: splitting vector types, multiple times if - necessary, until a legal type is found, and extending vector types by adding - elements to the end to round them out to legal types ("widening"). If a - vector gets split all the way down to single-element parts with no supported - vector type being found, the elements are converted to scalars - ("scalarizing").</p> - -<p>A target implementation tells the legalizer which types are supported (and - which register class to use for them) by calling the - <tt>addRegisterClass</tt> method in its TargetLowering constructor.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="selectiondag_legalize">SelectionDAG Legalize Phase</a> -</h4> - -<div> - -<p>The Legalize phase is in charge of converting a DAG to use only the - operations that are natively supported by the target.</p> - -<p>Targets often have weird constraints, such as not supporting every operation - on every supported datatype (e.g. X86 does not support byte conditional moves - and PowerPC does not support sign-extending loads from a 16-bit memory - location). Legalize takes care of this by open-coding another sequence of - operations to emulate the operation ("expansion"), by promoting one type to a - larger type that supports the operation ("promotion"), or by using a - target-specific hook to implement the legalization ("custom").</p> - -<p>A target implementation tells the legalizer which operations are not - supported (and which of the above three actions to take) by calling the - <tt>setOperationAction</tt> method in its <tt>TargetLowering</tt> - constructor.</p> - -<p>Prior to the existence of the Legalize passes, we required that every target - <a href="#selectiondag_optimize">selector</a> supported and handled every - operator and type even if they were not natively supported. 
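</p>

<p>To make the two legalization hooks concrete, here is a hedged sketch of what
a backend's <tt>TargetLowering</tt> constructor might contain. The target name
(<tt>MyTarget</tt>) and its register class are hypothetical; the
<tt>addRegisterClass</tt> and <tt>setOperationAction</tt> calls are the hooks
described above:</p>

<div class="doc_code">
<pre>
MyTargetLowering::MyTargetLowering(MyTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
  // Tell the type legalizer that i32 values live in general-purpose
  // registers; other scalar integer types are illegal and will be
  // promoted or expanded.
  addRegisterClass(MVT::i32, &MyTarget::GPRRegClass);

  // Tell the operation legalizer what to do with operations the
  // hardware lacks.
  setOperationAction(ISD::SREM,  MVT::i32, Expand);  // open-code it
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);  // no popcount insn
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);  // use a target hook
}
</pre>
</div>

<p>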
The introduction - of the Legalize phases allows all of the canonicalization patterns to be - shared across targets, and makes it very easy to optimize the canonicalized - code because it is still in the form of a DAG.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="selectiondag_optimize"> - SelectionDAG Optimization Phase: the DAG Combiner - </a> -</h4> - -<div> - -<p>The SelectionDAG optimization phase is run multiple times for code - generation, immediately after the DAG is built and once after each - legalization. The first run of the pass allows the initial code to be - cleaned up (e.g. performing optimizations that depend on knowing that the - operators have restricted type inputs). Subsequent runs of the pass clean up - the messy code generated by the Legalize passes, which allows Legalize to be - very simple (it can focus on making code legal instead of focusing on - generating <em>good</em> and legal code).</p> - -<p>One important class of optimizations performed is optimizing inserted sign - and zero extension instructions. We currently use ad-hoc techniques, but - could move to more rigorous techniques in the future. Here are some good - papers on the subject:</p> - -<p>"<a href="http://www.eecs.harvard.edu/~nr/pubs/widen-abstract.html">Widening - integer arithmetic</a>"<br> - Kevin Redwine and Norman Ramsey<br> - International Conference on Compiler Construction (CC) 2004</p> - -<p>"<a href="http://portal.acm.org/citation.cfm?doid=512529.512552">Effective - sign extension elimination</a>"<br> - Motohiro Kawahito, Hideaki Komatsu, and Toshio Nakatani<br> - Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language Design - and Implementation.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="selectiondag_select">SelectionDAG Select Phase</a> -</h4> - -<div> - -<p>The Select phase is the bulk of the target-specific code for instruction - selection. This phase takes a legal SelectionDAG as input, pattern matches - the instructions supported by the target to this DAG, and produces a new DAG - of target code. For example, consider the following LLVM fragment:</p> - -<div class="doc_code"> -<pre> -%t1 = fadd float %W, %X -%t2 = fmul float %t1, %Y -%t3 = fadd float %t2, %Z -</pre> -</div> - -<p>This LLVM code corresponds to a SelectionDAG that looks basically like - this:</p> - -<div class="doc_code"> -<pre> -(fadd:f32 (fmul:f32 (fadd:f32 W, X), Y), Z) -</pre> -</div> - -<p>If a target supports floating point multiply-and-add (FMA) operations, one of - the adds can be merged with the multiply. On the PowerPC, for example, the - output of the instruction selector might look like this DAG:</p> - -<div class="doc_code"> -<pre> -(FMADDS (FADDS W, X), Y, Z) -</pre> -</div> - -<p>The <tt>FMADDS</tt> instruction is a ternary instruction that multiplies its -first two operands and adds the third (as single-precision floating-point -numbers). The <tt>FADDS</tt> instruction is a simple binary single-precision -add instruction. 
To perform this pattern match, the PowerPC backend includes -the following instruction definitions:</p> - -<div class="doc_code"> -<pre> -def FMADDS : AForm_1<59, 29, - (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), - "fmadds $FRT, $FRA, $FRC, $FRB", - [<b>(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB))</b>]>; -def FADDS : AForm_2<59, 21, - (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRB), - "fadds $FRT, $FRA, $FRB", - [<b>(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))</b>]>; -</pre> -</div> - -<p>The portion of the instruction definition in bold indicates the pattern used - to match the instruction. The DAG operators - (like <tt>fmul</tt>/<tt>fadd</tt>) are defined in - the <tt>include/llvm/Target/TargetSelectionDAG.td</tt> file. " - <tt>F4RC</tt>" is the register class of the input and result values.</p> - -<p>The TableGen DAG instruction selector generator reads the instruction - patterns in the <tt>.td</tt> file and automatically builds parts of the - pattern matching code for your target. It has the following strengths:</p> - -<ul> - <li>At compiler-compiler time, it analyzes your instruction patterns and tells - you if your patterns make sense or not.</li> - - <li>It can handle arbitrary constraints on operands for the pattern match. In - particular, it is straight-forward to say things like "match any immediate - that is a 13-bit sign-extended value". For examples, see the - <tt>immSExt16</tt> and related <tt>tblgen</tt> classes in the PowerPC - backend.</li> - - <li>It knows several important identities for the patterns defined. For - example, it knows that addition is commutative, so it allows the - <tt>FMADDS</tt> pattern above to match "<tt>(fadd X, (fmul Y, Z))</tt>" as - well as "<tt>(fadd (fmul X, Y), Z)</tt>", without the target author having - to specially handle this case.</li> - - <li>It has a full-featured type-inferencing system. In particular, you should - rarely have to explicitly tell the system what type parts of your patterns - are. In the <tt>FMADDS</tt> case above, we didn't have to tell - <tt>tblgen</tt> that all of the nodes in the pattern are of type 'f32'. - It was able to infer and propagate this knowledge from the fact that - <tt>F4RC</tt> has type 'f32'.</li> - - <li>Targets can define their own (and rely on built-in) "pattern fragments". - Pattern fragments are chunks of reusable patterns that get inlined into - your patterns during compiler-compiler time. For example, the integer - "<tt>(not x)</tt>" operation is actually defined as a pattern fragment - that expands as "<tt>(xor x, -1)</tt>", since the SelectionDAG does not - have a native '<tt>not</tt>' operation. Targets can define their own - short-hand fragments as they see fit. See the definition of - '<tt>not</tt>' and '<tt>ineg</tt>' for examples.</li> - - <li>In addition to instructions, targets can specify arbitrary patterns that - map to one or more instructions using the 'Pat' class. For example, the - PowerPC has no way to load an arbitrary integer immediate into a register - in one instruction. To tell tblgen how to do this, it defines: - <br> - <br> -<div class="doc_code"> -<pre> -// Arbitrary immediate support. Implement in terms of LIS/ORI. -def : Pat<(i32 imm:$imm), - (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>; -</pre> -</div> - <br> - If none of the single-instruction patterns for loading an immediate into a - register match, this will be used. 
This rule says "match an arbitrary i32 - immediate, turning it into an <tt>ORI</tt> ('or a 16-bit immediate') and - an <tt>LIS</tt> ('load 16-bit immediate, where the immediate is shifted to - the left 16 bits') instruction". To make this work, the - <tt>LO16</tt>/<tt>HI16</tt> node transformations are used to manipulate - the input immediate (in this case, take the high or low 16-bits of the - immediate).</li> - - <li>While the system does automate a lot, it still allows you to write custom - C++ code to match special cases if there is something that is hard to - express.</li> -</ul> - -<p>While it has many strengths, the system currently has some limitations, - primarily because it is a work in progress and is not yet finished:</p> - -<ul> - <li>Overall, there is no way to define or match SelectionDAG nodes that define - multiple values (e.g. <tt>SMUL_LOHI</tt>, <tt>LOAD</tt>, <tt>CALL</tt>, - etc). This is the biggest reason that you currently still <em>have - to</em> write custom C++ code for your instruction selector.</li> - - <li>There is no great way to support matching complex addressing modes yet. - In the future, we will extend pattern fragments to allow them to define - multiple values (e.g. the four operands of the <a href="#x86_memory">X86 - addressing mode</a>, which are currently matched with custom C++ code). - In addition, we'll extend fragments so that a fragment can match multiple - different patterns.</li> - - <li>We don't automatically infer flags like isStore/isLoad yet.</li> - - <li>We don't automatically generate the set of supported registers and - operations for the <a href="#selectiondag_legalize">Legalizer</a> - yet.</li> - - <li>We don't have a way of tying in custom legalized nodes yet.</li> -</ul> - -<p>Despite these limitations, the instruction selector generator is still quite - useful for most of the binary and logical operations in typical instruction - sets. If you run into any problems or can't figure out how to do something, - please let Chris know!</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="selectiondag_sched">SelectionDAG Scheduling and Formation Phase</a> -</h4> - -<div> - -<p>The scheduling phase takes the DAG of target instructions from the selection - phase and assigns an order. The scheduler can pick an order depending on - various constraints of the machines (i.e. order for minimal register pressure - or try to cover instruction latencies). 
Once an order is established, the - DAG is converted to a list - of <tt><a href="#machineinstr">MachineInstr</a></tt>s and the SelectionDAG is - destroyed.</p> - -<p>Note that this phase is logically separate from the instruction selection - phase, but is tied to it closely in the code because it operates on - SelectionDAGs.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="selectiondag_future">Future directions for the SelectionDAG</a> -</h4> - -<div> - -<ol> - <li>Optional function-at-a-time selection.</li> - - <li>Auto-generate entire selector from <tt>.td</tt> file.</li> -</ol> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="ssamco">SSA-based Machine Code Optimizations</a> -</h3> -<div><p>To Be Written</p></div> - -<!-- ======================================================================= --> -<h3> - <a name="liveintervals">Live Intervals</a> -</h3> - -<div> - -<p>Live Intervals are the ranges (intervals) where a variable is <i>live</i>. - They are used by some <a href="#regalloc">register allocator</a> passes to - determine if two or more virtual registers which require the same physical - register are live at the same point in the program (i.e., they conflict). - When this situation occurs, one virtual register must be <i>spilled</i>.</p> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="livevariable_analysis">Live Variable Analysis</a> -</h4> - -<div> - -<p>The first step in determining the live intervals of variables is to calculate - the set of registers that are immediately dead after the instruction (i.e., - the instruction calculates the value, but it is never used) and the set of - registers that are used by the instruction, but are never used after the - instruction (i.e., they are killed). Live variable information is computed - for each <i>virtual</i> register and <i>register allocatable</i> physical - register in the function. This is done in a very efficient manner because it - uses SSA to sparsely compute lifetime information for virtual registers - (which are in SSA form) and only has to track physical registers within a - block. Before register allocation, LLVM can assume that physical registers - are only live within a single basic block. This allows it to do a single, - local analysis to resolve physical register lifetimes within each basic - block. If a physical register is not register allocatable (e.g., a stack - pointer or condition codes), it is not tracked.</p> - -<p>Physical registers may be live in to or out of a function. Live in values are - typically arguments in registers. Live out values are typically return values - in registers. Live in values are marked as such, and are given a dummy - "defining" instruction during live intervals analysis. If the last basic - block of a function is a <tt>return</tt>, then it's marked as using all live - out values in the function.</p> - -<p><tt>PHI</tt> nodes need to be handled specially, because the calculation of - the live variable information from a depth first traversal of the CFG of the - function won't guarantee that a virtual register used by the <tt>PHI</tt> - node is defined before it's used. 
When a <tt>PHI</tt> node is encountered, - only the definition is handled, because the uses will be handled in other - basic blocks.</p> - -<p>For each <tt>PHI</tt> node of the current basic block, we simulate an - assignment at the end of the current basic block and traverse the successor - basic blocks. If a successor basic block has a <tt>PHI</tt> node and one of - the <tt>PHI</tt> node's operands is coming from the current basic block, then - the variable is marked as <i>alive</i> within the current basic block and all - of its predecessor basic blocks, until the basic block with the defining - instruction is encountered.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="liveintervals_analysis">Live Intervals Analysis</a> -</h4> - -<div> - -<p>We now have the information available to perform the live intervals analysis - and build the live intervals themselves. We start off by numbering the basic - blocks and machine instructions. We then handle the "live-in" values. These - are in physical registers, so the physical register is assumed to be killed - by the end of the basic block. Live intervals for virtual registers are - computed for some ordering of the machine instructions <tt>[1, N]</tt>. A - live interval is an interval <tt>[i, j)</tt>, where <tt>1 <= i <= j - < N</tt>, for which a variable is live.</p> - -<p><i><b>More to come...</b></i></p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="regalloc">Register Allocation</a> -</h3> - -<div> - -<p>The <i>Register Allocation problem</i> consists in mapping a program - <i>P<sub>v</sub></i>, that can use an unbounded number of virtual registers, - to a program <i>P<sub>p</sub></i> that contains a finite (possibly small) - number of physical registers. Each target architecture has a different number - of physical registers. If the number of physical registers is not enough to - accommodate all the virtual registers, some of them will have to be mapped - into memory. These virtuals are called <i>spilled virtuals</i>.</p> - -<!-- _______________________________________________________________________ --> - -<h4> - <a name="regAlloc_represent">How registers are represented in LLVM</a> -</h4> - -<div> - -<p>In LLVM, physical registers are denoted by integer numbers that normally - range from 1 to 1023. To see how this numbering is defined for a particular - architecture, you can read the <tt>GenRegisterNames.inc</tt> file for that - architecture. For instance, by - inspecting <tt>lib/Target/X86/X86GenRegisterInfo.inc</tt> we see that the - 32-bit register <tt>EAX</tt> is denoted by 43, and the MMX register - <tt>MM0</tt> is mapped to 65.</p> - -<p>Some architectures contain registers that share the same physical location. A - notable example is the X86 platform. For instance, in the X86 architecture, - the registers <tt>EAX</tt>, <tt>AX</tt> and <tt>AL</tt> share the first eight - bits. These physical registers are marked as <i>aliased</i> in LLVM. Given a - particular architecture, you can check which registers are aliased by - inspecting its <tt>RegisterInfo.td</tt> file. Moreover, the class - <tt>MCRegAliasIterator</tt> enumerates all the physical registers aliased to - a register.</p> - -<p>Physical registers, in LLVM, are grouped in <i>Register Classes</i>. - Elements in the same register class are functionally equivalent, and can be - interchangeably used. 
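</p>

<p>For example, a register allocator that needs to know whether assigning a
physical register would conflict with an already-used register must consult the
alias sets described above. A minimal sketch using <tt>MCRegAliasIterator</tt>
(the <tt>TRI</tt> pointer and the <tt>RegInUse</tt> bit vector are assumptions
for illustration, not part of any fixed API):</p>

<div class="doc_code">
<pre>
// Returns true if neither Reg nor any register aliased with it is in use.
// RegInUse is a hypothetical BitVector indexed by physical register number.
bool isFreeWithAliases(unsigned Reg, const TargetRegisterInfo *TRI,
                       const BitVector &RegInUse) {
  for (MCRegAliasIterator AI(Reg, TRI, /*IncludeSelf=*/true);
       AI.isValid(); ++AI)
    if (RegInUse.test(*AI))
      return false;
  return true;
}
</pre>
</div>

<p>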
Each virtual register can only be mapped to physical - registers of a particular class. For instance, in the X86 architecture, some - virtuals can only be allocated to 8-bit registers. A register class is - described by <tt>TargetRegisterClass</tt> objects. To discover if a virtual - register is compatible with a given physical register, this code can be used:</p> - -<div class="doc_code"> -<pre> -bool RegMapping_Fer::compatible_class(MachineFunction &mf, - unsigned v_reg, - unsigned p_reg) { - assert(TargetRegisterInfo::isPhysicalRegister(p_reg) && - "Target register must be physical"); - const TargetRegisterClass *trc = mf.getRegInfo().getRegClass(v_reg); - return trc->contains(p_reg); -} -</pre> -</div> - -<p>Sometimes, mostly for debugging purposes, it is useful to change the number - of physical registers available in the target architecture. This must be done - statically, inside the <tt>TargetRegisterInfo.td</tt> file. Just <tt>grep</tt> - for <tt>RegisterClass</tt>, the last parameter of which is a list of - registers. Commenting some out is one simple way to avoid them being - used. A more polite way is to explicitly exclude some registers from - the <i>allocation order</i>. See the definition of the <tt>GR8</tt> register - class in <tt>lib/Target/X86/X86RegisterInfo.td</tt> for an example of this. - </p> - -<p>Virtual registers are also denoted by integer numbers. Contrary to physical - registers, different virtual registers never share the same number. Whereas - physical registers are statically defined in a <tt>TargetRegisterInfo.td</tt> - file and cannot be created by the application developer, that is not the case - with virtual registers. In order to create new virtual registers, use the - method <tt>MachineRegisterInfo::createVirtualRegister()</tt>. This method - will return a new virtual register. Use an <tt>IndexedMap<Foo, - VirtReg2IndexFunctor></tt> to hold information per virtual register. If you - need to enumerate all virtual registers, use the function - <tt>TargetRegisterInfo::index2VirtReg()</tt> to find the virtual register - numbers:</p> - -<div class="doc_code"> -<pre> - for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned VirtReg = TargetRegisterInfo::index2VirtReg(i); - stuff(VirtReg); - } -</pre> -</div> - -<p>Before register allocation, the operands of an instruction are mostly virtual - registers, although physical registers may also be used. In order to check if - a given machine operand is a register, use the boolean - function <tt>MachineOperand::isReg()</tt>. To obtain the integer code of - a register, use <tt>MachineOperand::getReg()</tt>. An instruction may define - or use a register. For instance, <tt>ADD reg:1026 := reg:1025 reg:1024</tt> - defines register 1026, and uses registers 1025 and 1024. Given a - register operand, the method <tt>MachineOperand::isUse()</tt> informs if that - register is being used by the instruction. The - method <tt>MachineOperand::isDef()</tt> informs if that register is being - defined.</p> - -<p>We will call physical registers present in the LLVM bitcode before register - allocation <i>pre-colored registers</i>. Pre-colored registers are used in - many different situations, for instance, to pass parameters of function - calls, and to store results of particular instructions. There are two types - of pre-colored registers: the ones <i>implicitly</i> defined, and - those <i>explicitly</i> defined. 
Explicitly defined registers are normal - operands, and can be accessed - with <tt>MachineInstr::getOperand(int)::getReg()</tt>. In order to check - which registers are implicitly defined by an instruction, use - the <tt>TargetInstrInfo::get(opcode)::ImplicitDefs</tt>, - where <tt>opcode</tt> is the opcode of the target instruction. One important - difference between explicit and implicit physical registers is that the - latter are defined statically for each instruction, whereas the former may - vary depending on the program being compiled. For example, an instruction - that represents a function call will always implicitly define or use the same - set of physical registers. To read the registers implicitly used by an - instruction, - use <tt>TargetInstrInfo::get(opcode)::ImplicitUses</tt>. Pre-colored - registers impose constraints on any register allocation algorithm. The - register allocator must make sure that none of them are overwritten by - the values of virtual registers while still alive.</p> - -</div> - -<!-- _______________________________________________________________________ --> - -<h4> - <a name="regAlloc_howTo">Mapping virtual registers to physical registers</a> -</h4> - -<div> - -<p>There are two ways to map virtual registers to physical registers (or to - memory slots). The first way, that we will call <i>direct mapping</i>, is - based on the use of methods of the classes <tt>TargetRegisterInfo</tt>, - and <tt>MachineOperand</tt>. The second way, that we will call <i>indirect - mapping</i>, relies on the <tt>VirtRegMap</tt> class in order to insert loads - and stores sending and getting values to and from memory.</p> - -<p>The direct mapping provides more flexibility to the developer of the register - allocator; however, it is more error prone, and demands more implementation - work. Basically, the programmer will have to specify where load and store - instructions should be inserted in the target function being compiled in - order to get and store values in memory. To assign a physical register to a - virtual register present in a given operand, - use <tt>MachineOperand::setReg(p_reg)</tt>. To insert a store instruction, - use <tt>TargetInstrInfo::storeRegToStackSlot(...)</tt>, and to insert a - load instruction, use <tt>TargetInstrInfo::loadRegFromStackSlot</tt>.</p> - -<p>The indirect mapping shields the application developer from the complexities - of inserting load and store instructions. In order to map a virtual register - to a physical one, use <tt>VirtRegMap::assignVirt2Phys(vreg, preg)</tt>. In - order to map a certain virtual register to memory, - use <tt>VirtRegMap::assignVirt2StackSlot(vreg)</tt>. This method will return - the stack slot where <tt>vreg</tt>'s value will be located. If it is - necessary to map another virtual register to the same stack slot, - use <tt>VirtRegMap::assignVirt2StackSlot(vreg, stack_location)</tt>. One - important point to consider when using the indirect mapping, is that even if - a virtual register is mapped to memory, it still needs to be mapped to a - physical register. This physical register is the location where the virtual - register is supposed to be found before being stored or after being - reloaded.</p> - -<p>If the indirect strategy is used, after all the virtual registers have been - mapped to physical registers or stack slots, it is necessary to use a spiller - object to place load and store instructions in the code. 
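</p>

<p>A minimal sketch of the indirect mapping decisions described above (the
<tt>enoughPhysRegs</tt> test and the surrounding allocator logic are
hypothetical; the <tt>VirtRegMap</tt> calls are the ones just described):</p>

<div class="doc_code">
<pre>
// VRM is the VirtRegMap for the function being allocated.
if (enoughPhysRegs) {
  // Keep the value in PReg for its whole live range.
  VRM.assignVirt2Phys(VReg, PReg);
} else {
  // Spill: VReg's value will live in a stack slot, but it still needs
  // a physical register around each definition and use.
  int FrameIndex = VRM.assignVirt2StackSlot(VReg);
  (void)FrameIndex; // e.g., remembered for the spiller
}
</pre>
</div>

<p>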
Every virtual that - has been mapped to a stack slot will be stored to memory after being defined - and will be loaded before being used. The implementation of the spiller tries - to recycle load/store instructions, avoiding unnecessary instructions. For an - example of how to invoke the spiller, - see <tt>RegAllocLinearScan::runOnMachineFunction</tt> - in <tt>lib/CodeGen/RegAllocLinearScan.cpp</tt>.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="regAlloc_twoAddr">Handling two address instructions</a> -</h4> - -<div> - -<p>With very rare exceptions (e.g., function calls), the LLVM machine code - instructions are three address instructions. That is, each instruction is - expected to define at most one register, and to use at most two registers. - However, some architectures use two address instructions. In this case, the - defined register is also one of the used registers. For instance, an - instruction such as <tt>ADD %EAX, %EBX</tt> in X86 is actually equivalent - to <tt>%EAX = %EAX + %EBX</tt>.</p> - -<p>In order to produce correct code, LLVM must convert three address - instructions that represent two address instructions into true two address - instructions. LLVM provides the pass <tt>TwoAddressInstructionPass</tt> for - this specific purpose. It must be run before register allocation takes - place. After its execution, the resulting code may no longer be in SSA - form. This happens, for instance, in situations where an instruction such - as <tt>%a = ADD %b %c</tt> is converted to two instructions such as:</p> - -<div class="doc_code"> -<pre> -%a = MOVE %b -%a = ADD %a %c -</pre> -</div> - -<p>Notice that, internally, the second instruction is represented as - <tt>ADD %a[def/use] %c</tt>. I.e., the register operand <tt>%a</tt> is both - used and defined by the instruction.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="regAlloc_ssaDecon">The SSA deconstruction phase</a> -</h4> - -<div> - -<p>An important transformation that happens during register allocation is called - the <i>SSA Deconstruction Phase</i>. The SSA form simplifies many analyses - that are performed on the control flow graph of programs. However, - traditional instruction sets do not implement PHI instructions. Thus, in - order to generate executable code, compilers must replace PHI instructions - with other instructions that preserve their semantics.</p> - -<p>There are many ways in which PHI instructions can safely be removed from the - target code. The most traditional PHI deconstruction algorithm replaces PHI - instructions with copy instructions. That is the strategy adopted by - LLVM. The SSA deconstruction algorithm is implemented - in <tt>lib/CodeGen/PHIElimination.cpp</tt>. In order to invoke this pass, the - identifier <tt>PHIEliminationID</tt> must be marked as required in the code - of the register allocator.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="regAlloc_fold">Instruction folding</a> -</h4> - -<div> - -<p><i>Instruction folding</i> is an optimization performed during register - allocation that removes unnecessary copy instructions. 
For instance, a - sequence of instructions such as:</p> - -<div class="doc_code"> -<pre> -%EBX = LOAD %mem_address -%EAX = COPY %EBX -</pre> -</div> - -<p>can be safely substituted by the single instruction:</p> - -<div class="doc_code"> -<pre> -%EAX = LOAD %mem_address -</pre> -</div> - -<p>Instructions can be folded with - the <tt>TargetRegisterInfo::foldMemoryOperand(...)</tt> method. Care must be - taken when folding instructions; a folded instruction can be quite different - from the original - instruction. See <tt>LiveIntervals::addIntervalsForSpills</tt> - in <tt>lib/CodeGen/LiveIntervalAnalysis.cpp</tt> for an example of its - use.</p> - -</div> - -<!-- _______________________________________________________________________ --> - -<h4> - <a name="regAlloc_builtIn">Built-in register allocators</a> -</h4> - -<div> - -<p>The LLVM infrastructure provides the application developer with four - different register allocators:</p> - -<ul> - <li><i>Fast</i> — This register allocator is the default for debug - builds. It allocates registers at the basic block level, attempting to keep - values in registers and reusing registers as appropriate.</li> - - <li><i>Basic</i> — This is an incremental approach to register - allocation. Live ranges are assigned to registers one at a time in - an order that is driven by heuristics. Since code can be rewritten - on-the-fly during allocation, this framework allows interesting - allocators to be developed as extensions. It is not itself a - production register allocator but is a potentially useful - stand-alone mode for triaging bugs and as a performance baseline.</li> - - <li><i>Greedy</i> — <i>The default allocator</i>. This is a - highly tuned implementation of the <i>Basic</i> allocator that - incorporates global live range splitting. This allocator works hard - to minimize the cost of spill code.</li> - - <li><i>PBQP</i> — A Partitioned Boolean Quadratic Programming (PBQP) - based register allocator. This allocator works by constructing a PBQP - problem representing the register allocation problem under consideration, - solving this using a PBQP solver, and mapping the solution back to a - register assignment.</li> -</ul> - -<p>The type of register allocator used in <tt>llc</tt> can be chosen with the - command line option <tt>-regalloc=...</tt>:</p> - -<div class="doc_code"> -<pre> -$ llc -regalloc=greedy file.bc -o gr.s; -$ llc -regalloc=fast file.bc -o fa.s; -$ llc -regalloc=pbqp file.bc -o pbqp.s; -</pre> -</div> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="proepicode">Prolog/Epilog Code Insertion</a> -</h3> - -<div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="compact_unwind">Compact Unwind</a> -</h4> - -<div> - -<p>Throwing an exception requires <em>unwinding</em> out of a function. The - information on how to unwind a given function is traditionally expressed in - DWARF unwind (a.k.a. frame) info. But that format was originally developed - for debuggers to backtrace, and each Frame Description Entry (FDE) requires - ~20-30 bytes per function. There is also the cost of mapping from an address - in a function to the corresponding FDE at runtime. An alternative unwind - encoding is called <em>compact unwind</em> and requires just 4 bytes per - function.</p> - -<p>The compact unwind encoding is a 32-bit value, which is encoded in an - architecture-specific way. 
It specifies which registers to restore and from - where, and how to unwind out of the function. When the linker creates a final - linked image, it will create a <code>__TEXT,__unwind_info</code> - section. This section is a small and fast way for the runtime to access - unwind info for any given function. If we emit compact unwind info for the - function, that compact unwind info will be encoded in - the <code>__TEXT,__unwind_info</code> section. If we emit DWARF unwind info, - the <code>__TEXT,__unwind_info</code> section will contain the offset of the - FDE in the <code>__TEXT,__eh_frame</code> section in the final linked - image.</p> - -<p>For X86, there are three modes for the compact unwind encoding:</p> - -<dl> - <dt><i>Function with a Frame Pointer (<code>EBP</code> or <code>RBP</code>)</i></dt> - <dd><p><code>EBP/RBP</code>-based frame, where <code>EBP/RBP</code> is pushed - onto the stack immediately after the return address, - then <code>ESP/RSP</code> is moved to <code>EBP/RBP</code>. Thus, to - unwind, <code>ESP/RSP</code> is restored with the - current <code>EBP/RBP</code> value, then <code>EBP/RBP</code> is restored - by popping the stack, and the return is done by popping the stack once - more into the PC. All non-volatile registers that need to be restored must - have been saved in a small range on the stack that runs - from <code>EBP-4</code> to <code>EBP-1020</code> (<code>RBP-8</code> - to <code>RBP-1020</code>). The offset (divided by 4 in 32-bit mode and 8 - in 64-bit mode) is encoded in bits 16-23 (mask: <code>0x00FF0000</code>). - The registers saved are encoded in bits 0-14 - (mask: <code>0x00007FFF</code>) as five 3-bit entries from the following - table:</p> -<table border="1" cellspacing="0"> - <tr> - <th>Compact Number</th> - <th>i386 Register</th> - <th>x86-64 Register</th> - </tr> - <tr> - <td>1</td> - <td><code>EBX</code></td> - <td><code>RBX</code></td> - </tr> - <tr> - <td>2</td> - <td><code>ECX</code></td> - <td><code>R12</code></td> - </tr> - <tr> - <td>3</td> - <td><code>EDX</code></td> - <td><code>R13</code></td> - </tr> - <tr> - <td>4</td> - <td><code>EDI</code></td> - <td><code>R14</code></td> - </tr> - <tr> - <td>5</td> - <td><code>ESI</code></td> - <td><code>R15</code></td> - </tr> - <tr> - <td>6</td> - <td><code>EBP</code></td> - <td><code>RBP</code></td> - </tr> -</table> - -</dd> - - <dt><i>Frameless with a Small Constant Stack Size (<code>EBP</code> - or <code>RBP</code> is not used as a frame pointer)</i></dt> - <dd><p>To return, a constant (encoded in the compact unwind encoding) is added - to the <code>ESP/RSP</code>. Then the return is done by popping the stack - into the PC. All non-volatile registers that need to be restored must have - been saved on the stack immediately after the return address. The stack - size (divided by 4 in 32-bit mode and 8 in 64-bit mode) is encoded in bits - 16-23 (mask: <code>0x00FF0000</code>). There is a maximum stack size of - 1024 bytes in 32-bit mode and 2048 in 64-bit mode. The number of registers - saved is encoded in bits 10-12 (mask: <code>0x00001C00</code>). Bits 0-9 - (mask: <code>0x000003FF</code>) contain which registers were saved and - their order. 
(See - the <code>encodeCompactUnwindRegistersWithoutFrame()</code> function - in <code>lib/Target/X86/X86FrameLowering.cpp</code> for the encoding - algorithm.)</p></dd> - - <dt><i>Frameless with a Large Constant Stack Size (<code>EBP</code> - or <code>RBP</code> is not used as a frame pointer)</i></dt> - <dd><p>This case is like the "Frameless with a Small Constant Stack Size" - case, but the stack size is too large to encode in the compact unwind - encoding. Instead, it requires that the function contains "<code>subl - $nnnnnn, %esp</code>" in its prolog. The compact encoding contains the - offset to the <code>$nnnnnn</code> value in the function in bits 10-12 - (mask: <code>0x00001C00</code>).</p></dd> -</dl> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="latemco">Late Machine Code Optimizations</a> -</h3> -<div><p>To Be Written</p></div> - -<!-- ======================================================================= --> -<h3> - <a name="codeemit">Code Emission</a> -</h3> - -<div> - -<p>The code emission step of code generation is responsible for lowering from -the code generator abstractions (like <a -href="#machinefunction">MachineFunction</a>, <a -href="#machineinstr">MachineInstr</a>, etc) down -to the abstractions used by the MC layer (<a href="#mcinst">MCInst</a>, -<a href="#mcstreamer">MCStreamer</a>, etc). This is -done with a combination of several different classes: the (misnamed) -target-independent AsmPrinter class, target-specific subclasses of AsmPrinter -(such as SparcAsmPrinter), and the TargetLoweringObjectFile class.</p> - -<p>Since the MC layer works at the level of abstraction of object files, it -doesn't have a notion of functions, global variables, etc. Instead, it thinks -about labels, directives, and instructions. A key class used at this time is -the MCStreamer class. This is an abstract API that is implemented in different -ways (e.g. to output a .s file, output an ELF .o file, etc) that is effectively -an "assembler API". MCStreamer has one method per directive, such as EmitLabel, -EmitSymbolAttribute, SwitchSection, etc, which directly correspond to assembly -level directives. -</p> - -<p>If you are interested in implementing a code generator for a target, there -are three important things that you have to implement for your target:</p> - -<ol> -<li>First, you need a subclass of AsmPrinter for your target. This class -implements the general lowering process converting MachineFunction's into MC -label constructs. The AsmPrinter base class provides a number of useful methods -and routines, and also allows you to override the lowering process in some -important ways. You should get much of the lowering for free if you are -implementing an ELF, COFF, or MachO target, because the TargetLoweringObjectFile -class implements much of the common logic.</li> - -<li>Second, you need to implement an instruction printer for your target. The -instruction printer takes an <a href="#mcinst">MCInst</a> and renders it to a -raw_ostream as text. Most of this is automatically generated from the .td file -(when you specify something like "<tt>add $dst, $src1, $src2</tt>" in the -instructions), but you need to implement routines to print operands.</li> - -<li>Third, you need to implement code that lowers a <a -href="#machineinstr">MachineInstr</a> to an MCInst, usually implemented in -"<target>MCInstLower.cpp". 
This lowering process is often target -specific, and is responsible for turning jump table entries, constant pool -indices, global variable addresses, etc. into MCLabels as appropriate. This -translation layer is also responsible for expanding pseudo ops used by the code -generator into the actual machine instructions they correspond to. The MCInsts -that are generated by this are fed into the instruction printer or the encoder. -</li> - -</ol> - -<p>Finally, at your discretion, you can also implement a subclass of -MCCodeEmitter which lowers MCInst's into machine code bytes and relocations. -This is important if you want to support direct .o file emission, or would like -to implement an assembler for your target.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="vliw_packetizer">VLIW Packetizer</a> -</h3> - -<div> - -<p>In a Very Long Instruction Word (VLIW) architecture, the compiler is - responsible for mapping instructions to functional units available on - the architecture. To that end, the compiler creates groups of instructions - called <i>packets</i> or <i>bundles</i>. The VLIW packetizer in LLVM is - a target-independent mechanism to enable the packetization of machine - instructions.</p> - -<!-- _______________________________________________________________________ --> - -<h4> - <a name="vliw_mapping">Mapping from instructions to functional units</a> -</h4> - -<div> - -<p>Instructions in a VLIW target can typically be mapped to multiple functional -units. During the process of packetizing, the compiler must be able to reason -about whether an instruction can be added to a packet. This decision can be -complex since the compiler has to examine all possible mappings of instructions -to functional units. Therefore, to alleviate compilation-time complexity, the -VLIW packetizer parses the instruction classes of a target and generates tables -at compiler build time. These tables can then be queried by the provided -machine-independent API to determine if an instruction can be accommodated in a -packet.</p> -</div> - -<!-- ======================================================================= --> -<h4> - <a name="vliw_repr"> - How the packetization tables are generated and used - </a> -</h4> - -<div> - -<p>The packetizer reads instruction classes from a target's itineraries and -creates a deterministic finite automaton (DFA) to represent the state of a -packet. A DFA consists of three major elements: inputs, states, and -transitions. The set of inputs for the generated DFA represents the instruction -being added to a packet. The states represent the possible consumption -of functional units by instructions in a packet. In the DFA, transitions from -one state to another occur on the addition of an instruction to an existing -packet. If there is a legal mapping of functional units to instructions, then -the DFA contains a corresponding transition. The absence of a transition -indicates that a legal mapping does not exist and that the instruction cannot -be added to the packet.</p> - -<p>To generate tables for a VLIW target, add <i>Target</i>GenDFAPacketizer.inc -as a target to the Makefile in the target directory. The exported API provides -three functions: <tt>DFAPacketizer::clearResources()</tt>, -<tt>DFAPacketizer::reserveResources(MachineInstr *MI)</tt>, and -<tt>DFAPacketizer::canReserveResources(MachineInstr *MI)</tt>. 
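</p>

<p>A hedged sketch of how these three calls are typically combined to fill one
packet greedily (the <tt>DFAPacketizer *ResourceTracker</tt>, the
<tt>Candidates</tt> worklist, and the <tt>CurrentPacket</tt> container are
assumptions for illustration):</p>

<div class="doc_code">
<pre>
ResourceTracker->clearResources();          // start with an empty packet
for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
  MachineInstr *MI = Candidates[i];
  if (ResourceTracker->canReserveResources(MI)) {
    ResourceTracker->reserveResources(MI);  // MI fits; add it to the packet
    CurrentPacket.push_back(MI);
  }
}
</pre>
</div>

<p>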
-
-</div>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="nativeassembler">Implementing a Native Assembler</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Though you're probably reading this because you want to write or maintain a
-compiler backend, LLVM also fully supports building a native assembler.
-We've tried hard to automate the generation of the assembler from the .td files
-(in particular the instruction syntax and encodings), which means that a large
-part of the manual and repetitive data entry can be factored and shared with the
-compiler.</p>
-
-<!-- ======================================================================= -->
-<h3 id="na_instparsing">Instruction Parsing</h3>
-
-<div><p>To Be Written</p></div>
-
-
-<!-- ======================================================================= -->
-<h3 id="na_instaliases">
-  Instruction Alias Processing
-</h3>
-
-<div>
-<p>Once the instruction is parsed, it enters the MatchInstructionImpl function.
-The MatchInstructionImpl function performs alias processing and then does
-actual matching.</p>
-
-<p>Alias processing is the phase that canonicalizes different lexical forms of
-the same instructions down to one representation. There are several different
-kinds of alias that are possible to implement and they are listed below in the
-order that they are processed (which is in order from simplest/weakest to most
-complex/powerful). Generally you want to use the first alias mechanism that
-meets the needs of your instruction, because it will allow a more concise
-description.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>Mnemonic Aliases</h4>
-
-<div>
-
-<p>The first phase of alias processing is simple instruction mnemonic
-remapping for classes of instructions which are allowed with two different
-mnemonics. This phase is a simple and unconditional remapping from one input
-mnemonic to one output mnemonic. It isn't possible for this form of alias to
-look at the operands at all, so the remapping must apply for all forms of a
-given mnemonic. Mnemonic aliases are defined simply, for example X86 has:
-</p>
-
-<div class="doc_code">
-<pre>
-def : MnemonicAlias<"cbw", "cbtw">;
-def : MnemonicAlias<"smovq", "movsq">;
-def : MnemonicAlias<"fldcww", "fldcw">;
-def : MnemonicAlias<"fucompi", "fucomip">;
-def : MnemonicAlias<"ud2a", "ud2">;
-</pre>
-</div>
-
-<p>... and many others. With a MnemonicAlias definition, the mnemonic is
-remapped simply and directly.
Though MnemonicAlias's can't look at any aspect
-of the instruction (such as the operands) they can depend on global modes (the
-same ones supported by the matcher), through a Requires clause:</p>
-
-<div class="doc_code">
-<pre>
-def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;
-</pre>
-</div>
-
-<p>In this example, the mnemonic gets mapped into a different one depending
-on the current instruction set.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>Instruction Aliases</h4>
-
-<div>
-
-<p>The most general phase of alias processing occurs while matching is
-happening: it provides new forms for the matcher to match along with a specific
-instruction to generate. An instruction alias has two parts: the string to
-match and the instruction to generate. For example:
-</p>
-
-<div class="doc_code">
-<pre>
-def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8  :$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX32rr8  GR32:$dst, GR8  :$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16 :$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr8  GR64:$dst, GR8  :$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16 :$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32 :$src)>;
-</pre>
-</div>
-
-<p>This shows a powerful example of the instruction aliases, matching the
-same mnemonic in multiple different ways depending on what operands are present
-in the assembly. The result of instruction aliases can include operands in a
-different order than the destination instruction, and can use an input
-multiple times, for example:</p>
-
-<div class="doc_code">
-<pre>
-def : InstAlias<"clrb $reg", (XOR8rr  GR8 :$reg, GR8 :$reg)>;
-def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>;
-def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>;
-def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>;
-</pre>
-</div>
-
-<p>This example also shows that tied operands are only listed once. In the X86
-backend, XOR8rr has two input GR8's and one output GR8 (where an input is tied
-to the output). InstAliases take a flattened operand list without duplicates
-for tied operands. The result of an instruction alias can also use immediates
-and fixed physical registers which are added as simple immediate operands in the
-result, for example:</p>
-
-<div class="doc_code">
-<pre>
-// Fixed Immediate operand.
-def : InstAlias<"aad", (AAD8i8 10)>;
-
-// Fixed register operand.
-def : InstAlias<"fcomi", (COM_FIr ST1)>;
-
-// Simple alias.
-def : InstAlias<"fcomi $reg", (COM_FIr RST:$reg)>;
-</pre>
-</div>
-
-
-<p>Instruction aliases can also have a Requires clause to make them
-subtarget specific.</p>
-
-<p>If the back-end supports it, the instruction printer can automatically emit
-  the alias rather than what's being aliased. It typically leads to better,
-  more readable code.
If it's better to print out what's being aliased, then - pass a '0' as the third parameter to the InstAlias definition.</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3 id="na_matching">Instruction Matching</h3> - -<div><p>To Be Written</p></div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="targetimpls">Target-specific Implementation Notes</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>This section of the document explains features or design decisions that are - specific to the code generator for a particular target. First we start - with a table that summarizes what features are supported by each target.</p> - -<!-- ======================================================================= --> -<h3> - <a name="targetfeatures">Target Feature Matrix</a> -</h3> - -<div> - -<p>Note that this table does not include the C backend or Cpp backends, since -they do not use the target independent code generator infrastructure. It also -doesn't list features that are not supported fully by any target yet. It -considers a feature to be supported if at least one subtarget supports it. A -feature being supported means that it is useful and works for most cases, it -does not indicate that there are zero known bugs in the implementation. Here -is the key:</p> - - -<table border="1" cellspacing="0"> - <tr> - <th>Unknown</th> - <th>No support</th> - <th>Partial Support</th> - <th>Complete Support</th> - </tr> - <tr> - <td class="unknown"></td> - <td class="no"></td> - <td class="partial"></td> - <td class="yes"></td> - </tr> -</table> - -<p>Here is the table:</p> - -<table width="689" border="1" cellspacing="0"> -<tr><td></td> -<td colspan="13" align="center" style="background-color:#ffc">Target</td> -</tr> - <tr> - <th>Feature</th> - <th>ARM</th> - <th>CellSPU</th> - <th>Hexagon</th> - <th>MBlaze</th> - <th>MSP430</th> - <th>Mips</th> - <th>PTX</th> - <th>PowerPC</th> - <th>Sparc</th> - <th>X86</th> - <th>XCore</th> - </tr> - -<tr> - <td><a href="#feat_reliable">is generally reliable</a></td> - <td class="yes"></td> <!-- ARM --> - <td class="no"></td> <!-- CellSPU --> - <td class="yes"></td> <!-- Hexagon --> - <td class="no"></td> <!-- MBlaze --> - <td class="unknown"></td> <!-- MSP430 --> - <td class="yes"></td> <!-- Mips --> - <td class="no"></td> <!-- PTX --> - <td class="yes"></td> <!-- PowerPC --> - <td class="yes"></td> <!-- Sparc --> - <td class="yes"></td> <!-- X86 --> - <td class="unknown"></td> <!-- XCore --> -</tr> - -<tr> - <td><a href="#feat_asmparser">assembly parser</a></td> - <td class="no"></td> <!-- ARM --> - <td class="no"></td> <!-- CellSPU --> - <td class="no"></td> <!-- Hexagon --> - <td class="yes"></td> <!-- MBlaze --> - <td class="no"></td> <!-- MSP430 --> - <td class="no"></td> <!-- Mips --> - <td class="no"></td> <!-- PTX --> - <td class="no"></td> <!-- PowerPC --> - <td class="no"></td> <!-- Sparc --> - <td class="yes"></td> <!-- X86 --> - <td class="no"></td> <!-- XCore --> -</tr> - -<tr> - <td><a href="#feat_disassembler">disassembler</a></td> - <td class="yes"></td> <!-- ARM --> - <td class="no"></td> <!-- CellSPU --> - <td class="no"></td> <!-- Hexagon --> - <td class="yes"></td> <!-- MBlaze --> - <td class="no"></td> <!-- MSP430 --> - <td class="no"></td> <!-- Mips --> - <td class="no"></td> <!-- PTX --> - <td class="no"></td> <!-- PowerPC --> - <td class="no"></td> <!-- Sparc --> - <td 
class="yes"></td> <!-- X86 --> - <td class="no"></td> <!-- XCore --> -</tr> - -<tr> - <td><a href="#feat_inlineasm">inline asm</a></td> - <td class="yes"></td> <!-- ARM --> - <td class="no"></td> <!-- CellSPU --> - <td class="yes"></td> <!-- Hexagon --> - <td class="yes"></td> <!-- MBlaze --> - <td class="unknown"></td> <!-- MSP430 --> - <td class="no"></td> <!-- Mips --> - <td class="unknown"></td> <!-- PTX --> - <td class="yes"></td> <!-- PowerPC --> - <td class="unknown"></td> <!-- Sparc --> - <td class="yes"></td> <!-- X86 --> - <td class="unknown"></td> <!-- XCore --> -</tr> - -<tr> - <td><a href="#feat_jit">jit</a></td> - <td class="partial"><a href="#feat_jit_arm">*</a></td> <!-- ARM --> - <td class="no"></td> <!-- CellSPU --> - <td class="no"></td> <!-- Hexagon --> - <td class="no"></td> <!-- MBlaze --> - <td class="unknown"></td> <!-- MSP430 --> - <td class="yes"></td> <!-- Mips --> - <td class="unknown"></td> <!-- PTX --> - <td class="yes"></td> <!-- PowerPC --> - <td class="unknown"></td> <!-- Sparc --> - <td class="yes"></td> <!-- X86 --> - <td class="unknown"></td> <!-- XCore --> -</tr> - -<tr> - <td><a href="#feat_objectwrite">.o file writing</a></td> - <td class="no"></td> <!-- ARM --> - <td class="no"></td> <!-- CellSPU --> - <td class="no"></td> <!-- Hexagon --> - <td class="yes"></td> <!-- MBlaze --> - <td class="no"></td> <!-- MSP430 --> - <td class="no"></td> <!-- Mips --> - <td class="no"></td> <!-- PTX --> - <td class="no"></td> <!-- PowerPC --> - <td class="no"></td> <!-- Sparc --> - <td class="yes"></td> <!-- X86 --> - <td class="no"></td> <!-- XCore --> -</tr> - -<tr> - <td><a href="#feat_tailcall">tail calls</a></td> - <td class="yes"></td> <!-- ARM --> - <td class="no"></td> <!-- CellSPU --> - <td class="yes"></td> <!-- Hexagon --> - <td class="no"></td> <!-- MBlaze --> - <td class="unknown"></td> <!-- MSP430 --> - <td class="no"></td> <!-- Mips --> - <td class="unknown"></td> <!-- PTX --> - <td class="yes"></td> <!-- PowerPC --> - <td class="unknown"></td> <!-- Sparc --> - <td class="yes"></td> <!-- X86 --> - <td class="unknown"></td> <!-- XCore --> -</tr> - -<tr> - <td><a href="#feat_segstacks">segmented stacks</a></td> - <td class="no"></td> <!-- ARM --> - <td class="no"></td> <!-- CellSPU --> - <td class="no"></td> <!-- Hexagon --> - <td class="no"></td> <!-- MBlaze --> - <td class="no"></td> <!-- MSP430 --> - <td class="no"></td> <!-- Mips --> - <td class="no"></td> <!-- PTX --> - <td class="no"></td> <!-- PowerPC --> - <td class="no"></td> <!-- Sparc --> - <td class="partial"><a href="#feat_segstacks_x86">*</a></td> <!-- X86 --> - <td class="no"></td> <!-- XCore --> -</tr> - - -</table> - -<!-- _______________________________________________________________________ --> -<h4 id="feat_reliable">Is Generally Reliable</h4> - -<div> -<p>This box indicates whether the target is considered to be production quality. -This indicates that the target has been used as a static compiler to -compile large amounts of code by a variety of different people and is in -continuous use.</p> -</div> - -<!-- _______________________________________________________________________ --> -<h4 id="feat_asmparser">Assembly Parser</h4> - -<div> -<p>This box indicates whether the target supports parsing target specific .s -files by implementing the MCAsmParser interface. 
This is required for llvm-mc
-to be able to act as a native assembler and is required for inline assembly
-support in the native .o file writer.</p>
-
-</div>
-
-
-<!-- _______________________________________________________________________ -->
-<h4 id="feat_disassembler">Disassembler</h4>
-
-<div>
-<p>This box indicates whether the target supports the MCDisassembler API for
-disassembling machine opcode bytes into MCInst's.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4 id="feat_inlineasm">Inline Asm</h4>
-
-<div>
-<p>This box indicates whether the target supports most popular inline assembly
-constraints and modifiers.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4 id="feat_jit">JIT Support</h4>
-
-<div>
-<p>This box indicates whether the target supports the JIT compiler through
-the ExecutionEngine interface.</p>
-
-<p id="feat_jit_arm">The ARM backend has basic support for integer code
-in ARM codegen mode, but lacks NEON and full Thumb support.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4 id="feat_objectwrite">.o File Writing</h4>
-
-<div>
-
-<p>This box indicates whether the target supports writing .o files (e.g. MachO,
-ELF, and/or COFF) directly from the target. Note that the target also
-must include an assembly parser and general inline assembly support for full
-inline assembly support in the .o writer.</p>
-
-<p>Targets that don't support this feature can obviously still write out .o
-files; they just rely on having an external assembler to translate from a .s
-file to a .o file (as is the case for many C compilers).</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4 id="feat_tailcall">Tail Calls</h4>
-
-<div>
-
-<p>This box indicates whether the target supports guaranteed tail calls. These
-are calls marked "<a href="LangRef.html#i_call">tail</a>" and use the fastcc
-calling convention. Please see the <a href="#tailcallopt">tail call
-section</a> for more details.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4 id="feat_segstacks">Segmented Stacks</h4>
-
-<div>
-
-<p>This box indicates whether the target supports segmented stacks. This
-replaces the traditional large C stack with many linked segments. It
-is compatible with the <a href="http://gcc.gnu.org/wiki/SplitStacks">gcc
-implementation</a> used by the Go front end.</p>
-
-<p id="feat_segstacks_x86">Basic support exists on the X86 backend. Currently
-vararg doesn't work and the object files are not marked the way the gold
-linker expects, but simple Go programs can be built by dragonegg.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="tailcallopt">Tail call optimization</a>
-</h3>
-
-<div>
-
-<p>Tail call optimization, where the callee reuses the stack of the caller, is
-  currently supported on x86/x86-64 and PowerPC.
It is performed if:</p> - -<ul> - <li>Caller and callee have the calling convention <tt>fastcc</tt> or - <tt>cc 10</tt> (GHC call convention).</li> - - <li>The call is a tail call - in tail position (ret immediately follows call - and ret uses value of call or is void).</li> - - <li>Option <tt>-tailcallopt</tt> is enabled.</li> - - <li>Platform specific constraints are met.</li> -</ul> - -<p>x86/x86-64 constraints:</p> - -<ul> - <li>No variable argument lists are used.</li> - - <li>On x86-64 when generating GOT/PIC code only module-local calls (visibility - = hidden or protected) are supported.</li> -</ul> - -<p>PowerPC constraints:</p> - -<ul> - <li>No variable argument lists are used.</li> - - <li>No byval parameters are used.</li> - - <li>On ppc32/64 GOT/PIC only module-local calls (visibility = hidden or protected) are supported.</li> -</ul> - -<p>Example:</p> - -<p>Call as <tt>llc -tailcallopt test.ll</tt>.</p> - -<div class="doc_code"> -<pre> -declare fastcc i32 @tailcallee(i32 inreg %a1, i32 inreg %a2, i32 %a3, i32 %a4) - -define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { - %l1 = add i32 %in1, %in2 - %tmp = tail call fastcc i32 @tailcallee(i32 %in1 inreg, i32 %in2 inreg, i32 %in1, i32 %l1) - ret i32 %tmp -} -</pre> -</div> - -<p>Implications of <tt>-tailcallopt</tt>:</p> - -<p>To support tail call optimization in situations where the callee has more - arguments than the caller a 'callee pops arguments' convention is used. This - currently causes each <tt>fastcc</tt> call that is not tail call optimized - (because one or more of above constraints are not met) to be followed by a - readjustment of the stack. So performance might be worse in such cases.</p> - -</div> -<!-- ======================================================================= --> -<h3> - <a name="sibcallopt">Sibling call optimization</a> -</h3> - -<div> - -<p>Sibling call optimization is a restricted form of tail call optimization. - Unlike tail call optimization described in the previous section, it can be - performed automatically on any tail calls when <tt>-tailcallopt</tt> option - is not specified.</p> - -<p>Sibling call optimization is currently performed on x86/x86-64 when the - following constraints are met:</p> - -<ul> - <li>Caller and callee have the same calling convention. It can be either - <tt>c</tt> or <tt>fastcc</tt>. - - <li>The call is a tail call - in tail position (ret immediately follows call - and ret uses value of call or is void).</li> - - <li>Caller and callee have matching return type or the callee result is not - used. - - <li>If any of the callee arguments are being passed in stack, they must be - available in caller's own incoming argument stack and the frame offsets - must be the same. -</ul> - -<p>Example:</p> -<div class="doc_code"> -<pre> -declare i32 @bar(i32, i32) - -define i32 @foo(i32 %a, i32 %b, i32 %c) { -entry: - %0 = tail call i32 @bar(i32 %a, i32 %b) - ret i32 %0 -} -</pre> -</div> - -</div> -<!-- ======================================================================= --> -<h3> - <a name="x86">The X86 backend</a> -</h3> - -<div> - -<p>The X86 code generator lives in the <tt>lib/Target/X86</tt> directory. 
This
-  code generator is capable of targeting a variety of x86-32 and x86-64
-  processors, and includes support for ISA extensions such as MMX and SSE.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="x86_tt">X86 Target Triples supported</a>
-</h4>
-
-<div>
-
-<p>The following are the known target triples that are supported by the X86
-  backend. This is not an exhaustive list, and it would be useful to add those
-  that people test.</p>
-
-<ul>
-  <li><b>i686-pc-linux-gnu</b> — Linux</li>
-
-  <li><b>i386-unknown-freebsd5.3</b> — FreeBSD 5.3</li>
-
-  <li><b>i686-pc-cygwin</b> — Cygwin on Win32</li>
-
-  <li><b>i686-pc-mingw32</b> — MingW on Win32</li>
-
-  <li><b>i386-pc-mingw32msvc</b> — MingW crosscompiler on Linux</li>
-
-  <li><b>i686-apple-darwin*</b> — Apple Darwin on X86</li>
-
-  <li><b>x86_64-unknown-linux-gnu</b> — Linux</li>
-</ul>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="x86_cc">X86 Calling Conventions supported</a>
-</h4>
-
-
-<div>
-
-<p>The following target-specific calling conventions are known to the
-backend:</p>
-
-<ul>
-<li><b>x86_StdCall</b> — stdcall calling convention seen on Microsoft
-  Windows platform (CC ID = 64).</li>
-<li><b>x86_FastCall</b> — fastcall calling convention seen on Microsoft
-  Windows platform (CC ID = 65).</li>
-<li><b>x86_ThisCall</b> — Similar to X86_StdCall. Passes first argument
-  in ECX, others via stack. Callee is responsible for stack cleaning. This
-  convention is used by MSVC by default for methods in its ABI
-  (CC ID = 70).</li>
-</ul>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="x86_memory">Representing X86 addressing modes in MachineInstrs</a>
-</h4>
-
-<div>
-
-<p>The x86 has a very flexible way of accessing memory. It is capable of
-  forming memory addresses of the following form directly in integer
-  instructions (which use ModR/M addressing):</p>
-
-<div class="doc_code">
-<pre>
-SegmentReg: Base + [1,2,4,8] * IndexReg + Disp32
-</pre>
-</div>
-
-<p>In order to represent this, LLVM tracks no less than 5 operands for each
-  memory operand of this form. This means that the "load" form of
-  '<tt>mov</tt>' has the following <tt>MachineOperand</tt>s in this order:</p>
-
-<div class="doc_code">
-<pre>
-Index:        0     |    1        2       3           4             5
-Meaning:   DestReg, | BaseReg,  Scale, IndexReg, Displacement,   Segment
-OperandTy: VirtReg, | VirtReg, UnsImm, VirtReg,   SignExtImm,   PhysReg
-</pre>
-</div>
-
-<p>Stores, and all other instructions, treat the five memory operands in the
-  same way and in the same order. If the segment register is unspecified
-  (regno = 0), then no segment override is generated. "Lea" operations do not
-  have a segment register specified, so they only have 4 operands for their
-  memory reference.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="x86_addrspaces">X86 address spaces supported</a>
-</h4>
-
-<div>
-
-<p>x86 has a feature which provides
-  the ability to perform loads and stores to different address spaces
-  via the x86 segment registers. A segment override prefix byte on an
-  instruction causes the instruction's memory access to go to the specified
-  segment. LLVM address space 0 is the default address space, which includes
-  the stack, and any unqualified memory accesses in a program. Address spaces
-  1-255 are currently reserved for user-defined code.
The GS-segment is
-  represented by address space 256, while the FS-segment is represented by
-  address space 257. Other x86 segments have yet to be allocated address space
-  numbers.</p>
-
-<p>While these address spaces may seem similar to TLS via the
-  <tt>thread_local</tt> keyword, and often use the same underlying hardware,
-  there are some fundamental differences.</p>
-
-<p>The <tt>thread_local</tt> keyword applies to global variables and
-  specifies that they are to be allocated in thread-local memory. There are
-  no type qualifiers involved, and these variables can be pointed to with
-  normal pointers and accessed with normal loads and stores.
-  The <tt>thread_local</tt> keyword is target-independent at the LLVM IR
-  level (though LLVM doesn't yet have implementations of it for some
-  configurations).</p>
-
-<p>Special address spaces, in contrast, apply to static types. Every
-  load and store has a particular address space in its address operand type,
-  and this is what determines which address space is accessed.
-  LLVM ignores these special address space qualifiers on global variables,
-  and does not provide a way to directly allocate storage in them.
-  At the LLVM IR level, the behavior of these special address spaces depends
-  in part on the underlying OS or runtime environment, and they are specific
-  to x86 (and LLVM doesn't yet handle them correctly in some cases).</p>
-
-<p>Some operating systems and runtime environments use (or may in the future
-  use) the FS/GS-segment registers for various low-level purposes, so care
-  should be taken when considering them.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="x86_names">Instruction naming</a>
-</h4>
-
-<div>
-
-<p>An instruction name consists of the base name, a default operand size, and
-  a character per operand with an optional special size. For example:</p>
-
-<div class="doc_code">
-<pre>
-ADD8rr      -> add, 8-bit register, 8-bit register
-IMUL16rmi   -> imul, 16-bit register, 16-bit memory, 16-bit immediate
-IMUL16rmi8  -> imul, 16-bit register, 16-bit memory, 8-bit immediate
-MOVSX32rm16 -> movsx, 32-bit register, 16-bit memory
-</pre>
-</div>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="ppc">The PowerPC backend</a>
-</h3>
-
-<div>
-
-<p>The PowerPC code generator lives in the lib/Target/PowerPC directory. The
-  code generation is retargetable to several variations or <i>subtargets</i> of
-  the PowerPC ISA, including ppc32, ppc64 and altivec.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="ppc_abi">LLVM PowerPC ABI</a>
-</h4>
-
-<div>
-
-<p>LLVM follows the AIX PowerPC ABI, with two deviations. First, LLVM uses
-  PC-relative (PIC) or static addressing for accessing global values, so no TOC
-  (r2) is used. Second, r31 is used as a frame pointer to allow dynamic growth
-  of a stack frame. LLVM takes advantage of having no TOC to provide space to
-  save the frame pointer in the PowerPC linkage area of the caller frame.
-  Other details of the PowerPC ABI can be found at <a href=
-  "http://developer.apple.com/documentation/DeveloperTools/Conceptual/LowLevelABI/Articles/32bitPowerPC.html"
-  >PowerPC ABI</a>. Note: This link describes the 32 bit ABI.
The 64 bit ABI - is similar except space for GPRs are 8 bytes wide (not 4) and r13 is reserved - for system use.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="ppc_frame">Frame Layout</a> -</h4> - -<div> - -<p>The size of a PowerPC frame is usually fixed for the duration of a - function's invocation. Since the frame is fixed size, all references - into the frame can be accessed via fixed offsets from the stack pointer. The - exception to this is when dynamic alloca or variable sized arrays are - present, then a base pointer (r31) is used as a proxy for the stack pointer - and stack pointer is free to grow or shrink. A base pointer is also used if - llvm-gcc is not passed the -fomit-frame-pointer flag. The stack pointer is - always aligned to 16 bytes, so that space allocated for altivec vectors will - be properly aligned.</p> - -<p>An invocation frame is laid out as follows (low memory at top);</p> - -<table class="layout"> - <tr> - <td>Linkage<br><br></td> - </tr> - <tr> - <td>Parameter area<br><br></td> - </tr> - <tr> - <td>Dynamic area<br><br></td> - </tr> - <tr> - <td>Locals area<br><br></td> - </tr> - <tr> - <td>Saved registers area<br><br></td> - </tr> - <tr style="border-style: none hidden none hidden;"> - <td><br></td> - </tr> - <tr> - <td>Previous Frame<br><br></td> - </tr> -</table> - -<p>The <i>linkage</i> area is used by a callee to save special registers prior - to allocating its own frame. Only three entries are relevant to LLVM. The - first entry is the previous stack pointer (sp), aka link. This allows - probing tools like gdb or exception handlers to quickly scan the frames in - the stack. A function epilog can also use the link to pop the frame from the - stack. The third entry in the linkage area is used to save the return - address from the lr register. Finally, as mentioned above, the last entry is - used to save the previous frame pointer (r31.) The entries in the linkage - area are the size of a GPR, thus the linkage area is 24 bytes long in 32 bit - mode and 48 bytes in 64 bit mode.</p> - -<p>32 bit linkage area</p> - -<table class="layout"> - <tr> - <td>0</td> - <td>Saved SP (r1)</td> - </tr> - <tr> - <td>4</td> - <td>Saved CR</td> - </tr> - <tr> - <td>8</td> - <td>Saved LR</td> - </tr> - <tr> - <td>12</td> - <td>Reserved</td> - </tr> - <tr> - <td>16</td> - <td>Reserved</td> - </tr> - <tr> - <td>20</td> - <td>Saved FP (r31)</td> - </tr> -</table> - -<p>64 bit linkage area</p> - -<table class="layout"> - <tr> - <td>0</td> - <td>Saved SP (r1)</td> - </tr> - <tr> - <td>8</td> - <td>Saved CR</td> - </tr> - <tr> - <td>16</td> - <td>Saved LR</td> - </tr> - <tr> - <td>24</td> - <td>Reserved</td> - </tr> - <tr> - <td>32</td> - <td>Reserved</td> - </tr> - <tr> - <td>40</td> - <td>Saved FP (r31)</td> - </tr> -</table> - -<p>The <i>parameter area</i> is used to store arguments being passed to a callee - function. Following the PowerPC ABI, the first few arguments are actually - passed in registers, with the space in the parameter area unused. However, - if there are not enough registers or the callee is a thunk or vararg - function, these register arguments can be spilled into the parameter area. - Thus, the parameter area must be large enough to store all the parameters for - the largest call sequence made by the caller. The size must also be - minimally large enough to spill registers r3-r10. 
This allows callees blind
-  to the call signature, such as thunks and vararg functions, enough space to
-  cache the argument registers. Therefore, the parameter area is minimally 32
-  bytes (64 bytes in 64 bit mode). Also note that since the parameter area is
-  a fixed offset from the top of the frame, a callee can access its spilled
-  arguments using fixed offsets from the stack pointer (or base pointer).</p>
-
-<p>Combining the information about the linkage and parameter areas with the
-  alignment requirements, a stack frame is minimally 64 bytes in 32 bit mode
-  and 128 bytes in 64 bit mode.</p>
-
-<p>The <i>dynamic area</i> starts out as size zero. If a function uses dynamic
-  alloca then space is added to the stack, the linkage and parameter areas are
-  shifted to the top of the stack, and the new space is available immediately
-  below the linkage and parameter areas. The cost of shifting the linkage and
-  parameter areas is minor since only the link value needs to be copied. The
-  link value can be easily fetched by adding the original frame size to the
-  base pointer. Note that allocations in the dynamic space need to observe
-  16 byte alignment.</p>
-
-<p>The <i>locals area</i> is where the llvm compiler reserves space for local
-  variables.</p>
-
-<p>The <i>saved registers area</i> is where the llvm compiler spills callee
-  saved registers on entry to the callee.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="ppc_prolog">Prolog/Epilog</a>
-</h4>
-
-<div>
-
-<p>The llvm prolog and epilog are the same as described in the PowerPC ABI, with
-  the following exceptions. Callee saved registers are spilled after the frame
-  is created. This allows the llvm epilog/prolog support to be common with
-  other targets. The base pointer callee saved register r31 is saved in the
-  TOC slot of the linkage area. This simplifies allocation of space for the
-  base pointer and makes it convenient to locate programmatically and during
-  debugging.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="ppc_dynamic">Dynamic Allocation</a>
-</h4>
-
-<div>
-
-<p><i>TODO - More to come.</i></p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="ptx">The PTX backend</a>
-</h3>
-
-<div>
-
-<p>The PTX code generator lives in the lib/Target/PTX directory. It is
-  currently a work-in-progress, but already supports most of the code
-  generation functionality needed to generate correct PTX kernels for
-  CUDA devices.</p>
-
-<p>The code generator can target PTX 2.0+ and shader model 1.0+. The
-  PTX ISA Reference Manual is used as the primary source of ISA
-  information, though an effort is made to make the output of the code
-  generator match the output of the NVidia nvcc compiler, whenever
-  possible.</p>
-
-<p>Code Generator Options:</p>
-<table border="1" cellspacing="0">
-  <tr>
-    <th>Option</th>
-    <th>Description</th>
-  </tr>
-  <tr>
-    <td><code>double</code></td>
-    <td align="left">If enabled, the map_f64_to_f32 directive is
-    disabled in the PTX output, allowing native double-precision
-    arithmetic</td>
-  </tr>
-  <tr>
-    <td><code>no-fma</code></td>
-    <td align="left">Disable generation of Fused-Multiply Add
-    instructions, which may be beneficial for some devices</td>
-  </tr>
-  <tr>
-    <td><code>smxy / computexy</code></td>
-    <td align="left">Set shader model/compute capability to x.y,
-    e.g.
sm20 or compute13</td>
-  </tr>
-</table>
-
-<p>Working:</p>
-<ul>
-  <li>Arithmetic instruction selection (including combo FMA)</li>
-  <li>Bitwise instruction selection</li>
-  <li>Control-flow instruction selection</li>
-  <li>Function calls (only on SM 2.0+, with no return arguments)</li>
-  <li>Address spaces (0 = global, 1 = constant, 2 = local, 4 =
-  shared)</li>
-  <li>Thread synchronization (bar.sync)</li>
-  <li>Special register reads ([N]TID, [N]CTAID, PMx, CLOCK, etc.)</li>
-</ul>
-
-<p>In Progress:</p>
-<ul>
-  <li>Robust call instruction selection</li>
-  <li>Stack frame allocation</li>
-  <li>Device-specific instruction scheduling optimizations</li>
-</ul>
-
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-
-</body>
-</html>
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst
new file mode 100644
index 0000000..d1d0231
--- /dev/null
+++ b/docs/CodeGenerator.rst
@@ -0,0 +1,2428 @@
+.. _code_generator:
+
+==========================================
+The LLVM Target-Independent Code Generator
+==========================================
+
+.. role:: raw-html(raw)
+   :format: html
+
+.. raw:: html
+
+  <style>
+    .unknown { background-color: #C0C0C0; text-align: center; }
+    .unknown:before { content: "?" }
+    .no { background-color: #C11B17 }
+    .no:before { content: "N" }
+    .partial { background-color: #F88017 }
+    .yes { background-color: #0F0; }
+    .yes:before { content: "Y" }
+  </style>
+
+.. contents::
+   :local:
+
+.. warning::
+  This is a work in progress.
+
+Introduction
+============
+
+The LLVM target-independent code generator is a framework that provides a suite
+of reusable components for translating the LLVM internal representation to the
+machine code for a specified target---either in assembly form (suitable for a
+static compiler) or in binary machine code format (usable for a JIT
+compiler). The LLVM target-independent code generator consists of six main
+components:
+
+1. `Abstract target description`_ interfaces which capture important properties
+   about various aspects of the machine, independently of how they will be used.
+   These interfaces are defined in ``include/llvm/Target/``.
+
+2. Classes used to represent the `code being generated`_ for a target. These
+   classes are intended to be abstract enough to represent the machine code for
+   *any* target machine. These classes are defined in
+   ``include/llvm/CodeGen/``. At this level, concepts like "constant pool
+   entries" and "jump tables" are explicitly exposed.
+
+3. Classes and algorithms used to represent code at the object file level, the
+   `MC Layer`_. These classes represent assembly level constructs like labels,
+   sections, and instructions. At this level, concepts like "constant pool
+   entries" and "jump tables" don't exist.
+
+4. `Target-independent algorithms`_ used to implement various phases of native
+   code generation (register allocation, scheduling, stack frame representation,
+   etc). This code lives in ``lib/CodeGen/``.
+
+5.
`Implementations of the abstract target description interfaces`_ for
+   particular targets. These machine descriptions make use of the components
+   provided by LLVM, and can optionally provide custom target-specific passes,
+   to build complete code generators for a specific target. Target descriptions
+   live in ``lib/Target/``.
+
+6. The target-independent JIT components. The LLVM JIT is completely target
+   independent (it uses the ``TargetJITInfo`` structure to interface with the
+   target for target-specific issues). The code for the target-independent JIT
+   lives in ``lib/ExecutionEngine/JIT``.
+
+Depending on which part of the code generator you are interested in working on,
+different pieces of this will be useful to you. In any case, you should be
+familiar with the `target description`_ and `machine code representation`_
+classes. If you want to add a backend for a new target, you will need to
+`implement the target description`_ classes for your new target and understand
+the `LLVM code representation <LangRef.html>`_. If you are interested in
+implementing a new `code generation algorithm`_, it should only depend on the
+target-description and machine code representation classes, ensuring that it is
+portable.
+
+Required components in the code generator
+-----------------------------------------
+
+The two pieces of the LLVM code generator are the high-level interface to the
+code generator and the set of reusable components that can be used to build
+target-specific backends. The two most important interfaces (:raw-html:`<tt>`
+`TargetMachine`_ :raw-html:`</tt>` and :raw-html:`<tt>` `TargetData`_
+:raw-html:`</tt>`) are the only ones that are required to be defined for a
+backend to fit into the LLVM system, but the others must be defined if the
+reusable code generator components are going to be used.
+
+This design has two important implications. The first is that LLVM can support
+completely non-traditional code generation targets. For example, the C backend
+does not require register allocation, instruction selection, or any of the other
+standard components provided by the system. As such, it only implements these
+two interfaces, and does its own thing. Note that the C backend was removed
+from trunk in the LLVM 3.1 release. Another example of a code generator like
+this is a (purely hypothetical) backend that converts LLVM to the GCC RTL form
+and uses GCC to emit machine code for a target.
+
+This design also implies that it is possible to design and implement radically
+different code generators in the LLVM system that do not make use of any of the
+built-in components. Doing so is not recommended at all, but could be required
+for radically different targets that do not fit into the LLVM machine
+description model: FPGAs for example.
+
+.. _high-level design of the code generator:
+
+The high-level design of the code generator
+-------------------------------------------
+
+The LLVM target-independent code generator is designed to support efficient and
+quality code generation for standard register-based microprocessors. Code
+generation in this model is divided into the following stages:
+
+1. `Instruction Selection`_ --- This phase determines an efficient way to
+   express the input LLVM code in the target instruction set. This stage
+   produces the initial code for the program in the target instruction set, then
+   makes use of virtual registers in SSA form and physical registers that
+   represent any required register assignments due to target constraints or
+   calling conventions. This step turns the LLVM code into a DAG of target
This step turns the LLVM code into a DAG of target + instructions. + +2. `Scheduling and Formation`_ --- This phase takes the DAG of target + instructions produced by the instruction selection phase, determines an + ordering of the instructions, then emits the instructions as :raw-html:`<tt>` + `MachineInstr`_\s :raw-html:`</tt>` with that ordering. Note that we + describe this in the `instruction selection section`_ because it operates on + a `SelectionDAG`_. + +3. `SSA-based Machine Code Optimizations`_ --- This optional stage consists of a + series of machine-code optimizations that operate on the SSA-form produced by + the instruction selector. Optimizations like modulo-scheduling or peephole + optimization work here. + +4. `Register Allocation`_ --- The target code is transformed from an infinite + virtual register file in SSA form to the concrete register file used by the + target. This phase introduces spill code and eliminates all virtual register + references from the program. + +5. `Prolog/Epilog Code Insertion`_ --- Once the machine code has been generated + for the function and the amount of stack space required is known (used for + LLVM alloca's and spill slots), the prolog and epilog code for the function + can be inserted and "abstract stack location references" can be eliminated. + This stage is responsible for implementing optimizations like frame-pointer + elimination and stack packing. + +6. `Late Machine Code Optimizations`_ --- Optimizations that operate on "final" + machine code can go here, such as spill code scheduling and peephole + optimizations. + +7. `Code Emission`_ --- The final stage actually puts out the code for the + current function, either in the target assembler format or in machine + code. + +The code generator is based on the assumption that the instruction selector will +use an optimal pattern matching selector to create high-quality sequences of +native instructions. Alternative code generator designs based on pattern +expansion and aggressive iterative peephole optimization are much slower. This +design permits efficient compilation (important for JIT environments) and +aggressive optimization (used when generating code offline) by allowing +components of varying levels of sophistication to be used for any step of +compilation. + +In addition to these stages, target implementations can insert arbitrary +target-specific passes into the flow. For example, the X86 target uses a +special pass to handle the 80x87 floating point stack architecture. Other +targets with unusual requirements can be supported with custom passes as needed. + +Using TableGen for target description +------------------------------------- + +The target description classes require a detailed description of the target +architecture. These target descriptions often have a large amount of common +information (e.g., an ``add`` instruction is almost identical to a ``sub`` +instruction). In order to allow the maximum amount of commonality to be +factored out, the LLVM code generator uses the +`TableGen <TableGenFundamentals.html>`_ tool to describe big chunks of the +target machine, which allows the use of domain-specific and target-specific +abstractions to reduce the amount of repetition. + +As LLVM continues to be developed and refined, we plan to move more and more of +the target description to the ``.td`` form. Doing so gives us a number of +advantages. 
The most important is that it makes it easier to port LLVM because
+it reduces the amount of C++ code that has to be written, and the surface area
+of the code generator that needs to be understood before someone can get
+something working. Second, it makes it easier to change things. In particular,
+if tables and other things are all emitted by ``tblgen``, we only need a change
+in one place (``tblgen``) to update all of the targets to a new interface.
+
+.. _Abstract target description:
+.. _target description:
+
+Target description classes
+==========================
+
+The LLVM target description classes (located in the ``include/llvm/Target``
+directory) provide an abstract description of the target machine independent of
+any particular client. These classes are designed to capture the *abstract*
+properties of the target (such as the instructions and registers it has), and do
+not incorporate any particular pieces of code generation algorithms.
+
+All of the target description classes (except the :raw-html:`<tt>` `TargetData`_
+:raw-html:`</tt>` class) are designed to be subclassed by the concrete target
+implementation, and have virtual methods implemented. To get to these
+implementations, the :raw-html:`<tt>` `TargetMachine`_ :raw-html:`</tt>` class
+provides accessors that should be implemented by the target.
+
+.. _TargetMachine:
+
+The ``TargetMachine`` class
+---------------------------
+
+The ``TargetMachine`` class provides virtual methods that are used to access the
+target-specific implementations of the various target description classes via
+the ``get*Info`` methods (``getInstrInfo``, ``getRegisterInfo``,
+``getFrameInfo``, etc.). This class is designed to be specialized by a concrete
+target implementation (e.g., ``X86TargetMachine``) which implements the various
+virtual methods. The only required target description class is the
+:raw-html:`<tt>` `TargetData`_ :raw-html:`</tt>` class, but if the code
+generator components are to be used, the other interfaces should be implemented
+as well.
+
+.. _TargetData:
+
+The ``TargetData`` class
+------------------------
+
+The ``TargetData`` class is the only required target description class, and it
+is the only class that is not extensible (you cannot derive a new class from
+it). ``TargetData`` specifies information about how the target lays out memory
+for structures, the alignment requirements for various data types, the size of
+pointers in the target, and whether the target is little-endian or
+big-endian.
+
+.. _targetlowering:
+
+The ``TargetLowering`` class
+----------------------------
+
+The ``TargetLowering`` class is used by SelectionDAG based instruction selectors
+primarily to describe how LLVM code should be lowered to SelectionDAG
+operations. Among other things, this class indicates:
+
+* an initial register class to use for various ``ValueType``\s,
+
+* which operations are natively supported by the target machine,
+
+* the return type of ``setcc`` operations,
+
+* the type to use for shift amounts, and
+
+* various high-level characteristics, like whether it is profitable to turn
+  division by a constant into a multiplication sequence
+
+The ``TargetRegisterInfo`` class
+--------------------------------
+
+The ``TargetRegisterInfo`` class is used to describe the register file of the
+target and any interactions between the registers.
+
+Registers are represented in the code generator by unsigned integers.
+Physical registers (those that actually exist in the target description) are
+unique small numbers, and virtual registers are generally large. Note that
+register ``#0`` is reserved as a flag value.
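+
+This split in the numbering is easy to test for. A short sketch, assuming the
+static helpers available on ``TargetRegisterInfo`` around this release (the
+function itself is hypothetical):
+
+.. code-block:: c++
+
+  #include "llvm/Target/TargetRegisterInfo.h"
+
+  using namespace llvm;
+
+  // Classify a code generator register number (illustrative only).
+  static bool isRealRegister(unsigned Reg) {
+    if (Reg == 0)
+      return false; // register #0 is the reserved flag value
+    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+      // Virtual register numbers are large; map one to a dense index.
+      unsigned Idx = TargetRegisterInfo::virtReg2Index(Reg);
+      (void)Idx;
+      return true;
+    }
+    return TargetRegisterInfo::isPhysicalRegister(Reg); // small numbers
+  }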
+Each register in the processor description has an associated
+``TargetRegisterDesc`` entry, which provides a textual name for the register
+(used for assembly output and debugging dumps) and a set of aliases (used to
+indicate whether one register overlaps with another).
+
+In addition to the per-register description, the ``TargetRegisterInfo`` class
+exposes a set of processor specific register classes (instances of the
+``TargetRegisterClass`` class). Each register class contains sets of registers
+that have the same properties (for example, they are all 32-bit integer
+registers). Each SSA virtual register created by the instruction selector has
+an associated register class. When the register allocator runs, it replaces
+virtual registers with a physical register in the set.
+
+The target-specific implementations of these classes are auto-generated from a
+`TableGen <TableGenFundamentals.html>`_ description of the register file.
+
+.. _TargetInstrInfo:
+
+The ``TargetInstrInfo`` class
+-----------------------------
+
+The ``TargetInstrInfo`` class is used to describe the machine instructions
+supported by the target. It is essentially an array of ``TargetInstrDescriptor``
+objects, each of which describes one instruction the target
+supports. Descriptors define things like the mnemonic for the opcode, the number
+of operands, the list of implicit register uses and defs, whether the
+instruction has certain target-independent properties (accesses memory, is
+commutable, etc), and holds any target-specific flags.
+
+The ``TargetFrameInfo`` class
+-----------------------------
+
+The ``TargetFrameInfo`` class is used to provide information about the stack
+frame layout of the target. It holds the direction of stack growth, the known
+stack alignment on entry to each function, and the offset to the local area.
+The offset to the local area is the offset from the stack pointer on function
+entry to the first location where function data (local variables, spill
+locations) can be stored.
+
+The ``TargetSubtarget`` class
+-----------------------------
+
+The ``TargetSubtarget`` class is used to provide information about the specific
+chip set being targeted. A sub-target informs code generation of which
+instructions are supported, instruction latencies, and the instruction
+execution itinerary; i.e., which processing units are used, in what order, and
+for how long.
+
+The ``TargetJITInfo`` class
+---------------------------
+
+The ``TargetJITInfo`` class exposes an abstract interface used by the
+Just-In-Time code generator to perform target-specific activities, such as
+emitting stubs. If a ``TargetMachine`` supports JIT code generation, it should
+provide one of these objects through the ``getJITInfo`` method.
+
+.. _code being generated:
+.. _machine code representation:
+
+Machine code description classes
+================================
+
+At a high level, LLVM code is translated to a machine-specific representation
+formed out of :raw-html:`<tt>` `MachineFunction`_ :raw-html:`</tt>`,
+:raw-html:`<tt>` `MachineBasicBlock`_ :raw-html:`</tt>`, and :raw-html:`<tt>`
+`MachineInstr`_ :raw-html:`</tt>` instances (defined in
+``include/llvm/CodeGen``). This representation is completely target agnostic,
+representing instructions in their most abstract form: an opcode and a series of
+operands.
This representation is designed to support both an SSA representation
+for machine code, as well as a register allocated, non-SSA form.
+
+.. _MachineInstr:
+
+The ``MachineInstr`` class
+--------------------------
+
+Target machine instructions are represented as instances of the ``MachineInstr``
+class. This class is an extremely abstract way of representing machine
+instructions. In particular, it only keeps track of an opcode number and a set
+of operands.
+
+The opcode number is a simple unsigned integer that only has meaning to a
+specific backend. All of the instructions for a target should be defined in the
+``*InstrInfo.td`` file for the target. The opcode enum values are auto-generated
+from this description. The ``MachineInstr`` class does not have any information
+about how to interpret the instruction (i.e., what the semantics of the
+instruction are); for that you must refer to the :raw-html:`<tt>`
+`TargetInstrInfo`_ :raw-html:`</tt>` class.
+
+The operands of a machine instruction can be of several different types: a
+register reference, a constant integer, a basic block reference, etc. In
+addition, a machine operand should be marked as a def or a use of the value
+(though only registers are allowed to be defs).
+
+By convention, the LLVM code generator orders instruction operands so that all
+register definitions come before the register uses, even on architectures that
+are normally printed in other orders. For example, the SPARC add instruction:
+"``add %i1, %i2, %i3``" adds the "%i1" and "%i2" registers and stores the
+result into the "%i3" register. In the LLVM code generator, the operands should
+be stored as "``%i3, %i1, %i2``": with the destination first.
+
+Keeping destination (definition) operands at the beginning of the operand list
+has several advantages. In particular, the debugging printer will print the
+instruction like this:
+
+.. code-block:: llvm
+
+  %r3 = add %i1, %i2
+
+Also, if the first operand is a def, it is easier to `create instructions`_ whose
+only def is the first operand.
+
+.. _create instructions:
+
+Using the ``MachineInstrBuilder.h`` functions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Machine instructions are created by using the ``BuildMI`` functions, located in
+the ``include/llvm/CodeGen/MachineInstrBuilder.h`` file. The ``BuildMI``
+functions make it easy to build arbitrary machine instructions. Usage of the
+``BuildMI`` functions looks like this:
+
+.. code-block:: c++
+
+  // Create a 'DestReg = mov 42' (rendered in X86 assembly as 'mov DestReg, 42')
+  // instruction. The '1' specifies how many operands will be added.
+  MachineInstr *MI = BuildMI(X86::MOV32ri, 1, DestReg).addImm(42);
+
+  // Create the same instr, but insert it at the end of a basic block.
+  MachineBasicBlock &MBB = ...
+  BuildMI(MBB, X86::MOV32ri, 1, DestReg).addImm(42);
+
+  // Create the same instr, but insert it before a specified iterator point.
+  MachineBasicBlock::iterator MBBI = ...
+  BuildMI(MBB, MBBI, X86::MOV32ri, 1, DestReg).addImm(42);
+
+  // Create a 'cmp Reg, 0' instruction, no destination reg.
+  MI = BuildMI(X86::CMP32ri, 2).addReg(Reg).addImm(0);
+
+  // Create an 'sahf' instruction which takes no operands and stores nothing.
+  MI = BuildMI(X86::SAHF, 0);
+
+  // Create a self looping branch instruction.
+  BuildMI(MBB, X86::JNE, 1).addMBB(&MBB);
+
+The key thing to remember with the ``BuildMI`` functions is that you have to
+specify the number of operands that the machine instruction will take. This
+allows for efficient memory allocation.
Note also that operands
+default to being uses of values, not definitions. If you need to add a
+definition operand (other than the optional destination register), you must
+explicitly mark it as such:
+
+.. code-block:: c++
+
+  MI.addReg(Reg, RegState::Define);
+
+Fixed (preassigned) registers
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+One important issue that the code generator needs to be aware of is the presence
+of fixed registers. In particular, there are often places in the instruction
+stream where the register allocator *must* arrange for a particular value to be
+in a particular register. This can occur due to limitations of the instruction
+set (e.g., the X86 can only do a 32-bit divide with the ``EAX``/``EDX``
+registers), or external factors like calling conventions. In any case, the
+instruction selector should emit code that copies a virtual register into or out
+of a physical register when needed.
+
+For example, consider this simple LLVM example:
+
+.. code-block:: llvm
+
+  define i32 @test(i32 %X, i32 %Y) {
+    %Z = udiv i32 %X, %Y
+    ret i32 %Z
+  }
+
+The X86 instruction selector produces this machine code for the ``div`` and
+``ret`` (use "``llc X.bc -march=x86 -print-machineinstrs``" to get this):
+
+.. code-block:: llvm
+
+  ;; Start of div
+  %EAX = mov %reg1024           ;; Copy X (in reg1024) into EAX
+  %reg1027 = sar %reg1024, 31
+  %EDX = mov %reg1027           ;; Sign extend X into EDX
+  idiv %reg1025                 ;; Divide by Y (in reg1025)
+  %reg1026 = mov %EAX           ;; Read the result (Z) out of EAX
+
+  ;; Start of ret
+  %EAX = mov %reg1026           ;; 32-bit return value goes in EAX
+  ret
+
+By the end of code generation, the register allocator has coalesced the
+registers and deleted the resultant identity moves, producing the following
+code:
+
+.. code-block:: llvm
+
+  ;; X is in EAX, Y is in ECX
+  mov %EAX, %EDX
+  sar %EDX, 31
+  idiv %ECX
+  ret
+
+This approach is extremely general (if it can handle the X86 architecture, it
+can handle anything!) and allows all of the target specific knowledge about the
+instruction stream to be isolated in the instruction selector. Note that
+physical registers should have a short lifetime for good code generation, and
+all physical registers are assumed dead on entry to and exit from basic blocks
+(before register allocation). Thus, if you need a value to be live across basic
+block boundaries, it *must* live in a virtual register.
+
+Call-clobbered registers
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Some machine instructions, like calls, clobber a large number of physical
+registers. Rather than adding ``<def,dead>`` operands for all of them, it is
+possible to use an ``MO_RegisterMask`` operand instead. The register mask
+operand holds a bit mask of preserved registers, and everything else is
+considered to be clobbered by the instruction.
+
+Machine code in SSA form
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+``MachineInstr``'s are initially selected in SSA-form, and are maintained in
+SSA-form until register allocation happens. For the most part, this is
+trivially simple since LLVM is already in SSA form; LLVM PHI nodes become
+machine code PHI nodes, and virtual registers are only allowed to have a single
+definition.
+
+After register allocation, machine code is no longer in SSA-form because there
+are no virtual registers left in the code.
+
+.. _MachineBasicBlock:
+
+The ``MachineBasicBlock`` class
+-------------------------------
+
+The ``MachineBasicBlock`` class contains a list of machine instructions
+(:raw-html:`<tt>` `MachineInstr`_ :raw-html:`</tt>` instances). It roughly
+corresponds to the LLVM code input to the instruction selector, but there can be
+a one-to-many mapping (i.e. one LLVM basic block can map to multiple machine
+basic blocks). The ``MachineBasicBlock`` class has a "``getBasicBlock``" method,
+which returns the LLVM basic block that it comes from.
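+
+To make the nesting of these classes concrete, here is a tiny sketch (assuming
+the iterator-based API of this release; the function is hypothetical) that
+counts the machine instructions in a function:
+
+.. code-block:: c++
+
+  #include "llvm/CodeGen/MachineBasicBlock.h"
+  #include "llvm/CodeGen/MachineFunction.h"
+
+  using namespace llvm;
+
+  // Walk every MachineBasicBlock in a MachineFunction, and every
+  // MachineInstr in each block (illustrative only).
+  static unsigned countMachineInstrs(const MachineFunction &MF) {
+    unsigned Count = 0;
+    for (MachineFunction::const_iterator MBB = MF.begin(), MBBE = MF.end();
+         MBB != MBBE; ++MBB)
+      for (MachineBasicBlock::const_iterator MI = MBB->begin(),
+           MIE = MBB->end(); MI != MIE; ++MI)
+        ++Count; // each element is a MachineInstr
+    return Count;
+  }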
It roughly
+corresponds to the LLVM code input to the instruction selector, but there can be
+a one-to-many mapping (i.e. one LLVM basic block can map to multiple machine
+basic blocks). The ``MachineBasicBlock`` class has a "``getBasicBlock``" method,
+which returns the LLVM basic block that it comes from.
+
+.. _MachineFunction:
+
+The ``MachineFunction`` class
+-----------------------------
+
+The ``MachineFunction`` class contains a list of machine basic blocks
+(:raw-html:`<tt>` `MachineBasicBlock`_ :raw-html:`</tt>` instances). It
+corresponds one-to-one with the LLVM function input to the instruction selector.
+In addition to a list of basic blocks, the ``MachineFunction`` contains a
+``MachineConstantPool``, a ``MachineFrameInfo``, a ``MachineFunctionInfo``, and
+a ``MachineRegisterInfo``. See ``include/llvm/CodeGen/MachineFunction.h`` for
+more information.
+
+MachineInstr Bundles
+--------------------
+
+The LLVM code generator can model sequences of instructions as MachineInstr
+bundles. A MI bundle can model a VLIW group / pack which contains an arbitrary
+number of parallel instructions. It can also be used to model a sequential list
+of instructions (potentially with data dependencies) that cannot be legally
+separated (e.g. ARM Thumb2 IT blocks).
+
+Conceptually a MI bundle is a MI with a number of other MIs nested within:
+
+::
+
+  --------------
+  |   Bundle   | ---------
+  --------------          \
+         |           ----------------
+         |           |      MI      |
+         |           ----------------
+         |                   |
+         |           ----------------
+         |           |      MI      |
+         |           ----------------
+         |                   |
+         |           ----------------
+         |           |      MI      |
+         |           ----------------
+         |
+  --------------
+  |   Bundle   | --------
+  --------------         \
+         |           ----------------
+         |           |      MI      |
+         |           ----------------
+         |                   |
+         |           ----------------
+         |           |      MI      |
+         |           ----------------
+         |                   |
+         |                  ...
+         |
+  --------------
+  |   Bundle   | --------
+  --------------         \
+         |
+        ...
+
+MI bundle support does not change the physical representations of
+MachineBasicBlock and MachineInstr. All the MIs (including top level and nested
+ones) are stored as a sequential list of MIs. The "bundled" MIs are marked with
+the 'InsideBundle' flag. A top level MI with the special BUNDLE opcode is used
+to represent the start of a bundle. It's legal to mix BUNDLE MIs with individual
+MIs that are not inside bundles nor represent bundles.
+
+MachineInstr passes should operate on a MI bundle as a single unit. Member
+methods have been taught to correctly handle bundles and MIs inside bundles.
+The MachineBasicBlock iterator has been modified to skip over bundled MIs to
+enforce the bundle-as-a-single-unit concept. An alternative iterator
+instr_iterator has been added to MachineBasicBlock to allow passes to iterate
+over all of the MIs in a MachineBasicBlock, including those which are nested
+inside bundles. The top level BUNDLE instruction must have the correct set of
+register MachineOperand's that represent the cumulative inputs and outputs of
+the bundled MIs.
+
+Packing / bundling of MachineInstr's should be done as part of the register
+allocation super-pass. More specifically, the pass which determines what MIs
+should be bundled together must be done after the code generator exits SSA form
+(i.e. after the two-address pass, PHI elimination, and copy coalescing). Bundles
+should only be finalized (i.e. adding BUNDLE MIs and input and output register
+MachineOperands) after virtual registers have been rewritten into physical
+registers.
This requirement eliminates the need to add virtual register operands
+to BUNDLE instructions, which would effectively double the virtual register def
+and use lists.
+
+.. _MC Layer:
+
+The "MC" Layer
+==============
+
+The MC Layer is used to represent and process code at the raw machine code
+level, devoid of "high level" information like "constant pools", "jump tables",
+"global variables" or anything like that. At this level, LLVM handles things
+like label names, machine instructions, and sections in the object file. The
+code in this layer is used for a number of important purposes: the tail end of
+the code generator uses it to write a .s or .o file, and it is also used by the
+llvm-mc tool to implement standalone machine code assemblers and disassemblers.
+
+This section describes some of the important classes. There are also a number
+of important subsystems that interact at this layer; they are described later in
+this manual.
+
+.. _MCStreamer:
+
+The ``MCStreamer`` API
+----------------------
+
+MCStreamer is best thought of as an assembler API. It is an abstract API which
+is *implemented* in different ways (e.g. to output a .s file, output an ELF .o
+file, etc) but whose API corresponds directly to what you see in a .s file.
+MCStreamer has one method per directive, such as EmitLabel, EmitSymbolAttribute,
+SwitchSection, EmitValue (for .byte, .word), etc, which directly correspond to
+assembly level directives. It also has an EmitInstruction method, which is used
+to output an MCInst to the streamer.
+
+This API is most important for two clients: the llvm-mc stand-alone assembler is
+effectively a parser that parses a line, then invokes a method on MCStreamer. In
+the code generator, the `Code Emission`_ phase lowers higher level LLVM IR and
+Machine* constructs down to the MC layer, emitting directives through
+MCStreamer.
+
+On the implementation side of MCStreamer, there are two major implementations:
+one for writing out a .s file (MCAsmStreamer), and one for writing out a .o
+file (MCObjectStreamer). MCAsmStreamer is a straightforward implementation
+that prints out a directive for each method (e.g. ``EmitValue -> .byte``), but
+MCObjectStreamer implements a full assembler.
+
+The ``MCContext`` class
+-----------------------
+
+The MCContext class is the owner of a variety of uniqued data structures at the
+MC layer, including symbols, sections, etc. As such, this is the class that you
+interact with to create symbols and sections. This class cannot be subclassed.
+
+The ``MCSymbol`` class
+----------------------
+
+The MCSymbol class represents a symbol (aka label) in the assembly file. There
+are two interesting kinds of symbols: assembler temporary symbols, and normal
+symbols. Assembler temporary symbols are used and processed by the assembler
+but are discarded when the object file is produced. The distinction is usually
+represented by adding a prefix to the label; for example, "L" labels are
+assembler temporary labels in MachO.
+
+MCSymbols are created by MCContext and uniqued there. This means that MCSymbols
+can be compared for pointer equivalence to find out if they are the same symbol.
+Note that pointer inequality does not guarantee the labels will end up at
+different addresses, though. It's perfectly legal to output something like this
+to the .s file:
+
+::
+
+  foo:
+  bar:
+    .byte 4
+
+In this case, both the foo and bar symbols will have the same address.
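+
+As a rough sketch only (this is not code from the LLVM tree; the variable names
+are invented for the example), creating and comparing symbols through MCContext
+looks something like this:
+
+.. code-block:: c++
+
+  // Assumes an already-constructed MCContext 'Ctx'.
+  MCSymbol *Foo  = Ctx.GetOrCreateSymbol(StringRef("foo"));
+  MCSymbol *Bar  = Ctx.GetOrCreateSymbol(StringRef("bar"));
+  MCSymbol *Foo2 = Ctx.GetOrCreateSymbol(StringRef("foo"));
+
+  // MCContext uniques symbols, so pointer equality identifies them.
+  assert(Foo == Foo2 && "same name yields the same MCSymbol");
+  assert(Foo != Bar  && "distinct symbols (which may still share an address)");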
+
+The ``MCSection`` class
+-----------------------
+
+The ``MCSection`` class represents an object-file specific section. It is
+subclassed by object file specific implementations (e.g. ``MCSectionMachO``,
+``MCSectionCOFF``, ``MCSectionELF``) and these are created and uniqued by
+MCContext. The MCStreamer has a notion of the current section, which can be
+changed with the SwitchSection method (which corresponds to a ".section"
+directive in a .s file).
+
+.. _MCInst:
+
+The ``MCInst`` class
+--------------------
+
+The ``MCInst`` class is a target-independent representation of an instruction.
+It is a simple class (much more so than `MachineInstr`_) that holds a
+target-specific opcode and a vector of MCOperands. MCOperand, in turn, is a
+simple discriminated union of three cases: 1) a simple immediate, 2) a target
+register ID, and 3) a symbolic expression (e.g. "``Lfoo-Lbar+42``") as an MCExpr.
+
+MCInst is the common currency used to represent machine instructions at the MC
+layer. It is the type used by the instruction encoder, the instruction printer,
+and the type generated by the assembly parser and disassembler.
+
+.. _Target-independent algorithms:
+.. _code generation algorithm:
+
+Target-independent code generation algorithms
+=============================================
+
+This section documents the phases described in the `high-level design of the
+code generator`_. It explains how they work and some of the rationale behind
+their design.
+
+.. _Instruction Selection:
+.. _instruction selection section:
+
+Instruction Selection
+---------------------
+
+Instruction Selection is the process of translating LLVM code presented to the
+code generator into target-specific machine instructions. There are several
+well-known ways to do this in the literature. LLVM uses a SelectionDAG based
+instruction selector.
+
+Portions of the DAG instruction selector are generated from the target
+description (``*.td``) files. Our goal is for the entire instruction selector
+to be generated from these ``.td`` files, though currently there are still
+things that require custom C++ code.
+
+.. _SelectionDAG:
+
+Introduction to SelectionDAGs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The SelectionDAG provides an abstraction for code representation in a way that
+is amenable to instruction selection using automatic techniques
+(e.g. dynamic-programming based optimal pattern matching selectors). It is also
+well-suited to other phases of code generation; in particular, instruction
+scheduling (SelectionDAG's are very close to scheduling DAGs post-selection).
+Additionally, the SelectionDAG provides a host representation where a large
+variety of very-low-level (but target-independent) `optimizations`_ may be
+performed; ones which require extensive information about the instructions
+efficiently supported by the target.
+
+The SelectionDAG is a Directed-Acyclic-Graph whose nodes are instances of the
+``SDNode`` class. The primary payload of the ``SDNode`` is its operation code
+(Opcode) that indicates what operation the node performs and the operands to the
+operation. The various operation node types are described at the top of the
+``include/llvm/CodeGen/SelectionDAGNodes.h`` file.
+
+Although most operations define a single value, each node in the graph may
+define multiple values. For example, a combined div/rem operation will define
+both the quotient and the remainder. Many other situations require multiple
+values as well.
Each node also has some number of operands, which are edges to +the node defining the used value. Because nodes may define multiple values, +edges are represented by instances of the ``SDValue`` class, which is a +``<SDNode, unsigned>`` pair, indicating the node and result value being used, +respectively. Each value produced by an ``SDNode`` has an associated ``MVT`` +(Machine Value Type) indicating what the type of the value is. + +SelectionDAGs contain two different kinds of values: those that represent data +flow and those that represent control flow dependencies. Data values are simple +edges with an integer or floating point value type. Control edges are +represented as "chain" edges which are of type ``MVT::Other``. These edges +provide an ordering between nodes that have side effects (such as loads, stores, +calls, returns, etc). All nodes that have side effects should take a token +chain as input and produce a new one as output. By convention, token chain +inputs are always operand #0, and chain results are always the last value +produced by an operation. + +A SelectionDAG has designated "Entry" and "Root" nodes. The Entry node is +always a marker node with an Opcode of ``ISD::EntryToken``. The Root node is +the final side-effecting node in the token chain. For example, in a single basic +block function it would be the return node. + +One important concept for SelectionDAGs is the notion of a "legal" vs. +"illegal" DAG. A legal DAG for a target is one that only uses supported +operations and supported types. On a 32-bit PowerPC, for example, a DAG with a +value of type i1, i8, i16, or i64 would be illegal, as would a DAG that uses a +SREM or UREM operation. The `legalize types`_ and `legalize operations`_ phases +are responsible for turning an illegal DAG into a legal DAG. + +SelectionDAG Instruction Selection Process +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +SelectionDAG-based instruction selection consists of the following steps: + +#. `Build initial DAG`_ --- This stage performs a simple translation from the + input LLVM code to an illegal SelectionDAG. + +#. `Optimize SelectionDAG`_ --- This stage performs simple optimizations on the + SelectionDAG to simplify it, and recognize meta instructions (like rotates + and ``div``/``rem`` pairs) for targets that support these meta operations. + This makes the resultant code more efficient and the `select instructions + from DAG`_ phase (below) simpler. + +#. `Legalize SelectionDAG Types`_ --- This stage transforms SelectionDAG nodes + to eliminate any types that are unsupported on the target. + +#. `Optimize SelectionDAG`_ --- The SelectionDAG optimizer is run to clean up + redundancies exposed by type legalization. + +#. `Legalize SelectionDAG Ops`_ --- This stage transforms SelectionDAG nodes to + eliminate any operations that are unsupported on the target. + +#. `Optimize SelectionDAG`_ --- The SelectionDAG optimizer is run to eliminate + inefficiencies introduced by operation legalization. + +#. `Select instructions from DAG`_ --- Finally, the target instruction selector + matches the DAG operations to target instructions. This process translates + the target-independent input DAG into another DAG of target instructions. + +#. `SelectionDAG Scheduling and Formation`_ --- The last phase assigns a linear + order to the instructions in the target-instruction DAG and emits them into + the MachineFunction being compiled. This step uses traditional prepass + scheduling techniques. 
+
+After all of these steps are complete, the SelectionDAG is destroyed and the
+rest of the code generation passes are run.
+
+One great way to visualize what is going on here is to take advantage of a few
+LLC command line options. The following options pop up a window displaying the
+SelectionDAG at specific times (if you only get errors printed to the console
+while using this, you probably `need to configure your
+system <ProgrammersManual.html#ViewGraph>`_ to add support for it).
+
+* ``-view-dag-combine1-dags`` displays the DAG after being built, before the
+  first optimization pass.
+
+* ``-view-legalize-dags`` displays the DAG before Legalization.
+
+* ``-view-dag-combine2-dags`` displays the DAG before the second optimization
+  pass.
+
+* ``-view-isel-dags`` displays the DAG before the Select phase.
+
+* ``-view-sched-dags`` displays the DAG before Scheduling.
+
+The ``-view-sunit-dags`` option displays the Scheduler's dependency graph. This
+graph is based on the final SelectionDAG, with nodes that must be scheduled
+together bundled into a single scheduling-unit node, and with immediate operands
+and other nodes that aren't relevant for scheduling omitted.
+
+.. _Build initial DAG:
+
+Initial SelectionDAG Construction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The initial SelectionDAG is naïvely peephole expanded from
+the LLVM input by the ``SelectionDAGLowering`` class in the
+``lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp`` file. The intent of this pass
+is to expose as many low-level, target-specific details to the SelectionDAG as
+possible. This pass is mostly hard-coded (e.g. an LLVM ``add`` turns into an
+``SDNode add`` while a ``getelementptr`` is expanded into the obvious
+arithmetic). This pass requires target-specific hooks to lower calls, returns,
+varargs, etc. For these features, the :raw-html:`<tt>` `TargetLowering`_
+:raw-html:`</tt>` interface is used.
+
+.. _legalize types:
+.. _Legalize SelectionDAG Types:
+.. _Legalize SelectionDAG Ops:
+
+SelectionDAG LegalizeTypes Phase
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The LegalizeTypes phase is in charge of converting a DAG to only use the types
+that are natively supported by the target.
+
+There are two main ways of converting values of unsupported scalar types to
+values of supported types: converting small types to larger types ("promoting"),
+and breaking up large integer types into smaller ones ("expanding"). For
+example, a target might require that all f32 values are promoted to f64 and that
+all i1/i8/i16 values are promoted to i32. The same target might require that
+all i64 values be expanded into pairs of i32 values. These changes can insert
+sign and zero extensions as needed to make sure that the final code has the same
+behavior as the input.
+
+There are two main ways of converting values of unsupported vector types to
+values of supported types: splitting vector types, multiple times if necessary,
+until a legal type is found, and extending vector types by adding elements to
+the end to round them out to legal types ("widening"). If a vector gets split
+all the way down to single-element parts with no supported vector type being
+found, the elements are converted to scalars ("scalarizing").
+
+A target implementation tells the legalizer which types are supported (and which
+register class to use for them) by calling the ``addRegisterClass`` method in
+its TargetLowering constructor.
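+
+As a sketch (modeled loosely on the X86 backend; the exact types and register
+class names vary by target and LLVM version), this registration looks roughly
+like:
+
+.. code-block:: c++
+
+  // In a target's TargetLowering constructor: declare which scalar types
+  // are natively supported, and which register class holds each one.
+  addRegisterClass(MVT::i8,  &X86::GR8RegClass);
+  addRegisterClass(MVT::i16, &X86::GR16RegClass);
+  addRegisterClass(MVT::i32, &X86::GR32RegClass);
+  // Types not registered here are promoted, expanded, split, or widened
+  // by the type legalizer as described above.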
.. _legalize operations:
+.. _Legalizer:
+
+SelectionDAG Legalize Phase
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The Legalize phase is in charge of converting a DAG to only use the operations
+that are natively supported by the target.
+
+Targets often have weird constraints, such as not supporting every operation on
+every supported datatype (e.g. X86 does not support byte conditional moves and
+PowerPC does not support sign-extending loads from a 16-bit memory location).
+Legalize takes care of this by open-coding another sequence of operations to
+emulate the operation ("expansion"), by promoting one type to a larger type that
+supports the operation ("promotion"), or by using a target-specific hook to
+implement the legalization ("custom").
+
+A target implementation tells the legalizer which operations are not supported
+(and which of the above three actions to take) by calling the
+``setOperationAction`` method in its ``TargetLowering`` constructor.
+
+Prior to the existence of the Legalize passes, we required that every target
+`selector`_ supported and handled every operator and type even if they were not
+natively supported. The introduction of the Legalize phases allows all of the
+canonicalization patterns to be shared across targets, and makes it very easy to
+optimize the canonicalized code because it is still in the form of a DAG.
+
+.. _optimizations:
+.. _Optimize SelectionDAG:
+.. _selector:
+
+SelectionDAG Optimization Phase: the DAG Combiner
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The SelectionDAG optimization phase is run multiple times for code generation,
+immediately after the DAG is built and once after each legalization. The first
+run of the pass allows the initial code to be cleaned up (e.g. performing
+optimizations that depend on knowing that the operators have restricted type
+inputs). Subsequent runs of the pass clean up the messy code generated by the
+Legalize passes, which allows Legalize to be very simple (it can focus on making
+code legal instead of focusing on generating *good* and legal code).
+
+One important class of optimizations performed is optimizing inserted sign and
+zero extension instructions. We currently use ad-hoc techniques, but could move
+to more rigorous techniques in the future. Here are some good papers on the
+subject:
+
+"`Widening integer arithmetic <http://www.eecs.harvard.edu/~nr/pubs/widen-abstract.html>`_" :raw-html:`<br>`
+Kevin Redwine and Norman Ramsey :raw-html:`<br>`
+International Conference on Compiler Construction (CC) 2004
+
+"`Effective sign extension elimination <http://portal.acm.org/citation.cfm?doid=512529.512552>`_" :raw-html:`<br>`
+Motohiro Kawahito, Hideaki Komatsu, and Toshio Nakatani :raw-html:`<br>`
+Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language Design
+and Implementation.
+
+.. _Select instructions from DAG:
+
+SelectionDAG Select Phase
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The Select phase is the bulk of the target-specific code for instruction
+selection. This phase takes a legal SelectionDAG as input, pattern matches the
+instructions supported by the target to this DAG, and produces a new DAG of
+target code. For example, consider the following LLVM fragment:
+
+.. code-block:: llvm
+
+  %t1 = fadd float %W, %X
+  %t2 = fmul float %t1, %Y
+  %t3 = fadd float %t2, %Z
+
+This LLVM code corresponds to a SelectionDAG that looks basically like this:
+
+::
+
+  (fadd:f32 (fmul:f32 (fadd:f32 W, X), Y), Z)
+
+If a target supports floating point multiply-and-add (FMA) operations, one of
+the adds can be merged with the multiply. On the PowerPC, for example, the
+output of the instruction selector might look like this DAG:
+
+::
+
+  (FMADDS (FADDS W, X), Y, Z)
+
+The ``FMADDS`` instruction is a ternary instruction that multiplies its first
+two operands and adds the third (as single-precision floating-point numbers).
+The ``FADDS`` instruction is a simple binary single-precision add instruction.
+To perform this pattern match, the PowerPC backend includes the following
+instruction definitions:
+
+::
+
+  def FMADDS : AForm_1<59, 29,
+                      (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+                      "fmadds $FRT, $FRA, $FRC, $FRB",
+                      [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+                                             F4RC:$FRB))]>;
+  def FADDS : AForm_2<59, 21,
+                     (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRB),
+                     "fadds $FRT, $FRA, $FRB",
+                     [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
+
+The final ``[(set ...)]`` operand of each instruction definition indicates the
+pattern used to match the instruction. The DAG operators (like ``fmul``/``fadd``)
+are defined in the ``include/llvm/Target/TargetSelectionDAG.td`` file.
+``F4RC`` is the register class of the input and result values.
+
+The TableGen DAG instruction selector generator reads the instruction patterns
+in the ``.td`` file and automatically builds parts of the pattern matching code
+for your target. It has the following strengths:
+
+* At compiler-compiler time, it analyzes your instruction patterns and tells you
+  if your patterns make sense or not.
+
+* It can handle arbitrary constraints on operands for the pattern match. In
+  particular, it is straightforward to say things like "match any immediate
+  that is a 13-bit sign-extended value". For examples, see the ``immSExt16``
+  and related ``tblgen`` classes in the PowerPC backend.
+
+* It knows several important identities for the patterns defined. For example,
+  it knows that addition is commutative, so it allows the ``FMADDS`` pattern
+  above to match "``(fadd X, (fmul Y, Z))``" as well as "``(fadd (fmul X, Y),
+  Z)``", without the target author having to specially handle this case.
+
+* It has a full-featured type-inferencing system. In particular, you should
+  rarely have to explicitly tell the system what type parts of your patterns
+  are. In the ``FMADDS`` case above, we didn't have to tell ``tblgen`` that all
+  of the nodes in the pattern are of type 'f32'. It was able to infer and
+  propagate this knowledge from the fact that ``F4RC`` has type 'f32'.
+
+* Targets can define their own (and rely on built-in) "pattern fragments".
+  Pattern fragments are chunks of reusable patterns that get inlined into your
+  patterns during compiler-compiler time. For example, the integer "``(not
+  x)``" operation is actually defined as a pattern fragment that expands as
+  "``(xor x, -1)``", since the SelectionDAG does not have a native '``not``'
+  operation. Targets can define their own short-hand fragments as they see fit.
+  See the definition of '``not``' and '``ineg``' for examples.
+
+* In addition to instructions, targets can specify arbitrary patterns that map
+  to one or more instructions using the 'Pat' class. For example, the PowerPC
+  has no way to load an arbitrary integer immediate into a register in one
+  instruction. To tell tblgen how to do this, it defines:
+
+  ::
+
+    // Arbitrary immediate support.  Implement in terms of LIS/ORI.
+
+    def : Pat<(i32 imm:$imm),
+              (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
+
+  If none of the single-instruction patterns for loading an immediate into a
+  register match, this will be used. This rule says "match an arbitrary i32
+  immediate, turning it into an ``ORI`` ('or a 16-bit immediate') and an ``LIS``
+  ('load 16-bit immediate, where the immediate is shifted to the left 16 bits')
+  instruction". To make this work, the ``LO16``/``HI16`` node transformations
+  are used to manipulate the input immediate (in this case, take the high or low
+  16-bits of the immediate).
+
+* While the system does automate a lot, it still allows you to write custom C++
+  code to match special cases if there is something that is hard to
+  express.
+
+While it has many strengths, the system currently has some limitations,
+primarily because it is a work in progress and is not yet finished:
+
+* Overall, there is no way to define or match SelectionDAG nodes that define
+  multiple values (e.g. ``SMUL_LOHI``, ``LOAD``, ``CALL``, etc). This is the
+  biggest reason that you currently still *have to* write custom C++ code
+  for your instruction selector.
+
+* There is no great way to support matching complex addressing modes yet. In
+  the future, we will extend pattern fragments to allow them to define multiple
+  values (e.g. the four operands of the `X86 addressing mode`_, which are
+  currently matched with custom C++ code). In addition, we'll extend fragments
+  so that a fragment can match multiple different patterns.
+
+* We don't automatically infer flags like ``isStore``/``isLoad`` yet.
+
+* We don't automatically generate the set of supported registers and operations
+  for the `Legalizer`_ yet.
+
+* We don't have a way of tying in custom legalized nodes yet.
+
+Despite these limitations, the instruction selector generator is still quite
+useful for most of the binary and logical operations in typical instruction
+sets. If you run into any problems or can't figure out how to do something,
+please let Chris know!
+
+.. _Scheduling and Formation:
+.. _SelectionDAG Scheduling and Formation:
+
+SelectionDAG Scheduling and Formation Phase
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The scheduling phase takes the DAG of target instructions from the selection
+phase and assigns an order. The scheduler can pick an order depending on
+various constraints of the machine (e.g. an order that minimizes register
+pressure, or one that covers instruction latencies). Once an order is
+established, the DAG is converted to a list of :raw-html:`<tt>`
+`MachineInstr`_\s :raw-html:`</tt>` and the SelectionDAG is destroyed.
+
+Note that this phase is logically separate from the instruction selection phase,
+but is tied to it closely in the code because it operates on SelectionDAGs.
+
+Future directions for the SelectionDAG
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+#. Optional function-at-a-time selection.
+
+#. Auto-generate entire selector from ``.td`` file.
+
+.. _SSA-based Machine Code Optimizations:
+
+SSA-based Machine Code Optimizations
+------------------------------------
+
+To Be Written
+
+Live Intervals
+--------------
+
+Live Intervals are the ranges (intervals) where a variable is *live*. They are
+used by some `register allocator`_ passes to determine if two or more virtual
+registers which require the same physical register are live at the same point in
+the program (i.e., they conflict). When this situation occurs, one virtual
+register must be *spilled*.
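+
+As a small illustration (the instruction numbering here is invented for the
+example), two virtual registers conflict exactly when their intervals overlap:
+
+::
+
+  1:  %v1 = ...
+  2:  %v2 = ...
+  3:  ... = use %v1     ;; last use of %v1: its interval is [1, 3)
+  4:  ... = use %v2     ;; last use of %v2: its interval is [2, 4)
+
+Since ``[1, 3)`` and ``[2, 4)`` overlap, ``%v1`` and ``%v2`` cannot share a
+physical register.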
+
+Live Variable Analysis
+^^^^^^^^^^^^^^^^^^^^^^
+
+The first step in determining the live intervals of variables is to calculate
+the set of registers that are immediately dead after the instruction (i.e., the
+instruction calculates the value, but it is never used) and the set of registers
+that are used by the instruction, but are never used after the instruction
+(i.e., they are killed). Live variable information is computed for
+each *virtual* register and *register allocatable* physical register
+in the function. This is done in a very efficient manner because it uses SSA to
+sparsely compute lifetime information for virtual registers (which are in SSA
+form) and only has to track physical registers within a block. Before register
+allocation, LLVM can assume that physical registers are only live within a
+single basic block. This allows it to do a single, local analysis to resolve
+physical register lifetimes within each basic block. If a physical register is
+not register allocatable (e.g., a stack pointer or condition codes), it is not
+tracked.
+
+Physical registers may be live in to or out of a function. Live in values are
+typically arguments in registers. Live out values are typically return values in
+registers. Live in values are marked as such, and are given a dummy "defining"
+instruction during live intervals analysis. If the last basic block of a
+function is a ``return``, then it's marked as using all live out values in the
+function.
+
+``PHI`` nodes need to be handled specially, because the calculation of the live
+variable information from a depth first traversal of the CFG of the function
+won't guarantee that a virtual register used by the ``PHI`` node is defined
+before it's used. When a ``PHI`` node is encountered, only the definition is
+handled, because the uses will be handled in other basic blocks.
+
+For each ``PHI`` node of the current basic block, we simulate an assignment at
+the end of the current basic block and traverse the successor basic blocks. If a
+successor basic block has a ``PHI`` node and one of the ``PHI`` node's operands
+is coming from the current basic block, then the variable is marked as *alive*
+within the current basic block and all of its predecessor basic blocks, until
+the basic block with the defining instruction is encountered.
+
+Live Intervals Analysis
+^^^^^^^^^^^^^^^^^^^^^^^
+
+We now have the information available to perform the live intervals analysis and
+build the live intervals themselves. We start off by numbering the basic blocks
+and machine instructions. We then handle the "live-in" values. These are in
+physical registers, so the physical register is assumed to be killed by the end
+of the basic block. Live intervals for virtual registers are computed for some
+ordering of the machine instructions ``[1, N]``. A live interval is an interval
+``[i, j)``, where ``1 <= i <= j <= N``, for which a variable is live.
+
+.. note::
+
+  More to come...
+
+.. _Register Allocation:
+.. _register allocator:
+
+Register Allocation
+-------------------
+
+The *Register Allocation problem* consists of mapping a program
+:raw-html:`<b><tt>` P\ :sub:`v`\ :raw-html:`</tt></b>`, that can use an unbounded
+number of virtual registers, to a program :raw-html:`<b><tt>` P\ :sub:`p`\
+:raw-html:`</tt></b>` that contains a finite (possibly small) number of physical
+registers. Each target architecture has a different number of physical
+registers.
If the number of physical registers is not enough to accommodate all
+the virtual registers, some of them will have to be mapped into memory. These
+virtuals are called *spilled virtuals*.
+
+How registers are represented in LLVM
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In LLVM, physical registers are denoted by integer numbers that normally range
+from 1 to 1023. To see how this numbering is defined for a particular
+architecture, you can read the ``GenRegisterNames.inc`` file for that
+architecture. For instance, by inspecting
+``lib/Target/X86/X86GenRegisterInfo.inc`` we see that the 32-bit register
+``EAX`` is denoted by 43, and the MMX register ``MM0`` is mapped to 65.
+
+Some architectures contain registers that share the same physical location. A
+notable example is the X86 platform. For instance, in the X86 architecture, the
+registers ``EAX``, ``AX`` and ``AL`` share the first eight bits. These physical
+registers are marked as *aliased* in LLVM. Given a particular architecture, you
+can check which registers are aliased by inspecting its ``RegisterInfo.td``
+file. Moreover, the class ``MCRegAliasIterator`` enumerates all the physical
+registers aliased to a register.
+
+Physical registers, in LLVM, are grouped in *Register Classes*. Elements in the
+same register class are functionally equivalent, and can be interchangeably
+used. Each virtual register can only be mapped to physical registers of a
+particular class. For instance, in the X86 architecture, some virtuals can only
+be allocated to 8 bit registers. A register class is described by
+``TargetRegisterClass`` objects. To discover if a virtual register is
+compatible with a given physical register, this code can be used:
+
+.. code-block:: c++
+
+  bool RegMapping_Fer::compatible_class(MachineFunction &mf,
+                                        unsigned v_reg,
+                                        unsigned p_reg) {
+    assert(TargetRegisterInfo::isPhysicalRegister(p_reg) &&
+           "Target register must be physical");
+    const TargetRegisterClass *trc = mf.getRegInfo().getRegClass(v_reg);
+    return trc->contains(p_reg);
+  }
+
+Sometimes, mostly for debugging purposes, it is useful to change the number of
+physical registers available in the target architecture. This must be done
+statically, inside the ``TargetRegisterInfo.td`` file. Just ``grep`` for
+``RegisterClass``, the last parameter of which is a list of registers. Just
+commenting some out is one simple way to avoid them being used. A more polite
+way is to explicitly exclude some registers from the *allocation order*. See the
+definition of the ``GR8`` register class in
+``lib/Target/X86/X86RegisterInfo.td`` for an example of this.
+
+Virtual registers are also denoted by integer numbers. Contrary to physical
+registers, different virtual registers never share the same number. Whereas
+physical registers are statically defined in a ``TargetRegisterInfo.td`` file
+and cannot be created by the application developer, that is not the case with
+virtual registers. In order to create new virtual registers, use the method
+``MachineRegisterInfo::createVirtualRegister()``. This method will return a new
+virtual register. Use an ``IndexedMap<Foo, VirtReg2IndexFunctor>`` to hold
+information per virtual register. If you need to enumerate all virtual
+registers, use the function ``TargetRegisterInfo::index2VirtReg()`` to find the
+virtual register numbers:
+
+.. code-block:: c++
+
+  for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+    unsigned VirtReg = TargetRegisterInfo::index2VirtReg(i);
+    stuff(VirtReg);
+  }
+
+Before register allocation, the operands of an instruction are mostly virtual
+registers, although physical registers may also be used. In order to check if a
+given machine operand is a register, use the boolean function
+``MachineOperand::isReg()``. To obtain the integer code of a register, use
+``MachineOperand::getReg()``. An instruction may define or use a register. For
+instance, ``ADD reg:1026 := reg:1025 reg:1024`` defines register 1026 and uses
+registers 1025 and 1024. Given a register operand, the method
+``MachineOperand::isUse()`` informs if that register is being used by the
+instruction. The method ``MachineOperand::isDef()`` informs if that register is
+being defined.
+
+We will call physical registers present in the LLVM bitcode before register
+allocation *pre-colored registers*. Pre-colored registers are used in many
+different situations, for instance, to pass parameters of function calls, and
+to store results of particular instructions. There are two types of pre-colored
+registers: the ones *implicitly* defined, and those *explicitly*
+defined. Explicitly defined registers are normal operands, and can be accessed
+with ``MachineInstr::getOperand(int)::getReg()``. In order to check which
+registers are implicitly defined by an instruction, use
+``TargetInstrInfo::get(opcode)::ImplicitDefs``, where ``opcode`` is the opcode
+of the target instruction. One important difference between explicit and
+implicit physical registers is that the latter are defined statically for each
+instruction, whereas the former may vary depending on the program being
+compiled. For example, an instruction that represents a function call will
+always implicitly define or use the same set of physical registers. To read the
+registers implicitly used by an instruction, use
+``TargetInstrInfo::get(opcode)::ImplicitUses``. Pre-colored registers impose
+constraints on any register allocation algorithm. The register allocator must
+make sure that none of them are overwritten by the values of virtual registers
+while still alive.
+
+Mapping virtual registers to physical registers
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+There are two ways to map virtual registers to physical registers (or to memory
+slots). The first way, that we will call *direct mapping*, is based on the use
+of methods of the classes ``TargetRegisterInfo``, and ``MachineOperand``. The
+second way, that we will call *indirect mapping*, relies on the ``VirtRegMap``
+class in order to insert loads and stores sending and getting values to and from
+memory.
+
+The direct mapping provides more flexibility to the developer of the register
+allocator; however, it is more error prone, and demands more implementation
+work. Basically, the programmer will have to specify where load and store
+instructions should be inserted in the target function being compiled in order
+to get and store values in memory. To assign a physical register to a virtual
+register present in a given operand, use ``MachineOperand::setReg(p_reg)``. To
+insert a store instruction, use ``TargetInstrInfo::storeRegToStackSlot(...)``,
+and to insert a load instruction, use ``TargetInstrInfo::loadRegFromStackSlot``.
+
+The indirect mapping shields the application developer from the complexities of
+inserting load and store instructions.
In order to map a virtual register to a
+physical one, use ``VirtRegMap::assignVirt2Phys(vreg, preg)``. In order to map
+a certain virtual register to memory, use
+``VirtRegMap::assignVirt2StackSlot(vreg)``. This method will return the stack
+slot where ``vreg``'s value will be located. If it is necessary to map another
+virtual register to the same stack slot, use
+``VirtRegMap::assignVirt2StackSlot(vreg, stack_location)``. One important point
+to consider when using the indirect mapping is that even if a virtual register
+is mapped to memory, it still needs to be mapped to a physical register. This
+physical register is the location where the virtual register is supposed to be
+found before being stored or after being reloaded.
+
+If the indirect strategy is used, after all the virtual registers have been
+mapped to physical registers or stack slots, it is necessary to use a spiller
+object to place load and store instructions in the code. Every virtual that has
+been mapped to a stack slot will be stored to memory after being defined and will
+be loaded before being used. The implementation of the spiller tries to recycle
+load/store instructions, avoiding unnecessary instructions. For an example of
+how to invoke the spiller, see ``RegAllocLinearScan::runOnMachineFunction`` in
+``lib/CodeGen/RegAllocLinearScan.cpp``.
+
+Handling two address instructions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+With very rare exceptions (e.g., function calls), the LLVM machine code
+instructions are three address instructions. That is, each instruction is
+expected to define at most one register, and to use at most two registers.
+However, some architectures use two address instructions. In this case, the
+defined register is also one of the used registers. For instance, an instruction
+such as ``ADD %EAX, %EBX``, in X86, is actually equivalent to ``%EAX = %EAX +
+%EBX``.
+
+In order to produce correct code, LLVM must convert three address instructions
+that represent two address instructions into true two address instructions. LLVM
+provides the pass ``TwoAddressInstructionPass`` for this specific purpose. It
+must be run before register allocation takes place. After its execution, the
+resulting code may no longer be in SSA form. This happens, for instance, in
+situations where an instruction such as ``%a = ADD %b %c`` is converted to two
+instructions such as:
+
+::
+
+  %a = MOVE %b
+  %a = ADD %a %c
+
+Notice that, internally, the second instruction is represented as ``ADD
+%a[def/use] %c``. I.e., the register operand ``%a`` is both used and defined by
+the instruction.
+
+The SSA deconstruction phase
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+An important transformation that happens during register allocation is called
+the *SSA Deconstruction Phase*. The SSA form simplifies many analyses that are
+performed on the control flow graph of programs. However, traditional
+instruction sets do not implement PHI instructions. Thus, in order to generate
+executable code, compilers must replace PHI instructions with other instructions
+that preserve their semantics.
+
+There are many ways in which PHI instructions can safely be removed from the
+target code. The most traditional PHI deconstruction algorithm replaces PHI
+instructions with copy instructions. That is the strategy adopted by LLVM. The
+SSA deconstruction algorithm is implemented in
+``lib/CodeGen/PHIElimination.cpp``. In order to invoke this pass, the identifier
+``PHIEliminationID`` must be marked as required in the code of the register
+allocator.
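+
+For instance (a schematic example in the pseudo notation used above), a ``PHI``
+node in a join block becomes a copy at the end of each predecessor:
+
+::
+
+  ;; Before SSA deconstruction
+  bb2:
+    %c = PHI [ %a, bb0 ], [ %b, bb1 ]
+
+  ;; After PHI elimination
+  bb0:
+    ...
+    %c = MOVE %a
+  bb1:
+    ...
+    %c = MOVE %b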
+
+Instruction folding
+^^^^^^^^^^^^^^^^^^^
+
+*Instruction folding* is an optimization performed during register allocation
+that removes unnecessary copy instructions. For instance, a sequence of
+instructions such as:
+
+::
+
+  %EBX = LOAD %mem_address
+  %EAX = COPY %EBX
+
+can be safely substituted by the single instruction:
+
+::
+
+  %EAX = LOAD %mem_address
+
+Instructions can be folded with the
+``TargetRegisterInfo::foldMemoryOperand(...)`` method. Care must be taken when
+folding instructions; a folded instruction can be quite different from the
+original instruction. See ``LiveIntervals::addIntervalsForSpills`` in
+``lib/CodeGen/LiveIntervalAnalysis.cpp`` for an example of its use.
+
+Built in register allocators
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The LLVM infrastructure provides the application developer with four different
+register allocators:
+
+* *Fast* --- This register allocator is the default for debug builds. It
+  allocates registers on a basic block level, attempting to keep values in
+  registers and reusing registers as appropriate.
+
+* *Basic* --- This is an incremental approach to register allocation. Live
+  ranges are assigned to registers one at a time in an order that is driven by
+  heuristics. Since code can be rewritten on-the-fly during allocation, this
+  framework allows interesting allocators to be developed as extensions. It is
+  not itself a production register allocator but is a potentially useful
+  stand-alone mode for triaging bugs and as a performance baseline.
+
+* *Greedy* --- *The default allocator*. This is a highly tuned implementation of
+  the *Basic* allocator that incorporates global live range splitting. This
+  allocator works hard to minimize the cost of spill code.
+
+* *PBQP* --- A Partitioned Boolean Quadratic Programming (PBQP) based register
+  allocator. This allocator works by constructing a PBQP problem representing
+  the register allocation problem under consideration, solving this using a PBQP
+  solver, and mapping the solution back to a register assignment.
+
+The type of register allocator used in ``llc`` can be chosen with the command
+line option ``-regalloc=...``:
+
+.. code-block:: bash
+
+  $ llc -regalloc=basic file.bc -o ba.s
+  $ llc -regalloc=fast file.bc -o fa.s
+  $ llc -regalloc=pbqp file.bc -o pbqp.s
+
+.. _Prolog/Epilog Code Insertion:
+
+Prolog/Epilog Code Insertion
+----------------------------
+
+Compact Unwind
+^^^^^^^^^^^^^^
+
+Throwing an exception requires *unwinding* out of a function. The information on
+how to unwind a given function is traditionally expressed in DWARF unwind
+(a.k.a. frame) info. But that format was originally developed for debuggers to
+backtrace, and each Frame Description Entry (FDE) requires ~20-30 bytes per
+function. There is also the cost of mapping from an address in a function to the
+corresponding FDE at runtime. An alternative unwind encoding is called *compact
+unwind* and requires just 4-bytes per function.
+
+The compact unwind encoding is a 32-bit value, which is encoded in an
+architecture-specific way. It specifies which registers to restore and from
+where, and how to unwind out of the function. When the linker creates a final
+linked image, it will create a ``__TEXT,__unwind_info`` section. This section is
+a small and fast way for the runtime to access unwind info for any given
+function. If we emit compact unwind info for the function, that compact unwind
+info will be encoded in the ``__TEXT,__unwind_info`` section.
If we emit DWARF
+unwind info, the ``__TEXT,__unwind_info`` section will contain the offset of the
+FDE in the ``__TEXT,__eh_frame`` section in the final linked image.
+
+For X86, there are three modes for the compact unwind encoding:
+
+*Function with a Frame Pointer (``EBP`` or ``RBP``)*
+  ``EBP/RBP``-based frame, where ``EBP/RBP`` is pushed onto the stack
+  immediately after the return address, then ``ESP/RSP`` is moved to
+  ``EBP/RBP``. Thus to unwind, ``ESP/RSP`` is restored with the current
+  ``EBP/RBP`` value, then ``EBP/RBP`` is restored by popping the stack, and the
+  return is done by popping the stack once more into the PC. All non-volatile
+  registers that need to be restored must have been saved in a small range on
+  the stack that starts at ``EBP-4`` and extends to ``EBP-1020`` (``RBP-8`` to
+  ``RBP-1020``). The offset (divided by 4 in 32-bit mode and 8 in 64-bit mode)
+  is encoded in bits 16-23 (mask: ``0x00FF0000``). The registers saved are
+  encoded in bits 0-14 (mask: ``0x00007FFF``) as five 3-bit entries from the
+  following table:
+
+  ============== ============= ===============
+  Compact Number i386 Register x86-64 Register
+  ============== ============= ===============
+  1              ``EBX``       ``RBX``
+  2              ``ECX``       ``R12``
+  3              ``EDX``       ``R13``
+  4              ``EDI``       ``R14``
+  5              ``ESI``       ``R15``
+  6              ``EBP``       ``RBP``
+  ============== ============= ===============
+
+*Frameless with a Small Constant Stack Size (``EBP`` or ``RBP`` is not used as a frame pointer)*
+  To return, a constant (encoded in the compact unwind encoding) is added to the
+  ``ESP/RSP``. Then the return is done by popping the stack into the PC. All
+  non-volatile registers that need to be restored must have been saved on the
+  stack immediately after the return address. The stack size (divided by 4 in
+  32-bit mode and 8 in 64-bit mode) is encoded in bits 16-23 (mask:
+  ``0x00FF0000``). There is a maximum stack size of 1024 bytes in 32-bit mode
+  and 2048 in 64-bit mode. The number of registers saved is encoded in bits
+  10-12 (mask: ``0x00001C00``). Bits 0-9 (mask: ``0x000003FF``) contain which
+  registers were saved and their order. (See the
+  ``encodeCompactUnwindRegistersWithoutFrame()`` function in
+  ``lib/Target/X86/X86FrameLowering.cpp`` for the encoding algorithm.)
+
+*Frameless with a Large Constant Stack Size (``EBP`` or ``RBP`` is not used as a frame pointer)*
+  This case is like the "Frameless with a Small Constant Stack Size" case, but
+  the stack size is too large to encode in the compact unwind encoding. Instead
+  it requires that the function contains "``subl $nnnnnn, %esp``" in its
+  prolog. The compact encoding contains the offset to the ``$nnnnnn`` value in
+  the function in bits 10-12 (mask: ``0x00001C00``).
+
+.. _Late Machine Code Optimizations:
+
+Late Machine Code Optimizations
+-------------------------------
+
+.. note::
+
+  To Be Written
+
+.. _Code Emission:
+
+Code Emission
+-------------
+
+The code emission step of code generation is responsible for lowering from the
+code generator abstractions (like `MachineFunction`_, `MachineInstr`_, etc) down
+to the abstractions used by the MC layer (`MCInst`_, `MCStreamer`_, etc). This
+is done with a combination of several different classes: the (misnamed)
+target-independent AsmPrinter class, target-specific subclasses of AsmPrinter
+(such as SparcAsmPrinter), and the TargetLoweringObjectFile class.
+
+Since the MC layer works at the level of abstraction of object files, it doesn't
+have a notion of functions, global variables, etc.
Instead, it thinks about
+labels, directives, and instructions. A key class used at this time is the
+MCStreamer class. This is an abstract API that is implemented in different ways
+(e.g. to output a .s file, output an ELF .o file, etc); it is effectively an
+"assembler API". MCStreamer has one method per directive, such as EmitLabel,
+EmitSymbolAttribute, SwitchSection, etc, which directly correspond to assembly
+level directives.
+
+If you are interested in implementing a code generator for a target, there are
+three important things that you have to implement for your target:
+
+#. First, you need a subclass of AsmPrinter for your target. This class
+   implements the general lowering process converting MachineFunction's into MC
+   label constructs. The AsmPrinter base class provides a number of useful
+   methods and routines, and also allows you to override the lowering process in
+   some important ways. You should get much of the lowering for free if you are
+   implementing an ELF, COFF, or MachO target, because the
+   TargetLoweringObjectFile class implements much of the common logic.
+
+#. Second, you need to implement an instruction printer for your target. The
+   instruction printer takes an `MCInst`_ and renders it to a raw_ostream as
+   text. Most of this is automatically generated from the .td file (when you
+   specify something like "``add $dst, $src1, $src2``" in the instructions), but
+   you need to implement routines to print operands.
+
+#. Third, you need to implement code that lowers a `MachineInstr`_ to an MCInst,
+   usually implemented in "<target>MCInstLower.cpp". This lowering process is
+   often target specific, and is responsible for turning jump table entries,
+   constant pool indices, global variable addresses, etc into MCLabels as
+   appropriate. This translation layer is also responsible for expanding pseudo
+   ops used by the code generator into the actual machine instructions they
+   correspond to. The MCInsts that are generated by this are fed into the
+   instruction printer or the encoder.
+
+Finally, at your choosing, you can also implement a subclass of MCCodeEmitter
+which lowers MCInst's into machine code bytes and relocations. This is
+important if you want to support direct .o file emission, or would like to
+implement an assembler for your target.
+
+VLIW Packetizer
+---------------
+
+In a Very Long Instruction Word (VLIW) architecture, the compiler is responsible
+for mapping instructions to functional units available on the architecture. To
+that end, the compiler creates groups of instructions called *packets* or
+*bundles*. The VLIW packetizer in LLVM is a target-independent mechanism to
+enable the packetization of machine instructions.
+
+Mapping from instructions to functional units
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Instructions in a VLIW target can typically be mapped to multiple functional
+units. During the process of packetizing, the compiler must be able to reason
+about whether an instruction can be added to a packet. This decision can be
+complex since the compiler has to examine all possible mappings of instructions
+to functional units. Therefore, to alleviate compilation-time complexity, the
+VLIW packetizer parses the instruction classes of a target and generates tables
+at compiler build time. These tables can then be queried by the provided
+machine-independent API to determine if an instruction can be accommodated in a
+packet.
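+
+As a sketch of how these tables are consumed (using the three ``DFAPacketizer``
+entry points described in the next subsection; the surrounding function is
+invented for the example, not part of any real pass):
+
+.. code-block:: c++
+
+  // Greedily grow the current packet while the DFA still has a legal
+  // transition for the candidate instruction.
+  void addToPacket(DFAPacketizer &DFA, MachineInstr *MI,
+                   std::vector<MachineInstr*> &CurrentPacket) {
+    if (!DFA.canReserveResources(MI)) {
+      // No legal mapping of functional units: end the current packet.
+      CurrentPacket.clear();
+      DFA.clearResources();
+    }
+    DFA.reserveResources(MI);   // consume functional units for MI
+    CurrentPacket.push_back(MI);
+  }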
+
+How the packetization tables are generated and used
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The packetizer reads instruction classes from a target's itineraries and creates
+a deterministic finite automaton (DFA) to represent the state of a packet. A DFA
+consists of three major elements: inputs, states, and transitions. The set of
+inputs for the generated DFA represents the instruction being added to a
+packet. The states represent the possible consumption of functional units by
+instructions in a packet. In the DFA, transitions from one state to another
+occur on the addition of an instruction to an existing packet. If there is a
+legal mapping of functional units to instructions, then the DFA contains a
+corresponding transition. The absence of a transition indicates that a legal
+mapping does not exist and that the instruction cannot be added to the packet.
+
+To generate tables for a VLIW target, add *Target*\ GenDFAPacketizer.inc as a
+target to the Makefile in the target directory. The exported API provides three
+functions: ``DFAPacketizer::clearResources()``,
+``DFAPacketizer::reserveResources(MachineInstr *MI)``, and
+``DFAPacketizer::canReserveResources(MachineInstr *MI)``. These functions allow
+a target packetizer to add an instruction to an existing packet and to check
+whether an instruction can be added to a packet. See
+``llvm/CodeGen/DFAPacketizer.h`` for more information.
+
+Implementing a Native Assembler
+===============================
+
+Though you're probably reading this because you want to write or maintain a
+compiler backend, LLVM also fully supports building native assemblers.
+We've tried hard to automate the generation of the assembler from the .td files
+(in particular the instruction syntax and encodings), which means that a large
+part of the manual and repetitive data entry can be factored and shared with the
+compiler.
+
+Instruction Parsing
+-------------------
+
+.. note::
+
+  To Be Written
+
+
+Instruction Alias Processing
+----------------------------
+
+Once the instruction is parsed, it enters the MatchInstructionImpl function.
+The MatchInstructionImpl function performs alias processing and then does actual
+matching.
+
+Alias processing is the phase that canonicalizes different lexical forms of the
+same instructions down to one representation. There are several different kinds
+of aliases that are possible to implement and they are listed below in the order
+that they are processed (which is in order from simplest/weakest to most
+complex/powerful). Generally you want to use the first alias mechanism that
+meets the needs of your instruction, because it will allow a more concise
+description.
+
+Mnemonic Aliases
+^^^^^^^^^^^^^^^^
+
+The first phase of alias processing is simple instruction mnemonic remapping for
+classes of instructions which are allowed with two different mnemonics. This
+phase is a simple and unconditional remapping from one input mnemonic to one
+output mnemonic. It isn't possible for this form of alias to look at the
+operands at all, so the remapping must apply for all forms of a given mnemonic.
+Mnemonic aliases are defined simply, for example X86 has:
+
+::
+
+  def : MnemonicAlias<"cbw",     "cbtw">;
+  def : MnemonicAlias<"smovq",   "movsq">;
+  def : MnemonicAlias<"fldcww",  "fldcw">;
+  def : MnemonicAlias<"fucompi", "fucomip">;
+  def : MnemonicAlias<"ud2a",    "ud2">;
+
+... and many others. With a MnemonicAlias definition, the mnemonic is remapped
+simply and directly.
Though MnemonicAlias's can't look at any aspect of the
+instruction (such as the operands), they can depend on global modes (the same
+ones supported by the matcher), through a Requires clause:
+
+::
+
+  def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
+  def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;
+
+In this example, the mnemonic gets mapped to a different mnemonic depending on
+the current instruction set.
+
+Instruction Aliases
+^^^^^^^^^^^^^^^^^^^
+
+The most general phase of alias processing occurs while matching is happening:
+it provides new forms for the matcher to match along with a specific instruction
+to generate. An instruction alias has two parts: the string to match and the
+instruction to generate. For example:
+
+::
+
+  def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8  :$src)>;
+  def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>;
+  def : InstAlias<"movsx $src, $dst", (MOVSX32rr8  GR32:$dst, GR8  :$src)>;
+  def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16 :$src)>;
+  def : InstAlias<"movsx $src, $dst", (MOVSX64rr8  GR64:$dst, GR8  :$src)>;
+  def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16 :$src)>;
+  def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32 :$src)>;
+
+This shows a powerful example of instruction aliases, matching the same
+mnemonic in multiple different ways depending on what operands are present in
+the assembly. The result of instruction aliases can include operands in a
+different order than the destination instruction, and can use an input multiple
+times, for example:
+
+::
+
+  def : InstAlias<"clrb $reg", (XOR8rr  GR8 :$reg, GR8 :$reg)>;
+  def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>;
+  def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>;
+  def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>;
+
+This example also shows that tied operands are only listed once. In the X86
+backend, XOR8rr has two input GR8's and one output GR8 (where an input is tied
+to the output). InstAliases take a flattened operand list without duplicates
+for tied operands. The result of an instruction alias can also use immediates
+and fixed physical registers which are added as simple immediate operands in the
+result, for example:
+
+::
+
+  // Fixed Immediate operand.
+  def : InstAlias<"aad", (AAD8i8 10)>;
+
+  // Fixed register operand.
+  def : InstAlias<"fcomi", (COM_FIr ST1)>;
+
+  // Simple alias.
+  def : InstAlias<"fcomi $reg", (COM_FIr RST:$reg)>;
+
+Instruction aliases can also have a Requires clause to make them subtarget
+specific.
+
+If the back-end supports it, the instruction printer can automatically emit the
+alias rather than what's being aliased. It typically leads to better, more
+readable code. If it's better to print out what's being aliased, then pass a '0'
+as the third parameter to the InstAlias definition.
+
+Instruction Matching
+--------------------
+
+.. note::
+
+  To Be Written
+
+.. _Implementations of the abstract target description interfaces:
+.. _implement the target description:
+
+Target-specific Implementation Notes
+====================================
+
+This section of the document explains features or design decisions that are
+specific to the code generator for a particular target. First we start with a
+table that summarizes what features are supported by each target.
+
+Target Feature Matrix
+---------------------
+
+Note that this table does not include the C or C++ backends, since they do not
+use the target independent code generator infrastructure. It also doesn't list
+features that are not supported fully by any target yet. It considers a feature
+to be supported if at least one subtarget supports it; a feature being
+supported means that it is useful and works for most cases, not that there are
+zero known bugs in the implementation. Here is the key:
+
+:raw-html:`<table border="1" cellspacing="0">`
+:raw-html:`<tr>`
+:raw-html:`<th>Unknown</th>`
+:raw-html:`<th>No support</th>`
+:raw-html:`<th>Partial Support</th>`
+:raw-html:`<th>Complete Support</th>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td class="unknown"></td>`
+:raw-html:`<td class="no"></td>`
+:raw-html:`<td class="partial"></td>`
+:raw-html:`<td class="yes"></td>`
+:raw-html:`</tr>`
+:raw-html:`</table>`
+
+Here is the table:
+
+:raw-html:`<table width="689" border="1" cellspacing="0">`
+:raw-html:`<tr><td></td>`
+:raw-html:`<td colspan="11" align="center" style="background-color:#ffc">Target</td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<th>Feature</th>`
+:raw-html:`<th>ARM</th>`
+:raw-html:`<th>CellSPU</th>`
+:raw-html:`<th>Hexagon</th>`
+:raw-html:`<th>MBlaze</th>`
+:raw-html:`<th>MSP430</th>`
+:raw-html:`<th>Mips</th>`
+:raw-html:`<th>PTX</th>`
+:raw-html:`<th>PowerPC</th>`
+:raw-html:`<th>Sparc</th>`
+:raw-html:`<th>X86</th>`
+:raw-html:`<th>XCore</th>`
+:raw-html:`</tr>`
+
+:raw-html:`<tr>`
+:raw-html:`<td><a href="#feat_reliable">is generally reliable</a></td>`
+:raw-html:`<td class="yes"></td> <!-- ARM -->`
+:raw-html:`<td class="no"></td> <!-- CellSPU -->`
+:raw-html:`<td class="yes"></td> <!-- Hexagon -->`
+:raw-html:`<td class="no"></td> <!-- MBlaze -->`
+:raw-html:`<td class="unknown"></td> <!-- MSP430 -->`
+:raw-html:`<td class="yes"></td> <!-- Mips -->`
+:raw-html:`<td class="no"></td> <!-- PTX -->`
+:raw-html:`<td class="yes"></td> <!-- PowerPC -->`
+:raw-html:`<td class="yes"></td> <!-- Sparc -->`
+:raw-html:`<td class="yes"></td> <!-- X86 -->`
+:raw-html:`<td class="unknown"></td> <!-- XCore -->`
+:raw-html:`</tr>`
+
+:raw-html:`<tr>`
+:raw-html:`<td><a href="#feat_asmparser">assembly parser</a></td>`
+:raw-html:`<td class="no"></td> <!-- ARM -->`
+:raw-html:`<td class="no"></td> <!-- CellSPU -->`
+:raw-html:`<td class="no"></td> <!-- Hexagon -->`
+:raw-html:`<td class="yes"></td> <!-- MBlaze -->`
+:raw-html:`<td class="no"></td> <!-- MSP430 -->`
+:raw-html:`<td class="no"></td> <!-- Mips -->`
+:raw-html:`<td class="no"></td> <!-- PTX -->`
+:raw-html:`<td class="no"></td> <!-- PowerPC -->`
+:raw-html:`<td class="no"></td> <!-- Sparc -->`
+:raw-html:`<td class="yes"></td> <!-- X86 -->`
+:raw-html:`<td class="no"></td> <!-- XCore -->`
+:raw-html:`</tr>`
+
+:raw-html:`<tr>`
+:raw-html:`<td><a href="#feat_disassembler">disassembler</a></td>`
+:raw-html:`<td class="yes"></td> <!-- ARM -->`
+:raw-html:`<td class="no"></td> <!-- CellSPU -->`
+:raw-html:`<td class="no"></td> <!-- Hexagon -->`
+:raw-html:`<td class="yes"></td> <!-- MBlaze -->`
+:raw-html:`<td class="no"></td> <!-- MSP430 -->`
+:raw-html:`<td class="no"></td> <!-- Mips -->`
+:raw-html:`<td class="no"></td> <!-- PTX -->`
+:raw-html:`<td class="no"></td> <!-- PowerPC -->`
+:raw-html:`<td class="no"></td> <!-- Sparc -->`
+:raw-html:`<td class="yes"></td> <!-- X86 -->`
+:raw-html:`<td class="no"></td> <!-- XCore -->`
+:raw-html:`</tr>`
+
+:raw-html:`<tr>`
+:raw-html:`<td><a href="#feat_inlineasm">inline asm</a></td>`
+:raw-html:`<td class="yes"></td> <!-- ARM -->`
+:raw-html:`<td class="no"></td> <!-- CellSPU -->`
+:raw-html:`<td class="yes"></td> <!-- Hexagon -->`
+:raw-html:`<td class="yes"></td> <!-- MBlaze -->`
+:raw-html:`<td class="unknown"></td> <!-- MSP430 -->`
+:raw-html:`<td class="no"></td> <!-- Mips -->`
+:raw-html:`<td class="unknown"></td> <!-- PTX -->`
+:raw-html:`<td class="yes"></td> <!-- PowerPC -->`
+:raw-html:`<td class="unknown"></td> <!-- Sparc -->`
+:raw-html:`<td class="yes"></td> <!-- X86 -->`
+:raw-html:`<td class="unknown"></td> <!-- XCore -->`
+:raw-html:`</tr>`
+
+:raw-html:`<tr>`
+:raw-html:`<td><a href="#feat_jit">jit</a></td>`
+:raw-html:`<td class="partial"><a href="#feat_jit_arm">*</a></td> <!-- ARM -->`
+:raw-html:`<td class="no"></td> <!-- CellSPU -->`
+:raw-html:`<td class="no"></td> <!-- Hexagon -->`
+:raw-html:`<td class="no"></td> <!-- MBlaze -->`
+:raw-html:`<td class="unknown"></td> <!-- MSP430 -->`
+:raw-html:`<td class="yes"></td> <!-- Mips -->`
+:raw-html:`<td class="unknown"></td> <!-- PTX -->`
+:raw-html:`<td class="yes"></td> <!-- PowerPC -->`
+:raw-html:`<td class="unknown"></td> <!-- Sparc -->`
+:raw-html:`<td class="yes"></td> <!-- X86 -->`
+:raw-html:`<td class="unknown"></td> <!-- XCore -->`
+:raw-html:`</tr>`
+
+:raw-html:`<tr>`
+:raw-html:`<td><a href="#feat_objectwrite">.o file writing</a></td>`
+:raw-html:`<td class="no"></td> <!-- ARM -->`
+:raw-html:`<td class="no"></td> <!-- CellSPU -->`
+:raw-html:`<td class="no"></td> <!-- Hexagon -->`
+:raw-html:`<td class="yes"></td> <!-- MBlaze -->`
+:raw-html:`<td class="no"></td> <!-- MSP430 -->`
+:raw-html:`<td class="no"></td> <!-- Mips -->`
+:raw-html:`<td class="no"></td> <!-- PTX -->`
+:raw-html:`<td class="no"></td> <!-- PowerPC -->`
+:raw-html:`<td class="no"></td> <!-- Sparc -->`
+:raw-html:`<td class="yes"></td> <!-- X86 -->`
+:raw-html:`<td class="no"></td> <!-- XCore -->`
+:raw-html:`</tr>`
+
+:raw-html:`<tr>`
+:raw-html:`<td><a href="#feat_tailcall">tail calls</a></td>`
+:raw-html:`<td class="yes"></td> <!-- ARM -->`
+:raw-html:`<td class="no"></td> <!-- CellSPU -->`
+:raw-html:`<td class="yes"></td> <!-- Hexagon -->`
+:raw-html:`<td class="no"></td> <!-- MBlaze -->`
+:raw-html:`<td class="unknown"></td> <!-- MSP430 -->`
+:raw-html:`<td class="no"></td> <!-- Mips -->`
+:raw-html:`<td class="unknown"></td> <!-- PTX -->`
+:raw-html:`<td class="yes"></td> <!-- PowerPC -->`
+:raw-html:`<td class="unknown"></td> <!-- Sparc -->`
+:raw-html:`<td class="yes"></td> <!-- X86 -->`
+:raw-html:`<td class="unknown"></td> <!-- XCore -->`
+:raw-html:`</tr>`
+
+:raw-html:`<tr>`
+:raw-html:`<td><a href="#feat_segstacks">segmented stacks</a></td>`
+:raw-html:`<td class="no"></td> <!-- ARM -->`
+:raw-html:`<td class="no"></td> <!-- CellSPU -->`
+:raw-html:`<td class="no"></td> <!-- Hexagon -->`
+:raw-html:`<td class="no"></td> <!-- MBlaze -->`
+:raw-html:`<td class="no"></td> <!-- MSP430 -->`
+:raw-html:`<td class="no"></td> <!-- Mips -->`
+:raw-html:`<td class="no"></td> <!-- PTX -->`
+:raw-html:`<td class="no"></td> <!-- PowerPC -->`
+:raw-html:`<td class="no"></td> <!-- Sparc -->`
+:raw-html:`<td class="partial"><a href="#feat_segstacks_x86">*</a></td> <!-- X86 -->`
+:raw-html:`<td class="no"></td> <!-- XCore -->`
+:raw-html:`</tr>`
+
+:raw-html:`</table>`
+
+.. _feat_reliable:
+
+Is Generally Reliable
+^^^^^^^^^^^^^^^^^^^^^
+
+This box indicates whether the target is considered to be production quality.
+This indicates that the target has been used as a static compiler to compile
+large amounts of code by a variety of different people and is in continuous use.
+
+.. _feat_asmparser:
+
+Assembly Parser
+^^^^^^^^^^^^^^^
+
+This box indicates whether the target supports parsing target-specific .s files
+by implementing the MCAsmParser interface. This is required for llvm-mc to be
+able to act as a native assembler and is required for inline assembly support in
+the native .o file writer.
+
+.. _feat_disassembler:
+
+Disassembler
+^^^^^^^^^^^^
+
+This box indicates whether the target supports the MCDisassembler API for
+disassembling machine opcode bytes into MCInst's.
+
+.. _feat_inlineasm:
+
+Inline Asm
+^^^^^^^^^^
+
+This box indicates whether the target supports most popular inline assembly
+constraints and modifiers.
+
+.. _feat_jit:
+
+JIT Support
+^^^^^^^^^^^
+
+This box indicates whether the target supports the JIT compiler through the
+ExecutionEngine interface.
+
+.. _feat_jit_arm:
+
+The ARM backend has basic support for integer code in ARM codegen mode, but
+lacks NEON and full Thumb support.
+
+.. _feat_objectwrite:
+
+.o File Writing
+^^^^^^^^^^^^^^^
+
+This box indicates whether the target supports writing .o files (e.g. MachO,
+ELF, and/or COFF) directly from the target. Note that the target must also
+include an assembly parser and general inline assembly support for full
+inline assembly support in the .o writer.
+
+Targets that don't support this feature can obviously still write out .o files;
+they just rely on having an external assembler to translate from a .s file to a
+.o file (as is the case for many C compilers).
+
+.. _feat_tailcall:
+
+Tail Calls
+^^^^^^^^^^
+
+This box indicates whether the target supports guaranteed tail calls. These are
+calls marked "`tail <LangRef.html#i_call>`_" and use the fastcc calling
+convention. Please see the `tail call section for more details`_.
+
+.. _feat_segstacks:
+
+Segmented Stacks
+^^^^^^^^^^^^^^^^
+
+This box indicates whether the target supports segmented stacks. This replaces
+the traditional large C stack with many linked segments. It is compatible with
+the `gcc implementation <http://gcc.gnu.org/wiki/SplitStacks>`_ used by the Go
+front end.
+
+.. _feat_segstacks_x86:
+
+Basic support exists on the X86 backend. Currently vararg doesn't work and the
+object files are not marked the way the gold linker expects, but simple Go
+programs can be built by dragonegg.
+
+.. _tail call section for more details:
+
+Tail call optimization
+----------------------
+
+Tail call optimization, where the callee reuses the stack of the caller, is
+currently supported on x86/x86-64 and PowerPC. It is performed if:
+
+* Caller and callee have the calling convention ``fastcc`` or ``cc 10`` (GHC
+  call convention).
+
+* The call is a tail call - in tail position (ret immediately follows call and
+  ret uses value of call or is void).
+
+* Option ``-tailcallopt`` is enabled.
+
+* Platform-specific constraints are met.
+
+x86/x86-64 constraints:
+
+* No variable argument lists are used.
+
+* On x86-64 when generating GOT/PIC code only module-local calls (visibility =
+  hidden or protected) are supported.
+
+PowerPC constraints:
+
+* No variable argument lists are used.
+
+* No byval parameters are used.
+
+* On ppc32/64 GOT/PIC only module-local calls (visibility = hidden or protected)
+  are supported.
+
+Example:
+
+Call as ``llc -tailcallopt test.ll``.
+
+.. code-block:: llvm
+
+  declare fastcc i32 @tailcallee(i32 inreg %a1, i32 inreg %a2, i32 %a3, i32 %a4)
+
+  define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
+    %l1 = add i32 %in1, %in2
+    %tmp = tail call fastcc i32 @tailcallee(i32 %in1 inreg, i32 %in2 inreg, i32 %in1, i32 %l1)
+    ret i32 %tmp
+  }
+
+Implications of ``-tailcallopt``:
+
+To support tail call optimization in situations where the callee has more
+arguments than the caller, a 'callee pops arguments' convention is used. This
+currently causes each ``fastcc`` call that is not tail call optimized (because
+one or more of the above constraints are not met) to be followed by a
+readjustment of the stack, so performance might be worse in such cases.
+
+Sibling call optimization
+-------------------------
+
+Sibling call optimization is a restricted form of tail call optimization.
+Unlike the tail call optimization described in the previous section, it can be
+performed automatically on any tail call when the ``-tailcallopt`` option is not
+specified.
+
+Sibling call optimization is currently performed on x86/x86-64 when the
+following constraints are met:
+
+* Caller and callee have the same calling convention. It can be either ``c`` or
+  ``fastcc``.
+
+* The call is a tail call - in tail position (ret immediately follows call and
+  ret uses value of call or is void).
+
+* Caller and callee have a matching return type, or the callee result is not
+  used.
+
+* If any of the callee arguments are being passed on the stack, they must be
+  available in the caller's own incoming argument stack and the frame offsets
+  must be the same.
+
+Example:
+
+.. code-block:: llvm
+
+  declare i32 @bar(i32, i32)
+
+  define i32 @foo(i32 %a, i32 %b, i32 %c) {
+  entry:
+    %0 = tail call i32 @bar(i32 %a, i32 %b)
+    ret i32 %0
+  }
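+
+At the source level, the IR above corresponds to something like the following
+C fragment (illustrative only). With identical calling conventions and the
+callee's result returned unchanged, the call to ``bar`` can be lowered to a
+jump rather than a call/return pair:
+
+.. code-block:: c++
+
+  // C-level view of the sibling call example above.
+  int bar(int a, int b);
+
+  int foo(int a, int b, int c) {
+    return bar(a, b);  // eligible for sibling call optimization
+  }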
+
+The X86 backend
+---------------
+
+The X86 code generator lives in the ``lib/Target/X86`` directory. This code
+generator is capable of targeting a variety of x86-32 and x86-64 processors, and
+includes support for ISA extensions such as MMX and SSE.
+
+X86 Target Triples supported
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The following are the known target triples that are supported by the X86
+backend. This is not an exhaustive list, and it would be useful to add those
+that people test.
+
+* **i686-pc-linux-gnu** --- Linux
+
+* **i386-unknown-freebsd5.3** --- FreeBSD 5.3
+
+* **i686-pc-cygwin** --- Cygwin on Win32
+
+* **i686-pc-mingw32** --- MingW on Win32
+
+* **i386-pc-mingw32msvc** --- MingW crosscompiler on Linux
+
+* **i686-apple-darwin*** --- Apple Darwin on X86
+
+* **x86_64-unknown-linux-gnu** --- Linux
+
+X86 Calling Conventions supported
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The following target-specific calling conventions are known to the backend:
+
+* **x86_StdCall** --- stdcall calling convention seen on Microsoft Windows
+  platform (CC ID = 64).
+
+* **x86_FastCall** --- fastcall calling convention seen on Microsoft Windows
+  platform (CC ID = 65).
+
+* **x86_ThisCall** --- Similar to X86_StdCall. Passes the first argument in ECX,
+  others via the stack. The callee is responsible for stack cleanup. This
+  convention is used by MSVC by default for methods in its ABI (CC ID = 70).
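+
+Front ends typically select these conventions from source-level attributes. As
+a purely illustrative example, with GCC or Clang targeting 32-bit x86, the
+following declarations map to x86_StdCall and x86_FastCall respectively:
+
+.. code-block:: c++
+
+  // Illustrative only: attribute availability depends on the front end.
+  int __attribute__((stdcall))  f(int a, int b);  // lowered as x86_StdCall
+  int __attribute__((fastcall)) g(int a, int b);  // lowered as x86_FastCall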
+
+.. _X86 addressing mode:
+
+Representing X86 addressing modes in MachineInstrs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The x86 has a very flexible way of accessing memory. It is capable of forming
+memory addresses of the following expression form directly in integer
+instructions (which use ModR/M addressing):
+
+::
+
+  SegmentReg: Base + [1,2,4,8] * IndexReg + Disp32
+
+In order to represent this, LLVM tracks no less than 5 operands for each memory
+operand of this form. This means that the "load" form of '``mov``' has the
+following ``MachineOperand``\s in this order:
+
+::
+
+  Index:        0     |    1        2       3           4          5
+  Meaning:   DestReg, | BaseReg,  Scale, IndexReg, Displacement Segment
+  OperandTy: VirtReg, | VirtReg, UnsImm, VirtReg,   SignExtImm  PhysReg
+
+Stores, and all other instructions, treat the five memory operands in the same
+way and in the same order. If the segment register is unspecified (regno = 0),
+then no segment override is generated. "Lea" operations do not have a segment
+register specified, so they only have 4 operands for their memory reference.
+
+X86 address spaces supported
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+x86 has the ability to perform loads and stores to different address spaces via
+the x86 segment registers. A segment override prefix byte on an instruction
+causes the instruction's memory access to go to the specified segment. LLVM
+address space 0 is the default address space, which includes the stack and any
+unqualified memory accesses in a program. Address spaces 1-255 are currently
+reserved for user-defined code. The GS-segment is represented by address space
+256, while the FS-segment is represented by address space 257. Other x86
+segments have yet to be allocated address space numbers.
+
+While these address spaces may seem similar to TLS via the ``thread_local``
+keyword, and often use the same underlying hardware, there are some fundamental
+differences.
+
+The ``thread_local`` keyword applies to global variables and specifies that they
+are to be allocated in thread-local memory. There are no type qualifiers
+involved, and these variables can be pointed to with normal pointers and
+accessed with normal loads and stores. The ``thread_local`` keyword is
+target-independent at the LLVM IR level (though LLVM doesn't yet have
+implementations of it for some configurations).
+
+Special address spaces, in contrast, apply to static types. Every load and store
+has a particular address space in its address operand type, and this is what
+determines which address space is accessed. LLVM ignores these special address
+space qualifiers on global variables, and does not provide a way to directly
+allocate storage in them. At the LLVM IR level, the behavior of these special
+address spaces depends in part on the underlying OS or runtime environment, and
+they are specific to x86 (and LLVM doesn't yet handle them correctly in some
+cases).
+
+Some operating systems and runtime environments use (or may in the future use)
+the FS/GS-segment registers for various low-level purposes, so care should be
+taken when considering them.
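+
+As a concrete, purely illustrative example, Clang's ``address_space`` attribute
+can be used to request one of these segments from C or C++; a load through a
+pointer qualified with address space 256 is emitted with a GS segment override:
+
+.. code-block:: c++
+
+  // Illustrative only: relies on Clang's address_space extension.
+  typedef unsigned long __attribute__((address_space(256))) *gs_ptr;
+
+  unsigned long read_gs(gs_ptr p) {
+    return *p;  // compiles to a mov with a %gs segment override on x86
+  }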
+
+Instruction naming
+^^^^^^^^^^^^^^^^^^
+
+An instruction name consists of the base name, a default operand size, and a
+character per operand with an optional special size. For example:
+
+::
+
+  ADD8rr      -> add, 8-bit register, 8-bit register
+  IMUL16rmi   -> imul, 16-bit register, 16-bit memory, 16-bit immediate
+  IMUL16rmi8  -> imul, 16-bit register, 16-bit memory, 8-bit immediate
+  MOVSX32rm16 -> movsx, 32-bit register, 16-bit memory
+
+The PowerPC backend
+-------------------
+
+The PowerPC code generator lives in the lib/Target/PowerPC directory. The code
+generation is retargetable to several variations or *subtargets* of the PowerPC
+ISA, including ppc32, ppc64 and altivec.
+
+LLVM PowerPC ABI
+^^^^^^^^^^^^^^^^
+
+LLVM follows the AIX PowerPC ABI, with two deviations. First, LLVM uses PC
+relative (PIC) or static addressing for accessing global values, so no TOC (r2)
+is used. Second, r31 is used as a frame pointer to allow dynamic growth of a
+stack frame. LLVM takes advantage of having no TOC to provide space to save the
+frame pointer in the PowerPC linkage area of the caller frame. Other details of
+the PowerPC ABI can be found at `PowerPC ABI
+<http://developer.apple.com/documentation/DeveloperTools/Conceptual/LowLevelABI/Articles/32bitPowerPC.html>`_\
+. Note: This link describes the 32 bit ABI. The 64 bit ABI is similar except
+that space for GPRs is 8 bytes wide (not 4) and r13 is reserved for system use.
+
+Frame Layout
+^^^^^^^^^^^^
+
+The size of a PowerPC frame is usually fixed for the duration of a function's
+invocation. Since the frame is fixed size, all references into the frame can be
+accessed via fixed offsets from the stack pointer. The exception to this is
+when dynamic alloca or variable sized arrays are present; then a base pointer
+(r31) is used as a proxy for the stack pointer, and the stack pointer is free
+to grow or shrink. A base pointer is also used if llvm-gcc is not passed the
+-fomit-frame-pointer flag. The stack pointer is always aligned to 16 bytes, so
+that space allocated for altivec vectors will be properly aligned.
+
+An invocation frame is laid out as follows (low memory at top):
+
+:raw-html:`<table border="1" cellspacing="0">`
+:raw-html:`<tr>`
+:raw-html:`<td>Linkage<br><br></td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>Parameter area<br><br></td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>Dynamic area<br><br></td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>Locals area<br><br></td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>Saved registers area<br><br></td>`
+:raw-html:`</tr>`
+:raw-html:`<tr style="border-style: none hidden none hidden;">`
+:raw-html:`<td><br></td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>Previous Frame<br><br></td>`
+:raw-html:`</tr>`
+:raw-html:`</table>`
+
+The *linkage* area is used by a callee to save special registers prior to
+allocating its own frame. Only three entries are relevant to LLVM. The first
+entry is the previous stack pointer (sp), aka link. This allows probing tools
+like gdb or exception handlers to quickly scan the frames in the stack. A
+function epilog can also use the link to pop the frame from the stack. The
+third entry in the linkage area is used to save the return address from the lr
+register. Finally, as mentioned above, the last entry is used to save the
+previous frame pointer (r31). The entries in the linkage area are the size of a
+GPR, thus the linkage area is 24 bytes long in 32 bit mode and 48 bytes in 64
+bit mode.
+
+32 bit linkage area:
+
+:raw-html:`<table border="1" cellspacing="0">`
+:raw-html:`<tr>`
+:raw-html:`<td>0</td>`
+:raw-html:`<td>Saved SP (r1)</td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>4</td>`
+:raw-html:`<td>Saved CR</td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>8</td>`
+:raw-html:`<td>Saved LR</td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>12</td>`
+:raw-html:`<td>Reserved</td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>16</td>`
+:raw-html:`<td>Reserved</td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>20</td>`
+:raw-html:`<td>Saved FP (r31)</td>`
+:raw-html:`</tr>`
+:raw-html:`</table>`
+
+64 bit linkage area:
+
+:raw-html:`<table border="1" cellspacing="0">`
+:raw-html:`<tr>`
+:raw-html:`<td>0</td>`
+:raw-html:`<td>Saved SP (r1)</td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>8</td>`
+:raw-html:`<td>Saved CR</td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>16</td>`
+:raw-html:`<td>Saved LR</td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>24</td>`
+:raw-html:`<td>Reserved</td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>32</td>`
+:raw-html:`<td>Reserved</td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>40</td>`
+:raw-html:`<td>Saved FP (r31)</td>`
+:raw-html:`</tr>`
+:raw-html:`</table>`
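+
+For readers who prefer code to tables, the 32 bit layout above can be pictured
+as the following C struct; this is only a sketch, and the field names are
+illustrative rather than taken from the LLVM sources:
+
+.. code-block:: c++
+
+  #include <stdint.h>
+
+  // Sketch of the 32 bit linkage area shown in the table above.
+  struct PPC32LinkageArea {
+    uint32_t SavedSP;    // offset  0: back-chain to the caller's frame (r1)
+    uint32_t SavedCR;    // offset  4
+    uint32_t SavedLR;    // offset  8
+    uint32_t Reserved1;  // offset 12
+    uint32_t Reserved2;  // offset 16
+    uint32_t SavedFP;    // offset 20: frame pointer (r31)
+  };                     // sizeof == 24 bytes, matching the text above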
+
+The *parameter area* is used to store arguments being passed to a callee
+function. Following the PowerPC ABI, the first few arguments are actually
+passed in registers, with the space in the parameter area unused. However, if
+there are not enough registers or the callee is a thunk or vararg function,
+these register arguments can be spilled into the parameter area. Thus, the
+parameter area must be large enough to store all the parameters for the largest
+call sequence made by the caller. The size must also be at least large enough
+to spill registers r3-r10. This allows callees blind to the call signature,
+such as thunks and vararg functions, enough space to cache the argument
+registers. Therefore, the parameter area is minimally 32 bytes (64 bytes in 64
+bit mode). Also note that since the parameter area is a fixed offset from the
+top of the frame, a callee can access its spilled arguments using fixed offsets
+from the stack pointer (or base pointer).
+
+Combining the information about the linkage and parameter areas and the
+alignment requirements, a stack frame is minimally 64 bytes in 32 bit mode and
+128 bytes in 64 bit mode.
+
+The *dynamic area* starts out as size zero. If a function uses dynamic alloca
+then space is added to the stack, the linkage and parameter areas are shifted to
+the top of the stack, and the new space is available immediately below the
+linkage and parameter areas. The cost of shifting the linkage and parameter
+areas is minor since only the link value needs to be copied. The link value can
+be easily fetched by adding the original frame size to the base pointer. Note
+that allocations in the dynamic space need to observe 16 byte alignment.
+
+The *locals area* is where the llvm compiler reserves space for local variables.
+
+The *saved registers area* is where the llvm compiler spills callee saved
+registers on entry to the callee.
+
+Prolog/Epilog
+^^^^^^^^^^^^^
+
+The llvm prolog and epilog are the same as described in the PowerPC ABI, with
+the following exceptions. Callee saved registers are spilled after the frame is
+created. This allows the llvm epilog/prolog support to be common with other
+targets.
+The base pointer callee saved register r31 is saved in the TOC slot of the
+linkage area. This simplifies allocation of space for the base pointer and
+makes it convenient to locate programmatically and during debugging.
+
+Dynamic Allocation
+^^^^^^^^^^^^^^^^^^
+
+.. note::
+
+  TODO - More to come.
+
+The PTX backend
+---------------
+
+The PTX code generator lives in the lib/Target/PTX directory. It is currently a
+work-in-progress, but already supports most of the code generation functionality
+needed to generate correct PTX kernels for CUDA devices.
+
+The code generator can target PTX 2.0+ and shader model 1.0+. The PTX ISA
+Reference Manual is used as the primary source of ISA information, though an
+effort is made to make the output of the code generator match the output of the
+NVidia nvcc compiler whenever possible.
+
+Code Generator Options:
+
+:raw-html:`<table border="1" cellspacing="0">`
+:raw-html:`<tr>`
+:raw-html:`<th>Option</th>`
+:raw-html:`<th>Description</th>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>``double``</td>`
+:raw-html:`<td align="left">If enabled, the map_f64_to_f32 directive is disabled in the PTX output, allowing native double-precision arithmetic</td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>``no-fma``</td>`
+:raw-html:`<td align="left">Disable generation of Fused-Multiply Add instructions, which may be beneficial for some devices</td>`
+:raw-html:`</tr>`
+:raw-html:`<tr>`
+:raw-html:`<td>``smxy / computexy``</td>`
+:raw-html:`<td align="left">Set shader model/compute capability to x.y, e.g. sm20 or compute13</td>`
+:raw-html:`</tr>`
+:raw-html:`</table>`
+
+Working:
+
+* Arithmetic instruction selection (including combo FMA)
+
+* Bitwise instruction selection
+
+* Control-flow instruction selection
+
+* Function calls (only on SM 2.0+ and no return arguments)
+
+* Address spaces (0 = global, 1 = constant, 2 = local, 4 = shared)
+
+* Thread synchronization (bar.sync)
+
+* Special register reads ([N]TID, [N]CTAID, PMx, CLOCK, etc.)
+
+In Progress:
+
+* Robust call instruction selection
+
+* Stack frame allocation
+
+* Device-specific instruction scheduling optimizations
diff --git a/docs/CommandGuide/llvm-ar.rst b/docs/CommandGuide/llvm-ar.rst
index eddffeb..8ff4192 100644
--- a/docs/CommandGuide/llvm-ar.rst
+++ b/docs/CommandGuide/llvm-ar.rst
@@ -6,7 +6,7 @@ SYNOPSIS
 --------
 
-**llvm-ar** [-]{dmpqrtx}[Rabfikouz] [relpos] [count] <archive> [files...]
+**llvm-ar** [-]{dmpqrtx}[Rabfikou] [relpos] [count] <archive> [files...]
 
 DESCRIPTION
@@ -63,16 +63,6 @@ Here's where **llvm-ar** departs from previous ``ar`` implementations:
 
-*Compression*
-
-  **llvm-ar** can compress the members of an archive to save space. The
-  compression used depends on what's available on the platform and what choices
-  the LLVM Compressor utility makes. It generally favors bzip2 but will select
-  between "no compression" or bzip2 depending on what makes sense for the
-  file's content.
-
-
 *Directory Recursion*
 
   Most ``ar`` implementations do not recurse through directories but simply
@@ -86,9 +76,8 @@ Here's where **llvm-ar** departs from previous ``ar`` implementations:
 
   When **llvm-ar** prints out the verbose table of contents (``tv`` option), it
   precedes the usual output with a character indicating the basic kind of
-  content in the file. A blank means the file is a regular file. A 'Z' means
-  the file is compressed. A 'B' means the file is an LLVM bitcode file. An
-  'S' means the file is the symbol table.
A blank means the file is a regular file. A 'B' means + the file is an LLVM bitcode file. An 'S' means the file is the symbol table. @@ -98,7 +87,7 @@ OPTIONS The options to **llvm-ar** are compatible with other ``ar`` implementations. -However, there are a few modifiers (*zR*) that are not found in other ``ar`` +However, there are a few modifiers (*R*) that are not found in other ``ar`` implementations. The options to **llvm-ar** specify a single basic operation to perform on the archive, a variety of modifiers for that operation, the name of the archive file, and an optional list of file names. These options are used to @@ -145,9 +134,9 @@ p[k] -q[Rfz] +q[Rf] - Quickly append files to the end of the archive. The *R*, *f*, and *z* + Quickly append files to the end of the archive. The *R*, and *f* modifiers apply to this operation. This operation quickly adds the *files* to the archive without checking for duplicates that should be removed first. If no *files* are specified, the archive is not modified. @@ -156,9 +145,9 @@ q[Rfz] -r[Rabfuz] +r[Rabfu] - Replace or insert file members. The *R*, *a*, *b*, *f*, *u*, and *z* + Replace or insert file members. The *R*, *a*, *b*, *f*, and *u* modifiers apply to this operation. This operation will replace existing *files* or insert them at the end of the archive if they do not exist. If no *files* are specified, the archive is not modified. @@ -169,7 +158,7 @@ t[v] Print the table of contents. Without any modifiers, this operation just prints the names of the members to the standard output. With the *v* modifier, - **llvm-ar** also prints out the file type (B=bitcode, Z=compressed, S=symbol + **llvm-ar** also prints out the file type (B=bitcode, S=symbol table, blank=regular file), the permission mode, the owner and group, the size, and the date. If any *files* are specified, the listing is only for those files. If no *files* are specified, the table of contents for the @@ -273,15 +262,6 @@ section (above) to determine which modifiers are applicable to which operations. -[z] - - When inserting or replacing any file in the archive, compress the file first. - This - modifier is safe to use when (previously) compressed bitcode files are added to - the archive; the compressed bitcode files will not be doubly compressed. - - - Modifiers (generic) ~~~~~~~~~~~~~~~~~~~ @@ -410,11 +390,7 @@ mode - char[8] size - char[10] This field provides the size of the file, in bytes, encoded as a decimal ASCII - string. If the size field is negative (starts with a minus sign, 0x02D), then - the archive member is stored in compressed form. The first byte of the archive - member's data indicates the compression type used. A value of 0 (0x30) indicates - that no compression was used. A value of 2 (0x32) indicates that bzip2 - compression was used. + string. 
diff --git a/docs/CommandLine.html b/docs/CommandLine.html deleted file mode 100644 index aff40d0..0000000 --- a/docs/CommandLine.html +++ /dev/null @@ -1,1976 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/html4/strict.dtd"> -<html> -<head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> - <title>CommandLine 2.0 Library Manual</title> - <link rel="stylesheet" href="_static/llvm.css" type="text/css"> -</head> -<body> - -<h1> - CommandLine 2.0 Library Manual -</h1> - -<ol> - <li><a href="#introduction">Introduction</a></li> - - <li><a href="#quickstart">Quick Start Guide</a> - <ol> - <li><a href="#bool">Boolean Arguments</a></li> - <li><a href="#alias">Argument Aliases</a></li> - <li><a href="#onealternative">Selecting an alternative from a - set of possibilities</a></li> - <li><a href="#namedalternatives">Named alternatives</a></li> - <li><a href="#list">Parsing a list of options</a></li> - <li><a href="#bits">Collecting options as a set of flags</a></li> - <li><a href="#description">Adding freeform text to help output</a></li> - </ol></li> - - <li><a href="#referenceguide">Reference Guide</a> - <ol> - <li><a href="#positional">Positional Arguments</a> - <ul> - <li><a href="#--">Specifying positional options with hyphens</a></li> - <li><a href="#getPosition">Determining absolute position with - getPosition</a></li> - <li><a href="#cl::ConsumeAfter">The <tt>cl::ConsumeAfter</tt> - modifier</a></li> - </ul></li> - - <li><a href="#storage">Internal vs External Storage</a></li> - - <li><a href="#attributes">Option Attributes</a></li> - - <li><a href="#modifiers">Option Modifiers</a> - <ul> - <li><a href="#hiding">Hiding an option from <tt>-help</tt> - output</a></li> - <li><a href="#numoccurrences">Controlling the number of occurrences - required and allowed</a></li> - <li><a href="#valrequired">Controlling whether or not a value must be - specified</a></li> - <li><a href="#formatting">Controlling other formatting options</a></li> - <li><a href="#misc">Miscellaneous option modifiers</a></li> - <li><a href="#response">Response files</a></li> - </ul></li> - - <li><a href="#toplevel">Top-Level Classes and Functions</a> - <ul> - <li><a href="#cl::ParseCommandLineOptions">The - <tt>cl::ParseCommandLineOptions</tt> function</a></li> - <li><a href="#cl::ParseEnvironmentOptions">The - <tt>cl::ParseEnvironmentOptions</tt> function</a></li> - <li><a href="#cl::SetVersionPrinter">The <tt>cl::SetVersionPrinter</tt> - function</a></li> - <li><a href="#cl::opt">The <tt>cl::opt</tt> class</a></li> - <li><a href="#cl::list">The <tt>cl::list</tt> class</a></li> - <li><a href="#cl::bits">The <tt>cl::bits</tt> class</a></li> - <li><a href="#cl::alias">The <tt>cl::alias</tt> class</a></li> - <li><a href="#cl::extrahelp">The <tt>cl::extrahelp</tt> class</a></li> - </ul></li> - - <li><a href="#builtinparsers">Builtin parsers</a> - <ul> - <li><a href="#genericparser">The Generic <tt>parser<t></tt> - parser</a></li> - <li><a href="#boolparser">The <tt>parser<bool></tt> - specialization</a></li> - <li><a href="#boolOrDefaultparser">The <tt>parser<boolOrDefault></tt> - specialization</a></li> - <li><a href="#stringparser">The <tt>parser<string></tt> - specialization</a></li> - <li><a href="#intparser">The <tt>parser<int></tt> - specialization</a></li> - <li><a href="#doubleparser">The <tt>parser<double></tt> and - <tt>parser<float></tt> specializations</a></li> - </ul></li> - </ol></li> - <li><a href="#extensionguide">Extension Guide</a> - <ol> - <li><a 
href="#customparser">Writing a custom parser</a></li> - <li><a href="#explotingexternal">Exploiting external storage</a></li> - <li><a href="#dynamicopts">Dynamically adding command line - options</a></li> - </ol></li> -</ol> - -<div class="doc_author"> - <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p> -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="introduction">Introduction</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>This document describes the CommandLine argument processing library. It will -show you how to use it, and what it can do. The CommandLine library uses a -declarative approach to specifying the command line options that your program -takes. By default, these options declarations implicitly hold the value parsed -for the option declared (of course this <a href="#storage">can be -changed</a>).</p> - -<p>Although there are a <b>lot</b> of command line argument parsing libraries -out there in many different languages, none of them fit well with what I needed. -By looking at the features and problems of other libraries, I designed the -CommandLine library to have the following features:</p> - -<ol> -<li>Speed: The CommandLine library is very quick and uses little resources. The -parsing time of the library is directly proportional to the number of arguments -parsed, not the number of options recognized. Additionally, command line -argument values are captured transparently into user defined global variables, -which can be accessed like any other variable (and with the same -performance).</li> - -<li>Type Safe: As a user of CommandLine, you don't have to worry about -remembering the type of arguments that you want (is it an int? a string? a -bool? an enum?) and keep casting it around. Not only does this help prevent -error prone constructs, it also leads to dramatically cleaner source code.</li> - -<li>No subclasses required: To use CommandLine, you instantiate variables that -correspond to the arguments that you would like to capture, you don't subclass a -parser. This means that you don't have to write <b>any</b> boilerplate -code.</li> - -<li>Globally accessible: Libraries can specify command line arguments that are -automatically enabled in any tool that links to the library. This is possible -because the application doesn't have to keep a list of arguments to pass to -the parser. This also makes supporting <a href="#dynamicopts">dynamically -loaded options</a> trivial.</li> - -<li>Cleaner: CommandLine supports enum and other types directly, meaning that -there is less error and more security built into the library. You don't have to -worry about whether your integral command line argument accidentally got -assigned a value that is not valid for your enum type.</li> - -<li>Powerful: The CommandLine library supports many different types of -arguments, from simple <a href="#boolparser">boolean flags</a> to <a -href="#cl::opt">scalars arguments</a> (<a href="#stringparser">strings</a>, <a -href="#intparser">integers</a>, <a href="#genericparser">enums</a>, <a -href="#doubleparser">doubles</a>), to <a href="#cl::list">lists of -arguments</a>. This is possible because CommandLine is...</li> - -<li>Extensible: It is very simple to add a new argument type to CommandLine. -Simply specify the parser that you want to use with the command line option when -you declare it. 
<a href="#customparser">Custom parsers</a> are no problem.</li> - -<li>Labor Saving: The CommandLine library cuts down on the amount of grunt work -that you, the user, have to do. For example, it automatically provides a -<tt>-help</tt> option that shows the available command line options for your -tool. Additionally, it does most of the basic correctness checking for -you.</li> - -<li>Capable: The CommandLine library can handle lots of different forms of -options often found in real programs. For example, <a -href="#positional">positional</a> arguments, <tt>ls</tt> style <a -href="#cl::Grouping">grouping</a> options (to allow processing '<tt>ls --lad</tt>' naturally), <tt>ld</tt> style <a href="#cl::Prefix">prefix</a> -options (to parse '<tt>-lmalloc -L/usr/lib</tt>'), and <a -href="#cl::ConsumeAfter">interpreter style options</a>.</li> - -</ol> - -<p>This document will hopefully let you jump in and start using CommandLine in -your utility quickly and painlessly. Additionally it should be a simple -reference manual to figure out how stuff works. If it is failing in some area -(or you want an extension to the library), nag the author, <a -href="mailto:sabre@nondot.org">Chris Lattner</a>.</p> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="quickstart">Quick Start Guide</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>This section of the manual runs through a simple CommandLine'ification of a -basic compiler tool. This is intended to show you how to jump into using the -CommandLine library in your own program, and show you some of the cool things it -can do.</p> - -<p>To start out, you need to include the CommandLine header file into your -program:</p> - -<div class="doc_code"><pre> - #include "llvm/Support/CommandLine.h" -</pre></div> - -<p>Additionally, you need to add this as the first line of your main -program:</p> - -<div class="doc_code"><pre> -int main(int argc, char **argv) { - <a href="#cl::ParseCommandLineOptions">cl::ParseCommandLineOptions</a>(argc, argv); - ... -} -</pre></div> - -<p>... which actually parses the arguments and fills in the variable -declarations.</p> - -<p>Now that you are ready to support command line arguments, we need to tell the -system which ones we want, and what type of arguments they are. The CommandLine -library uses a declarative syntax to model command line arguments with the -global variable declarations that capture the parsed values. This means that -for every command line option that you would like to support, there should be a -global variable declaration to capture the result. For example, in a compiler, -we would like to support the Unix-standard '<tt>-o <filename></tt>' option -to specify where to put the output. With the CommandLine library, this is -represented like this:</p> - -<a name="value_desc_example"></a> -<div class="doc_code"><pre> -<a href="#cl::opt">cl::opt</a><string> OutputFilename("<i>o</i>", <a href="#cl::desc">cl::desc</a>("<i>Specify output filename</i>"), <a href="#cl::value_desc">cl::value_desc</a>("<i>filename</i>")); -</pre></div> - -<p>This declares a global variable "<tt>OutputFilename</tt>" that is used to -capture the result of the "<tt>o</tt>" argument (first parameter). 
We specify -that this is a simple scalar option by using the "<tt><a -href="#cl::opt">cl::opt</a></tt>" template (as opposed to the <a -href="#list">"<tt>cl::list</tt> template</a>), and tell the CommandLine library -that the data type that we are parsing is a string.</p> - -<p>The second and third parameters (which are optional) are used to specify what -to output for the "<tt>-help</tt>" option. In this case, we get a line that -looks like this:</p> - -<div class="doc_code"><pre> -USAGE: compiler [options] - -OPTIONS: - -help - display available options (-help-hidden for more) - <b>-o <filename> - Specify output filename</b> -</pre></div> - -<p>Because we specified that the command line option should parse using the -<tt>string</tt> data type, the variable declared is automatically usable as a -real string in all contexts that a normal C++ string object may be used. For -example:</p> - -<div class="doc_code"><pre> - ... - std::ofstream Output(OutputFilename.c_str()); - if (Output.good()) ... - ... -</pre></div> - -<p>There are many different options that you can use to customize the command -line option handling library, but the above example shows the general interface -to these options. The options can be specified in any order, and are specified -with helper functions like <a href="#cl::desc"><tt>cl::desc(...)</tt></a>, so -there are no positional dependencies to remember. The available options are -discussed in detail in the <a href="#referenceguide">Reference Guide</a>.</p> - -<p>Continuing the example, we would like to have our compiler take an input -filename as well as an output filename, but we do not want the input filename to -be specified with a hyphen (ie, not <tt>-filename.c</tt>). To support this -style of argument, the CommandLine library allows for <a -href="#positional">positional</a> arguments to be specified for the program. -These positional arguments are filled with command line parameters that are not -in option form. We use this feature like this:</p> - -<div class="doc_code"><pre> -<a href="#cl::opt">cl::opt</a><string> InputFilename(<a href="#cl::Positional">cl::Positional</a>, <a href="#cl::desc">cl::desc</a>("<i><input file></i>"), <a href="#cl::init">cl::init</a>("<i>-</i>")); -</pre></div> - -<p>This declaration indicates that the first positional argument should be -treated as the input filename. Here we use the <tt><a -href="#cl::init">cl::init</a></tt> option to specify an initial value for the -command line option, which is used if the option is not specified (if you do not -specify a <tt><a href="#cl::init">cl::init</a></tt> modifier for an option, then -the default constructor for the data type is used to initialize the value). 
-Command line options default to being optional, so if we would like to require -that the user always specify an input filename, we would add the <tt><a -href="#cl::Required">cl::Required</a></tt> flag, and we could eliminate the -<tt><a href="#cl::init">cl::init</a></tt> modifier, like this:</p> - -<div class="doc_code"><pre> -<a href="#cl::opt">cl::opt</a><string> InputFilename(<a href="#cl::Positional">cl::Positional</a>, <a href="#cl::desc">cl::desc</a>("<i><input file></i>"), <b><a href="#cl::Required">cl::Required</a></b>); -</pre></div> - -<p>Again, the CommandLine library does not require the options to be specified -in any particular order, so the above declaration is equivalent to:</p> - -<div class="doc_code"><pre> -<a href="#cl::opt">cl::opt</a><string> InputFilename(<a href="#cl::Positional">cl::Positional</a>, <a href="#cl::Required">cl::Required</a>, <a href="#cl::desc">cl::desc</a>("<i><input file></i>")); -</pre></div> - -<p>By simply adding the <tt><a href="#cl::Required">cl::Required</a></tt> flag, -the CommandLine library will automatically issue an error if the argument is not -specified, which shifts all of the command line option verification code out of -your application into the library. This is just one example of how using flags -can alter the default behaviour of the library, on a per-option basis. By -adding one of the declarations above, the <tt>-help</tt> option synopsis is now -extended to:</p> - -<div class="doc_code"><pre> -USAGE: compiler [options] <b><input file></b> - -OPTIONS: - -help - display available options (-help-hidden for more) - -o <filename> - Specify output filename -</pre></div> - -<p>... indicating that an input filename is expected.</p> - -<!-- ======================================================================= --> -<h3> - <a name="bool">Boolean Arguments</a> -</h3> - -<div> - -<p>In addition to input and output filenames, we would like the compiler example -to support three boolean flags: "<tt>-f</tt>" to force writing binary output to -a terminal, "<tt>--quiet</tt>" to enable quiet mode, and "<tt>-q</tt>" for -backwards compatibility with some of our users. We can support these by -declaring options of boolean type like this:</p> - -<div class="doc_code"><pre> -<a href="#cl::opt">cl::opt</a><bool> Force ("<i>f</i>", <a href="#cl::desc">cl::desc</a>("<i>Enable binary output on terminals</i>")); -<a href="#cl::opt">cl::opt</a><bool> Quiet ("<i>quiet</i>", <a href="#cl::desc">cl::desc</a>("<i>Don't print informational messages</i>")); -<a href="#cl::opt">cl::opt</a><bool> Quiet2("<i>q</i>", <a href="#cl::desc">cl::desc</a>("<i>Don't print informational messages</i>"), <a href="#cl::Hidden">cl::Hidden</a>); -</pre></div> - -<p>This does what you would expect: it declares three boolean variables -("<tt>Force</tt>", "<tt>Quiet</tt>", and "<tt>Quiet2</tt>") to recognize these -options. Note that the "<tt>-q</tt>" option is specified with the "<a -href="#cl::Hidden"><tt>cl::Hidden</tt></a>" flag. This modifier prevents it -from being shown by the standard "<tt>-help</tt>" output (note that it is still -shown in the "<tt>-help-hidden</tt>" output).</p> - -<p>The CommandLine library uses a <a href="#builtinparsers">different parser</a> -for different data types. For example, in the string case, the argument passed -to the option is copied literally into the content of the string variable... we -obviously cannot do that in the boolean case, however, so we must use a smarter -parser. 
In the case of the boolean parser, it allows no options (in which case -it assigns the value of true to the variable), or it allows the values -"<tt>true</tt>" or "<tt>false</tt>" to be specified, allowing any of the -following inputs:</p> - -<div class="doc_code"><pre> - compiler -f # No value, 'Force' == true - compiler -f=true # Value specified, 'Force' == true - compiler -f=TRUE # Value specified, 'Force' == true - compiler -f=FALSE # Value specified, 'Force' == false -</pre></div> - -<p>... you get the idea. The <a href="#boolparser">bool parser</a> just turns -the string values into boolean values, and rejects things like '<tt>compiler --f=foo</tt>'. Similarly, the <a href="#doubleparser">float</a>, <a -href="#doubleparser">double</a>, and <a href="#intparser">int</a> parsers work -like you would expect, using the '<tt>strtol</tt>' and '<tt>strtod</tt>' C -library calls to parse the string value into the specified data type.</p> - -<p>With the declarations above, "<tt>compiler -help</tt>" emits this:</p> - -<div class="doc_code"><pre> -USAGE: compiler [options] <input file> - -OPTIONS: - <b>-f - Enable binary output on terminals</b> - -o - Override output filename - <b>-quiet - Don't print informational messages</b> - -help - display available options (-help-hidden for more) -</pre></div> - -<p>and "<tt>compiler -help-hidden</tt>" prints this:</p> - -<div class="doc_code"><pre> -USAGE: compiler [options] <input file> - -OPTIONS: - -f - Enable binary output on terminals - -o - Override output filename - <b>-q - Don't print informational messages</b> - -quiet - Don't print informational messages - -help - display available options (-help-hidden for more) -</pre></div> - -<p>This brief example has shown you how to use the '<tt><a -href="#cl::opt">cl::opt</a></tt>' class to parse simple scalar command line -arguments. In addition to simple scalar arguments, the CommandLine library also -provides primitives to support CommandLine option <a href="#alias">aliases</a>, -and <a href="#list">lists</a> of options.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="alias">Argument Aliases</a> -</h3> - -<div> - -<p>So far, the example works well, except for the fact that we need to check the -quiet condition like this now:</p> - -<div class="doc_code"><pre> -... - if (!Quiet && !Quiet2) printInformationalMessage(...); -... -</pre></div> - -<p>... which is a real pain! Instead of defining two values for the same -condition, we can use the "<tt><a href="#cl::alias">cl::alias</a></tt>" class to make the "<tt>-q</tt>" -option an <b>alias</b> for the "<tt>-quiet</tt>" option, instead of providing -a value itself:</p> - -<div class="doc_code"><pre> -<a href="#cl::opt">cl::opt</a><bool> Force ("<i>f</i>", <a href="#cl::desc">cl::desc</a>("<i>Overwrite output files</i>")); -<a href="#cl::opt">cl::opt</a><bool> Quiet ("<i>quiet</i>", <a href="#cl::desc">cl::desc</a>("<i>Don't print informational messages</i>")); -<a href="#cl::alias">cl::alias</a> QuietA("<i>q</i>", <a href="#cl::desc">cl::desc</a>("<i>Alias for -quiet</i>"), <a href="#cl::aliasopt">cl::aliasopt</a>(Quiet)); -</pre></div> - -<p>The third line (which is the only one we modified from above) defines a -"<tt>-q</tt>" alias that updates the "<tt>Quiet</tt>" variable (as specified by -the <tt><a href="#cl::aliasopt">cl::aliasopt</a></tt> modifier) whenever it is -specified. 
Because aliases do not hold state, the only thing the program has to -query is the <tt>Quiet</tt> variable now. Another nice feature of aliases is -that they automatically hide themselves from the <tt>-help</tt> output -(although, again, they are still visible in the <tt>-help-hidden -output</tt>).</p> - -<p>Now the application code can simply use:</p> - -<div class="doc_code"><pre> -... - if (!Quiet) printInformationalMessage(...); -... -</pre></div> - -<p>... which is much nicer! The "<tt><a href="#cl::alias">cl::alias</a></tt>" -can be used to specify an alternative name for any variable type, and has many -uses.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="onealternative">Selecting an alternative from a set of - possibilities</a> -</h3> - -<div> - -<p>So far we have seen how the CommandLine library handles builtin types like -<tt>std::string</tt>, <tt>bool</tt> and <tt>int</tt>, but how does it handle -things it doesn't know about, like enums or '<tt>int*</tt>'s?</p> - -<p>The answer is that it uses a table-driven generic parser (unless you specify -your own parser, as described in the <a href="#extensionguide">Extension -Guide</a>). This parser maps literal strings to whatever type is required, and -requires you to tell it what this mapping should be.</p> - -<p>Let's say that we would like to add four optimization levels to our -optimizer, using the standard flags "<tt>-g</tt>", "<tt>-O0</tt>", -"<tt>-O1</tt>", and "<tt>-O2</tt>". We could easily implement this with boolean -options like above, but there are several problems with this strategy:</p> - -<ol> -<li>A user could specify more than one of the options at a time, for example, -"<tt>compiler -O3 -O2</tt>". The CommandLine library would not be able to -catch this erroneous input for us.</li> - -<li>We would have to test 4 different variables to see which ones are set.</li> - -<li>This doesn't map to the numeric levels that we want... so we cannot easily -see if some level >= "<tt>-O1</tt>" is enabled.</li> - -</ol> - -<p>To cope with these problems, we can use an enum value, and have the -CommandLine library fill it in with the appropriate level directly, which is -used like this:</p> - -<div class="doc_code"><pre> -enum OptLevel { - g, O1, O2, O3 -}; - -<a href="#cl::opt">cl::opt</a><OptLevel> OptimizationLevel(<a href="#cl::desc">cl::desc</a>("<i>Choose optimization level:</i>"), - <a href="#cl::values">cl::values</a>( - clEnumVal(g , "<i>No optimizations, enable debugging</i>"), - clEnumVal(O1, "<i>Enable trivial optimizations</i>"), - clEnumVal(O2, "<i>Enable default optimizations</i>"), - clEnumVal(O3, "<i>Enable expensive optimizations</i>"), - clEnumValEnd)); - -... - if (OptimizationLevel >= O2) doPartialRedundancyElimination(...); -... -</pre></div> - -<p>This declaration defines a variable "<tt>OptimizationLevel</tt>" of the -"<tt>OptLevel</tt>" enum type. This variable can be assigned any of the values -that are listed in the declaration (Note that the declaration list must be -terminated with the "<tt>clEnumValEnd</tt>" argument!). The CommandLine -library enforces -that the user can only specify one of the options, and it ensure that only valid -enum values can be specified. The "<tt>clEnumVal</tt>" macros ensure that the -command line arguments matched the enum values. 
With this option added, our -help output now is:</p> - -<div class="doc_code"><pre> -USAGE: compiler [options] <input file> - -OPTIONS: - <b>Choose optimization level: - -g - No optimizations, enable debugging - -O1 - Enable trivial optimizations - -O2 - Enable default optimizations - -O3 - Enable expensive optimizations</b> - -f - Enable binary output on terminals - -help - display available options (-help-hidden for more) - -o <filename> - Specify output filename - -quiet - Don't print informational messages -</pre></div> - -<p>In this case, it is sort of awkward that flag names correspond directly to -enum names, because we probably don't want a enum definition named "<tt>g</tt>" -in our program. Because of this, we can alternatively write this example like -this:</p> - -<div class="doc_code"><pre> -enum OptLevel { - Debug, O1, O2, O3 -}; - -<a href="#cl::opt">cl::opt</a><OptLevel> OptimizationLevel(<a href="#cl::desc">cl::desc</a>("<i>Choose optimization level:</i>"), - <a href="#cl::values">cl::values</a>( - clEnumValN(Debug, "g", "<i>No optimizations, enable debugging</i>"), - clEnumVal(O1 , "<i>Enable trivial optimizations</i>"), - clEnumVal(O2 , "<i>Enable default optimizations</i>"), - clEnumVal(O3 , "<i>Enable expensive optimizations</i>"), - clEnumValEnd)); - -... - if (OptimizationLevel == Debug) outputDebugInfo(...); -... -</pre></div> - -<p>By using the "<tt>clEnumValN</tt>" macro instead of "<tt>clEnumVal</tt>", we -can directly specify the name that the flag should get. In general a direct -mapping is nice, but sometimes you can't or don't want to preserve the mapping, -which is when you would use it.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="namedalternatives">Named Alternatives</a> -</h3> - -<div> - -<p>Another useful argument form is a named alternative style. We shall use this -style in our compiler to specify different debug levels that can be used. -Instead of each debug level being its own switch, we want to support the -following options, of which only one can be specified at a time: -"<tt>--debug-level=none</tt>", "<tt>--debug-level=quick</tt>", -"<tt>--debug-level=detailed</tt>". To do this, we use the exact same format as -our optimization level flags, but we also specify an option name. For this -case, the code looks like this:</p> - -<div class="doc_code"><pre> -enum DebugLev { - nodebuginfo, quick, detailed -}; - -// Enable Debug Options to be specified on the command line -<a href="#cl::opt">cl::opt</a><DebugLev> DebugLevel("<i>debug_level</i>", <a href="#cl::desc">cl::desc</a>("<i>Set the debugging level:</i>"), - <a href="#cl::values">cl::values</a>( - clEnumValN(nodebuginfo, "none", "<i>disable debug information</i>"), - clEnumVal(quick, "<i>enable quick debug information</i>"), - clEnumVal(detailed, "<i>enable detailed debug information</i>"), - clEnumValEnd)); -</pre></div> - -<p>This definition defines an enumerated command line variable of type "<tt>enum -DebugLev</tt>", which works exactly the same way as before. 
The difference here -is just the interface exposed to the user of your program and the help output by -the "<tt>-help</tt>" option:</p> - -<div class="doc_code"><pre> -USAGE: compiler [options] <input file> - -OPTIONS: - Choose optimization level: - -g - No optimizations, enable debugging - -O1 - Enable trivial optimizations - -O2 - Enable default optimizations - -O3 - Enable expensive optimizations - <b>-debug_level - Set the debugging level: - =none - disable debug information - =quick - enable quick debug information - =detailed - enable detailed debug information</b> - -f - Enable binary output on terminals - -help - display available options (-help-hidden for more) - -o <filename> - Specify output filename - -quiet - Don't print informational messages -</pre></div> - -<p>Again, the only structural difference between the debug level declaration and -the optimization level declaration is that the debug level declaration includes -an option name (<tt>"debug_level"</tt>), which automatically changes how the -library processes the argument. The CommandLine library supports both forms so -that you can choose the form most appropriate for your application.</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="list">Parsing a list of options</a> -</h3> - -<div> - -<p>Now that we have the standard run-of-the-mill argument types out of the way, -lets get a little wild and crazy. Lets say that we want our optimizer to accept -a <b>list</b> of optimizations to perform, allowing duplicates. For example, we -might want to run: "<tt>compiler -dce -constprop -inline -dce -strip</tt>". In -this case, the order of the arguments and the number of appearances is very -important. This is what the "<tt><a href="#cl::list">cl::list</a></tt>" -template is for. First, start by defining an enum of the optimizations that you -would like to perform:</p> - -<div class="doc_code"><pre> -enum Opts { - // 'inline' is a C++ keyword, so name it 'inlining' - dce, constprop, inlining, strip -}; -</pre></div> - -<p>Then define your "<tt><a href="#cl::list">cl::list</a></tt>" variable:</p> - -<div class="doc_code"><pre> -<a href="#cl::list">cl::list</a><Opts> OptimizationList(<a href="#cl::desc">cl::desc</a>("<i>Available Optimizations:</i>"), - <a href="#cl::values">cl::values</a>( - clEnumVal(dce , "<i>Dead Code Elimination</i>"), - clEnumVal(constprop , "<i>Constant Propagation</i>"), - clEnumValN(inlining, "<i>inline</i>", "<i>Procedure Integration</i>"), - clEnumVal(strip , "<i>Strip Symbols</i>"), - clEnumValEnd)); -</pre></div> - -<p>This defines a variable that is conceptually of the type -"<tt>std::vector<enum Opts></tt>". Thus, you can access it with standard -vector methods:</p> - -<div class="doc_code"><pre> - for (unsigned i = 0; i != OptimizationList.size(); ++i) - switch (OptimizationList[i]) - ... -</pre></div> - -<p>... to iterate through the list of options specified.</p> - -<p>Note that the "<tt><a href="#cl::list">cl::list</a></tt>" template is -completely general and may be used with any data types or other arguments that -you can use with the "<tt><a href="#cl::opt">cl::opt</a></tt>" template. One -especially useful way to use a list is to capture all of the positional -arguments together if there may be more than one specified. In the case of a -linker, for example, the linker takes several '<tt>.o</tt>' files, and needs to -capture them into a list. This is naturally specified as:</p> - -<div class="doc_code"><pre> -... 
-<a href="#cl::list">cl::list</a><std::string> InputFilenames(<a href="#cl::Positional">cl::Positional</a>, <a href="#cl::desc">cl::desc</a>("<Input files>"), <a href="#cl::OneOrMore">cl::OneOrMore</a>);
-...
-</pre></div>
-
-<p>This variable works just like a "<tt>vector<string></tt>" object.  As
-such, accessing the list is simple, just like above.  In this example, we used
-the <tt><a href="#cl::OneOrMore">cl::OneOrMore</a></tt> modifier to inform the
-CommandLine library that it is an error if the user does not specify any
-<tt>.o</tt> files on our command line.  Again, this just reduces the amount of
-checking we have to do.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="bits">Collecting options as a set of flags</a>
-</h3>
-
-<div>
-
-<p>Instead of collecting sets of options in a list, it is also possible to
-gather information for enum values in a <b>bit vector</b>.  The representation used by
-the <a href="#bits"><tt>cl::bits</tt></a> class is an <tt>unsigned</tt>
-integer.  An enum value is represented by a 0/1 in the enum's ordinal value bit
-position: 1 indicates that the enum was specified, 0 that it was not.  As each
-specified value is parsed, the resulting enum's bit is set in the option's bit
-vector:</p>
-
-<div class="doc_code"><pre>
-  <i>bits</i> |= 1 << (unsigned)<i>enum</i>;
-</pre></div>
-
-<p>Options that are specified multiple times are redundant.  Any instances after
-the first are discarded.</p>
-
-<p>Reworking the above list example, we could replace <a href="#list">
-<tt>cl::list</tt></a> with <a href="#bits"><tt>cl::bits</tt></a>:</p>
-
-<div class="doc_code"><pre>
-<a href="#cl::bits">cl::bits</a><Opts> OptimizationBits(<a href="#cl::desc">cl::desc</a>("<i>Available Optimizations:</i>"),
-  <a href="#cl::values">cl::values</a>(
-    clEnumVal(dce               , "<i>Dead Code Elimination</i>"),
-    clEnumVal(constprop         , "<i>Constant Propagation</i>"),
-   clEnumValN(inlining, "<i>inline</i>", "<i>Procedure Integration</i>"),
-    clEnumVal(strip             , "<i>Strip Symbols</i>"),
-  clEnumValEnd));
-</pre></div>
-
-<p>To test whether <tt>constprop</tt> was specified, we can use the
-<tt>cl::bits::isSet</tt> function:</p>
-
-<div class="doc_code"><pre>
-  if (OptimizationBits.isSet(constprop)) {
-    ...
-  }
-</pre></div>
-
-<p>It's also possible to get the raw bit vector using the
-<tt>cl::bits::getBits</tt> function:</p>
-
-<div class="doc_code"><pre>
-  unsigned bits = OptimizationBits.getBits();
-</pre></div>
-
-<p>Finally, if external storage is used, then the location specified must be of
-<b>type</b> <tt>unsigned</tt>.  In all other ways a <a
-href="#bits"><tt>cl::bits</tt></a> option is equivalent to a <a
-href="#list"><tt>cl::list</tt></a> option.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="description">Adding freeform text to help output</a>
-</h3>
-
-<div>
-
-<p>As our program grows and becomes more mature, we may decide to put summary
-information about what it does into the help output.  The help output is styled
-to look similar to a Unix <tt>man</tt> page, providing concise information about
-a program.  Unix <tt>man</tt> pages, however, often have a description about what
-the program does.  To add this to your CommandLine program, simply pass a third
-argument to the
-<a href="#cl::ParseCommandLineOptions"><tt>cl::ParseCommandLineOptions</tt></a>
-call in main.
This additional argument is then printed as the overview -information for your program, allowing you to include any additional information -that you want. For example:</p> - -<div class="doc_code"><pre> -int main(int argc, char **argv) { - <a href="#cl::ParseCommandLineOptions">cl::ParseCommandLineOptions</a>(argc, argv, " CommandLine compiler example\n\n" - " This program blah blah blah...\n"); - ... -} -</pre></div> - -<p>would yield the help output:</p> - -<div class="doc_code"><pre> -<b>OVERVIEW: CommandLine compiler example - - This program blah blah blah...</b> - -USAGE: compiler [options] <input file> - -OPTIONS: - ... - -help - display available options (-help-hidden for more) - -o <filename> - Specify output filename -</pre></div> - -</div> - -</div> - -<!-- *********************************************************************** --> -<h2> - <a name="referenceguide">Reference Guide</a> -</h2> -<!-- *********************************************************************** --> - -<div> - -<p>Now that you know the basics of how to use the CommandLine library, this -section will give you the detailed information you need to tune how command line -options work, as well as information on more "advanced" command line option -processing capabilities.</p> - -<!-- ======================================================================= --> -<h3> - <a name="positional">Positional Arguments</a> -</h3> - -<div> - -<p>Positional arguments are those arguments that are not named, and are not -specified with a hyphen. Positional arguments should be used when an option is -specified by its position alone. For example, the standard Unix <tt>grep</tt> -tool takes a regular expression argument, and an optional filename to search -through (which defaults to standard input if a filename is not specified). -Using the CommandLine library, this would be specified as:</p> - -<div class="doc_code"><pre> -<a href="#cl::opt">cl::opt</a><string> Regex (<a href="#cl::Positional">cl::Positional</a>, <a href="#cl::desc">cl::desc</a>("<i><regular expression></i>"), <a href="#cl::Required">cl::Required</a>); -<a href="#cl::opt">cl::opt</a><string> Filename(<a href="#cl::Positional">cl::Positional</a>, <a href="#cl::desc">cl::desc</a>("<i><input file></i>"), <a href="#cl::init">cl::init</a>("<i>-</i>")); -</pre></div> - -<p>Given these two option declarations, the <tt>-help</tt> output for our grep -replacement would look like this:</p> - -<div class="doc_code"><pre> -USAGE: spiffygrep [options] <b><regular expression> <input file></b> - -OPTIONS: - -help - display available options (-help-hidden for more) -</pre></div> - -<p>... and the resultant program could be used just like the standard -<tt>grep</tt> tool.</p> - -<p>Positional arguments are sorted by their order of construction. This means -that command line options will be ordered according to how they are listed in a -.cpp file, but will not have an ordering defined if the positional arguments -are defined in multiple .cpp files. The fix for this problem is simply to -define all of your positional arguments in one .cpp file.</p> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="--">Specifying positional options with hyphens</a> -</h4> - -<div> - -<p>Sometimes you may want to specify a value to your positional argument that -starts with a hyphen (for example, searching for '<tt>-foo</tt>' in a file). 
At -first, you will have trouble doing this, because it will try to find an argument -named '<tt>-foo</tt>', and will fail (and single quotes will not save you). -Note that the system <tt>grep</tt> has the same problem:</p> - -<div class="doc_code"><pre> - $ spiffygrep '-foo' test.txt - Unknown command line argument '-foo'. Try: spiffygrep -help' - - $ grep '-foo' test.txt - grep: illegal option -- f - grep: illegal option -- o - grep: illegal option -- o - Usage: grep -hblcnsviw pattern file . . . -</pre></div> - -<p>The solution for this problem is the same for both your tool and the system -version: use the '<tt>--</tt>' marker. When the user specifies '<tt>--</tt>' on -the command line, it is telling the program that all options after the -'<tt>--</tt>' should be treated as positional arguments, not options. Thus, we -can use it like this:</p> - -<div class="doc_code"><pre> - $ spiffygrep -- -foo test.txt - ...output... -</pre></div> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="getPosition">Determining absolute position with getPosition()</a> -</h4> -<div> - <p>Sometimes an option can affect or modify the meaning of another option. For - example, consider <tt>gcc</tt>'s <tt>-x LANG</tt> option. This tells - <tt>gcc</tt> to ignore the suffix of subsequent positional arguments and force - the file to be interpreted as if it contained source code in language - <tt>LANG</tt>. In order to handle this properly, you need to know the - absolute position of each argument, especially those in lists, so their - interaction(s) can be applied correctly. This is also useful for options like - <tt>-llibname</tt> which is actually a positional argument that starts with - a dash.</p> - <p>So, generally, the problem is that you have two <tt>cl::list</tt> variables - that interact in some way. To ensure the correct interaction, you can use the - <tt>cl::list::getPosition(optnum)</tt> method. This method returns the - absolute position (as found on the command line) of the <tt>optnum</tt> - item in the <tt>cl::list</tt>.</p> - <p>The idiom for usage is like this:</p> - - <div class="doc_code"><pre> - static cl::list<std::string> Files(cl::Positional, cl::OneOrMore); - static cl::list<std::string> Libraries("l", cl::ZeroOrMore); - - int main(int argc, char**argv) { - // ... - std::vector<std::string>::iterator fileIt = Files.begin(); - std::vector<std::string>::iterator libIt = Libraries.begin(); - unsigned libPos = 0, filePos = 0; - while ( 1 ) { - if ( libIt != Libraries.end() ) - libPos = Libraries.getPosition( libIt - Libraries.begin() ); - else - libPos = 0; - if ( fileIt != Files.end() ) - filePos = Files.getPosition( fileIt - Files.begin() ); - else - filePos = 0; - - if ( filePos != 0 && (libPos == 0 || filePos < libPos) ) { - // Source File Is next - ++fileIt; - } - else if ( libPos != 0 && (filePos == 0 || libPos < filePos) ) { - // Library is next - ++libIt; - } - else - break; // we're done with the list - } - }</pre></div> - - <p>Note that, for compatibility reasons, the <tt>cl::opt</tt> also supports an - <tt>unsigned getPosition()</tt> option that will provide the absolute position - of that option. 
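-
-  <p>For example, a minimal sketch (the <tt>-x</tt> option and the variable
-  names here are hypothetical, not part of the library):</p>
-
-  <div class="doc_code"><pre>
-  static cl::list<std::string> Files(cl::Positional, cl::OneOrMore);
-  static cl::opt<std::string>  Lang("x", cl::desc("Override input language"));
-
-  // Does the -x override apply to Files[i]?  It does only if it was
-  // actually given, and appeared before the i'th file on the command line.
-  bool Overridden = Lang.getNumOccurrences() != 0 &&
-                    Lang.getPosition() < Files.getPosition(i);
-  </pre></div>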
That is, the same approach as above works with a
-<tt>cl::opt</tt> and a <tt>cl::list</tt> option just as it does with two lists.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="cl::ConsumeAfter">The <tt>cl::ConsumeAfter</tt> modifier</a>
-</h4>
-
-<div>
-
-<p>The <tt>cl::ConsumeAfter</tt> <a href="#formatting">formatting option</a> is
-used to construct programs that use "interpreter style" option processing.  With
-this style of option processing, all arguments specified after the last
-positional argument are treated as special interpreter arguments that are not
-interpreted by the command line option parser.</p>
-
-<p>As a concrete example, let's say we are developing a replacement for the
-standard Unix Bourne shell (<tt>/bin/sh</tt>).  To run <tt>/bin/sh</tt>, first
-you specify options to the shell itself (like <tt>-x</tt> which turns on trace
-output), then you specify the name of the script to run, then you specify
-arguments to the script.  These arguments to the script are parsed by the Bourne
-shell command line option processor, but are not interpreted as options to the
-shell itself.  Using the CommandLine library, we would specify this as:</p>
-
-<div class="doc_code"><pre>
-<a href="#cl::opt">cl::opt</a><string> Script(<a href="#cl::Positional">cl::Positional</a>, <a href="#cl::desc">cl::desc</a>("<i><input script></i>"), <a href="#cl::init">cl::init</a>("-"));
-<a href="#cl::list">cl::list</a><string> Argv(<a href="#cl::ConsumeAfter">cl::ConsumeAfter</a>, <a href="#cl::desc">cl::desc</a>("<i><program arguments>...</i>"));
-<a href="#cl::opt">cl::opt</a><bool> Trace("<i>x</i>", <a href="#cl::desc">cl::desc</a>("<i>Enable trace output</i>"));
-</pre></div>
-
-<p>which automatically provides the help output:</p>
-
-<div class="doc_code"><pre>
-USAGE: spiffysh [options] <b><input script> <program arguments>...</b>
-
-OPTIONS:
-  -help - display available options (-help-hidden for more)
-  <b>-x    - Enable trace output</b>
-</pre></div>
-
-<p>At runtime, if we run our new shell replacement as `<tt>spiffysh -x test.sh
--a -x -y bar</tt>', the <tt>Trace</tt> variable will be set to true, the
-<tt>Script</tt> variable will be set to "<tt>test.sh</tt>", and the
-<tt>Argv</tt> list will contain <tt>["-a", "-x", "-y", "bar"]</tt>, because they
-were specified after the last positional argument (which is the script
-name).</p>
-
-<p>There are several limitations on when <tt>cl::ConsumeAfter</tt> options can
-be specified.  For example, only one <tt>cl::ConsumeAfter</tt> can be specified
-per program, there must be at least one <a href="#positional">positional
-argument</a> specified, there must not be any <a href="#cl::list">cl::list</a>
-positional arguments, and the <tt>cl::ConsumeAfter</tt> option should be a <a
-href="#cl::list">cl::list</a> option.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="storage">Internal vs External Storage</a>
-</h3>
-
-<div>
-
-<p>By default, all command line options automatically hold the value that they
-parse from the command line.  This is very convenient in the common case,
-especially when combined with the ability to define command line options in the
-files that use them.  This is called the internal storage model.</p>
-
-<p>Sometimes, however, it is nice to separate the command line option processing
-code from the storage of the value parsed.
For example, let's say that we have a
-'<tt>-debug</tt>' option that we would like to use to enable debug information
-across the entire body of our program.  In this case, the boolean value
-controlling the debug code should be globally accessible (in a header file, for
-example) yet the command line option processing code should not be exposed to
-all of these clients (requiring lots of .cpp files to #include
-<tt>CommandLine.h</tt>).</p>
-
-<p>To do this, set up your .h file with your option, like this for example:</p>
-
-<div class="doc_code">
-<pre>
-<i>// DebugFlag.h - Get access to the '-debug' command line option
-//
-
-// DebugFlag - This boolean is set to true if the '-debug' command line option
-// is specified.  This should probably not be referenced directly, instead, use
-// the DEBUG macro below.
-//</i>
-extern bool DebugFlag;
-
-<i>// DEBUG macro - This macro should be used by code to emit debug information.
-// If the '-debug' option is specified on the command line, and if this is a
-// debug build, then the code specified as the option to the macro will be
-// executed.  Otherwise it will not be.</i>
-<span class="doc_hilite">#ifdef NDEBUG
-#define DEBUG(X)
-#else
-#define DEBUG(X)</span> do { if (DebugFlag) { X; } } while (0)
-<span class="doc_hilite">#endif</span>
-</pre>
-</div>
-
-<p>This allows clients to blissfully use the <tt>DEBUG()</tt> macro, or the
-<tt>DebugFlag</tt> explicitly if they want to.  Now we just need to be able to
-set the <tt>DebugFlag</tt> boolean when the option is set.  To do this, we pass
-an additional argument to our command line argument processor, and we specify
-where to fill in with the <a href="#cl::location">cl::location</a>
-attribute:</p>
-
-<div class="doc_code">
-<pre>
-bool DebugFlag;                  <i>// the actual value</i>
-static <a href="#cl::opt">cl::opt</a><bool, true>       <i>// The parser</i>
-Debug("<i>debug</i>", <a href="#cl::desc">cl::desc</a>("<i>Enable debug output</i>"), <a href="#cl::Hidden">cl::Hidden</a>, <a href="#cl::location">cl::location</a>(DebugFlag));
-</pre>
-</div>
-
-<p>In the above example, we specify "<tt>true</tt>" as the second argument to
-the <tt><a href="#cl::opt">cl::opt</a></tt> template, indicating that the
-template should not maintain a copy of the value itself.  In addition to this,
-we specify the <tt><a href="#cl::location">cl::location</a></tt> attribute, so
-that <tt>DebugFlag</tt> is automatically set.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="attributes">Option Attributes</a>
-</h3>
-
-<div>
-
-<p>This section describes the basic attributes that you can specify on
-options.</p>
-
-<ul>
-
-<li>The option name attribute (which is required for all options, except <a
-href="#positional">positional options</a>) specifies what the option name is.
-The name is specified as a simple double-quoted string:
-
-<pre>
-<a href="#cl::opt">cl::opt</a><<b>bool</b>> Quiet("<i>quiet</i>");
-</pre>
-
-</li>
-
-<li><a name="cl::desc">The <b><tt>cl::desc</tt></b></a> attribute specifies a
-description for the option to be shown in the <tt>-help</tt> output for the
-program.</li>
-
-<li><a name="cl::value_desc">The <b><tt>cl::value_desc</tt></b></a> attribute
-specifies a string that can be used to fine tune the <tt>-help</tt> output for
-a command line option.  Look <a href="#value_desc_example">here</a> for an
-example.</li>
-
-<li><a name="cl::init">The <b><tt>cl::init</tt></b></a> attribute specifies an
-initial value for a <a href="#cl::opt">scalar</a> option.
If this attribute is
-not specified, then the command line option value defaults to the value created
-by the default constructor for the type.  <b>Warning</b>: If you specify both
-<b><tt>cl::init</tt></b> and <b><tt>cl::location</tt></b> for an option,
-you must specify <b><tt>cl::location</tt></b> first, so that when the
-command-line parser sees <b><tt>cl::init</tt></b>, it knows where to put the
-initial value.  (You will get an error at runtime if you don't put them in
-the right order.)</li>
-
-<li><a name="cl::location">The <b><tt>cl::location</tt></b></a> attribute specifies
-where to store the value for a parsed command line option if using external storage.
-See the section on <a href="#storage">Internal vs External Storage</a> for more
-information.</li>
-
-<li><a name="cl::aliasopt">The <b><tt>cl::aliasopt</tt></b></a> attribute
-specifies which option a <tt><a href="#cl::alias">cl::alias</a></tt> option is
-an alias for.</li>
-
-<li><a name="cl::values">The <b><tt>cl::values</tt></b></a> attribute specifies
-the string-to-value mapping to be used by the generic parser.  It takes a
-<b>clEnumValEnd terminated</b> list of (option, value, description) triplets that
-specify the option name, the value mapped to, and the description shown in the
-<tt>-help</tt> output for the tool.  Because the generic parser is used most
-frequently with enum values, two macros are often useful:
-
-<ol>
-
-<li><a name="clEnumVal">The <b><tt>clEnumVal</tt></b></a> macro is used as a
-nice simple way to specify a triplet for an enum.  This macro automatically
-makes the option name be the same as the enum name.  The first argument to the
-macro is the enum value, the second is the description for the command line
-option.</li>
-
-<li><a name="clEnumValN">The <b><tt>clEnumValN</tt></b></a> macro is used to
-specify options where the option name doesn't equal the enum name.  For
-this macro, the first argument is the enum value, the second is the flag name,
-and the third is the description.</li>
-
-</ol>
-
-You will get a compile time error if you try to use cl::values with a parser
-that does not support it.</li>
-
-<li><a name="cl::multi_val">The <b><tt>cl::multi_val</tt></b></a>
-attribute specifies that this option takes multiple values
-(example: <tt>-sectalign segname sectname sectvalue</tt>). This
-attribute takes one unsigned argument - the number of values for the
-option. This attribute is valid only on <tt>cl::list</tt> options (and
-will fail with a compile error if you try to use it with other option
-types). It is allowed to use all of the usual modifiers on
-multi-valued options (besides <tt>cl::ValueDisallowed</tt>,
-obviously).</li>
-
-</ul>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
  <a name="modifiers">Option Modifiers</a>
-</h3>
-
-<div>
-
-<p>Option modifiers are the flags and expressions that you pass into the
-constructors for <tt><a href="#cl::opt">cl::opt</a></tt> and <tt><a
-href="#cl::list">cl::list</a></tt>.
These modifiers give you the ability to
-tweak how options are parsed and how <tt>-help</tt> output is generated to fit
-your application well.</p>
-
-<p>These options fall into five main categories:</p>
-
-<ol>
-<li><a href="#hiding">Hiding an option from <tt>-help</tt> output</a></li>
-<li><a href="#numoccurrences">Controlling the number of occurrences
-    required and allowed</a></li>
-<li><a href="#valrequired">Controlling whether or not a value must be
-    specified</a></li>
-<li><a href="#formatting">Controlling other formatting options</a></li>
-<li><a href="#misc">Miscellaneous option modifiers</a></li>
-</ol>
-
-<p>A single option may be given at most one modifier from each category (you'll
-get a runtime error otherwise), except for modifiers in the miscellaneous
-category.  The CommandLine library specifies defaults for all of these settings
-that are the most useful in practice and the most common, which means that you
-usually shouldn't have to worry about these.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="hiding">Hiding an option from <tt>-help</tt> output</a>
-</h4>
-
-<div>
-
-<p>The <tt>cl::NotHidden</tt>, <tt>cl::Hidden</tt>, and
-<tt>cl::ReallyHidden</tt> modifiers are used to control whether or not an option
-appears in the <tt>-help</tt> and <tt>-help-hidden</tt> output for the
-compiled program:</p>
-
-<ul>
-
-<li><a name="cl::NotHidden">The <b><tt>cl::NotHidden</tt></b></a> modifier
-(which is the default for <tt><a href="#cl::opt">cl::opt</a></tt> and <tt><a
-href="#cl::list">cl::list</a></tt> options) indicates the option is to appear
-in both help listings.</li>
-
-<li><a name="cl::Hidden">The <b><tt>cl::Hidden</tt></b></a> modifier (which is the
-default for <tt><a href="#cl::alias">cl::alias</a></tt> options) indicates that
-the option should not appear in the <tt>-help</tt> output, but should appear in
-the <tt>-help-hidden</tt> output.</li>
-
-<li><a name="cl::ReallyHidden">The <b><tt>cl::ReallyHidden</tt></b></a> modifier
-indicates that the option should not appear in any help output.</li>
-
-</ul>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="numoccurrences">Controlling the number of occurrences required and
-  allowed</a>
-</h4>
-
-<div>
-
-<p>This group of options is used to control how many times an option is allowed
-(or required) to be specified on the command line of your program.
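-
-<p>For example, a minimal sketch of the three most common cases (the option
-names here are hypothetical):</p>
-
-<div class="doc_code"><pre>
-static cl::opt<std::string>  Output("o", cl::Required);     <i>// exactly once</i>
-static cl::list<std::string> Defines("D", cl::ZeroOrMore);  <i>// any number of times</i>
-static cl::list<std::string> Inputs(cl::Positional, cl::OneOrMore,
-                                    cl::desc("<input files>")); <i>// at least once</i>
-</pre></div>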
Specifying a
-value for this setting allows the CommandLine library to do error checking for
-you.</p>
-
-<p>The allowed values for this option group are:</p>
-
-<ul>
-
-<li><a name="cl::Optional">The <b><tt>cl::Optional</tt></b></a> modifier (which
-is the default for the <tt><a href="#cl::opt">cl::opt</a></tt> and <tt><a
-href="#cl::alias">cl::alias</a></tt> classes) indicates that your program will
-allow either zero or one occurrence of the option to be specified.</li>
-
-<li><a name="cl::ZeroOrMore">The <b><tt>cl::ZeroOrMore</tt></b></a> modifier
-(which is the default for the <tt><a href="#cl::list">cl::list</a></tt> class)
-indicates that your program will allow the option to be specified zero or more
-times.</li>
-
-<li><a name="cl::Required">The <b><tt>cl::Required</tt></b></a> modifier
-indicates that the specified option must be specified exactly one time.</li>
-
-<li><a name="cl::OneOrMore">The <b><tt>cl::OneOrMore</tt></b></a> modifier
-indicates that the option must be specified at least one time.</li>
-
-<li>The <b><tt>cl::ConsumeAfter</tt></b> modifier is described in the <a
-href="#positional">Positional arguments section</a>.</li>
-
-</ul>
-
-<p>If an option is not specified, then the value of the option is equal to the
-value specified by the <tt><a href="#cl::init">cl::init</a></tt> attribute.  If
-the <tt><a href="#cl::init">cl::init</a></tt> attribute is not specified, the
-option value is initialized with the default constructor for the data type.</p>
-
-<p>If an option of the <tt><a href="#cl::opt">cl::opt</a></tt> class is
-specified multiple times, only the last value will be retained.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="valrequired">Controlling whether or not a value must be specified</a>
-</h4>
-
-<div>
-
-<p>This group of options is used to control whether or not the option allows a
-value to be present.  In the case of the CommandLine library, a value is either
-specified with an equal sign (e.g. '<tt>-index-depth=17</tt>') or as a trailing
-string (e.g. '<tt>-o a.out</tt>').</p>
-
-<p>The allowed values for this option group are:</p>
-
-<ul>
-
-<li><a name="cl::ValueOptional">The <b><tt>cl::ValueOptional</tt></b></a> modifier
-(which is the default for <tt>bool</tt> typed options) specifies that it is
-acceptable to have a value, or not.  A boolean argument can be enabled just by
-appearing on the command line, or it can have an explicit '<tt>-foo=true</tt>'.
-If an option is specified with this mode, it is illegal for the value to be
-provided without the equal sign.  Therefore '<tt>-foo true</tt>' is illegal.  To
-get this behavior, you must use the <a
-href="#cl::ValueRequired">cl::ValueRequired</a> modifier.</li>
-
-<li><a name="cl::ValueRequired">The <b><tt>cl::ValueRequired</tt></b></a> modifier
-(which is the default for all other types except for <a
-href="#onealternative">unnamed alternatives using the generic parser</a>)
-specifies that a value must be provided.  This mode informs the command line
-library that if an option is not provided with an equal sign, the next
-argument provided must be the value.  This allows things like '<tt>-o
-a.out</tt>' to work.</li>
-
-<li><a name="cl::ValueDisallowed">The <b><tt>cl::ValueDisallowed</tt></b></a>
-modifier (which is the default for <a href="#onealternative">unnamed
-alternatives using the generic parser</a>) indicates that it is a runtime error
-for the user to specify a value.
This can be used to disallow users from
-specifying values for boolean options (such as '<tt>-foo=true</tt>').</li>
-
-</ul>
-
-<p>In general, the default values for this option group work just like you would
-want them to.  As mentioned above, you can specify the <a
-href="#cl::ValueDisallowed">cl::ValueDisallowed</a> modifier to a boolean
-argument to restrict your command line parser.  These options are mostly useful
-when <a href="#extensionguide">extending the library</a>.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="formatting">Controlling other formatting options</a>
-</h4>
-
-<div>
-
-<p>The formatting option group is used to specify that the command line option
-has special abilities and is otherwise different from other command line
-arguments.  As usual, you can only specify one of these arguments at most.</p>
-
-<ul>
-
-<li><a name="cl::NormalFormatting">The <b><tt>cl::NormalFormatting</tt></b></a>
-modifier (which is the default for all options) specifies that this option is
-"normal".</li>
-
-<li><a name="cl::Positional">The <b><tt>cl::Positional</tt></b></a> modifier
-specifies that this is a positional argument that does not have a command line
-option associated with it.  See the <a href="#positional">Positional
-Arguments</a> section for more information.</li>
-
-<li>The <b><a href="#cl::ConsumeAfter"><tt>cl::ConsumeAfter</tt></a></b> modifier
-specifies that this option is used to capture "interpreter style" arguments.  See <a href="#cl::ConsumeAfter">this section for more information</a>.</li>
-
-<li><a name="cl::Prefix">The <b><tt>cl::Prefix</tt></b></a> modifier specifies
-that this option prefixes its value.  With 'Prefix' options, the equal sign does
-not separate the value from the option name specified.  Instead, the value is
-everything after the prefix, including any equal sign if present.  This is useful
-for processing odd arguments like <tt>-lmalloc</tt> and <tt>-L/usr/lib</tt> in a
-linker tool or <tt>-DNAME=value</tt> in a compiler tool.  Here, the
-'<tt>l</tt>', '<tt>D</tt>' and '<tt>L</tt>' options are normal string (or list)
-options, that have the <b><tt><a href="#cl::Prefix">cl::Prefix</a></tt></b>
-modifier added to allow the CommandLine library to recognize them.  Note that
-<b><tt><a href="#cl::Prefix">cl::Prefix</a></tt></b> options must not have the
-<b><tt><a href="#cl::ValueDisallowed">cl::ValueDisallowed</a></tt></b> modifier
-specified.</li>
-
-<li><a name="cl::Grouping">The <b><tt>cl::Grouping</tt></b></a> modifier is used
-to implement Unix-style tools (like <tt>ls</tt>) that have lots of single letter
-arguments, but only require a single dash.  For example, the '<tt>ls -labF</tt>'
-command actually enables four different options, all of which are single
-letters.  Note that <b><tt><a href="#cl::Grouping">cl::Grouping</a></tt></b>
-options cannot have values.</li>
-
-</ul>
-
-<p>The CommandLine library does not restrict how you use the <b><tt><a
-href="#cl::Prefix">cl::Prefix</a></tt></b> or <b><tt><a
-href="#cl::Grouping">cl::Grouping</a></tt></b> modifiers, but it is possible to
-specify ambiguous argument settings.  Thus, it is possible to have multiple
-letter options that are prefix or grouping options, and they will still work as
-designed.</p>
-
-<p>To do this, the CommandLine library uses a greedy algorithm to parse the
-input option into (potentially multiple) prefix and grouping options.
The
-strategy basically looks like this:</p>
-
-<div class="doc_code"><tt>parse(string OrigInput) {</tt>
-
-<ol>
-<li><tt>string input = OrigInput;</tt>
-<li><tt>if (isOption(input)) return getOption(input).parse();</tt>               <i>// Normal option</i>
-<li><tt>while (!isOption(input) && !input.empty()) input.pop_back();</tt>        <i>// Remove the last letter</i>
-<li><tt>if (input.empty()) return error();</tt>                                  <i>// No matching option</i>
-<li><tt>if (getOption(input).isPrefix())<br>
-      return getOption(input).parse(input);</tt>
-<li><tt>while (!input.empty()) {       <i>// Must be grouping options</i><br>
-      getOption(input).parse();<br>
-      OrigInput.erase(OrigInput.begin(), OrigInput.begin()+input.length());<br>
-      input = OrigInput;<br>
-      while (!isOption(input) && !input.empty()) input.pop_back();<br>
-   }</tt>
-<li><tt>if (!OrigInput.empty()) error();</tt></li>
-</ol>
-
-<p><tt>}</tt></p>
-</div>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="misc">Miscellaneous option modifiers</a>
-</h4>
-
-<div>
-
-<p>The miscellaneous option modifiers are the only flags where you can specify
-more than one flag from the set: they are not mutually exclusive.  These flags
-specify boolean properties that modify the option.</p>
-
-<ul>
-
-<li><a name="cl::CommaSeparated">The <b><tt>cl::CommaSeparated</tt></b></a> modifier
-indicates that any commas specified for an option's value should be used to
-split the value up into multiple values for the option.  For example, these two
-options are equivalent when <tt>cl::CommaSeparated</tt> is specified:
-"<tt>-foo=a -foo=b -foo=c</tt>" and "<tt>-foo=a,b,c</tt>".  This option only
-makes sense to be used in a case where the option is allowed to accept one or
-more values (i.e. it is a <a href="#cl::list">cl::list</a> option).</li>
-
-<li><a name="cl::PositionalEatsArgs">The
-<b><tt>cl::PositionalEatsArgs</tt></b></a> modifier (which only applies to
-positional arguments, and only makes sense for lists) indicates that the positional
-argument should consume any strings after it (including strings that start with
-a "-") up until another recognized positional argument.  For example, if you
-have two "eating" positional arguments, "<tt>pos1</tt>" and "<tt>pos2</tt>", the
-string "<tt>-pos1 -foo -bar baz -pos2 -bork</tt>" would cause the "<tt>-foo -bar
-baz</tt>" strings to be applied to the "<tt>-pos1</tt>" option and the
-"<tt>-bork</tt>" string to be applied to the "<tt>-pos2</tt>" option.</li>
-
-<li><a name="cl::Sink">The <b><tt>cl::Sink</tt></b></a> modifier is
-used to handle unknown options.  If there is at least one option with the
-<tt>cl::Sink</tt> modifier specified, the parser passes
-unrecognized option strings to it as values instead of signaling an
-error.  As with <tt>cl::CommaSeparated</tt>, this modifier
-only makes sense with a <a href="#cl::list">cl::list</a> option.</li>
-
-</ul>
-
-<p>So far, these are the only three miscellaneous option modifiers.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="response">Response files</a>
-</h4>
-
-<div>
-
-<p>Some systems, such as certain variants of Microsoft Windows and
-some older Unices, have a relatively low limit on command-line
-length.  It is therefore customary to use the so-called 'response
-files' to circumvent this restriction.  These files are mentioned on
-the command-line (using the "@file") syntax.
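-
-<p>For example, a hypothetical session with the compiler tool from the Quick
-Start section (the response file name is made up):</p>
-
-<div class="doc_code"><pre>
-  $ echo "-O2 -quiet -o a.out input.c" > args.rsp
-  $ compiler @args.rsp
-</pre></div>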
The program reads these -files and inserts the contents into argv, thereby working around the -command-line length limits. Response files are enabled by an optional -fourth argument to -<a href="#cl::ParseEnvironmentOptions"><tt>cl::ParseEnvironmentOptions</tt></a> -and -<a href="#cl::ParseCommandLineOptions"><tt>cl::ParseCommandLineOptions</tt></a>. -</p> - -</div> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="toplevel">Top-Level Classes and Functions</a> -</h3> - -<div> - -<p>Despite all of the built-in flexibility, the CommandLine option library -really only consists of one function (<a -href="#cl::ParseCommandLineOptions"><tt>cl::ParseCommandLineOptions</tt></a>) -and three main classes: <a href="#cl::opt"><tt>cl::opt</tt></a>, <a -href="#cl::list"><tt>cl::list</tt></a>, and <a -href="#cl::alias"><tt>cl::alias</tt></a>. This section describes these three -classes in detail.</p> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="cl::ParseCommandLineOptions">The <tt>cl::ParseCommandLineOptions</tt> - function</a> -</h4> - -<div> - -<p>The <tt>cl::ParseCommandLineOptions</tt> function is designed to be called -directly from <tt>main</tt>, and is used to fill in the values of all of the -command line option variables once <tt>argc</tt> and <tt>argv</tt> are -available.</p> - -<p>The <tt>cl::ParseCommandLineOptions</tt> function requires two parameters -(<tt>argc</tt> and <tt>argv</tt>), but may also take an optional third parameter -which holds <a href="#description">additional extra text</a> to emit when the -<tt>-help</tt> option is invoked, and a fourth boolean parameter that enables -<a href="#response">response files</a>.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="cl::ParseEnvironmentOptions">The <tt>cl::ParseEnvironmentOptions</tt> - function</a> -</h4> - -<div> - -<p>The <tt>cl::ParseEnvironmentOptions</tt> function has mostly the same effects -as <a -href="#cl::ParseCommandLineOptions"><tt>cl::ParseCommandLineOptions</tt></a>, -except that it is designed to take values for options from an environment -variable, for those cases in which reading the command line is not convenient or -desired. It fills in the values of all the command line option variables just -like <a -href="#cl::ParseCommandLineOptions"><tt>cl::ParseCommandLineOptions</tt></a> -does.</p> - -<p>It takes four parameters: the name of the program (since <tt>argv</tt> may -not be available, it can't just look in <tt>argv[0]</tt>), the name of the -environment variable to examine, the optional -<a href="#description">additional extra text</a> to emit when the -<tt>-help</tt> option is invoked, and the boolean -switch that controls whether <a href="#response">response files</a> -should be read.</p> - -<p><tt>cl::ParseEnvironmentOptions</tt> will break the environment -variable's value up into words and then process them using -<a href="#cl::ParseCommandLineOptions"><tt>cl::ParseCommandLineOptions</tt></a>. 
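-
-<p>For example, a minimal sketch (the program name and environment variable
-here are hypothetical):</p>
-
-<div class="doc_code"><pre>
-int main(int argc, char **argv) {
-  <i>// Read options from the COMPILER_FLAGS environment variable instead of argv.</i>
-  cl::ParseEnvironmentOptions("compiler", "COMPILER_FLAGS",
-                              " CommandLine compiler example\n");
-  ...
-}
-</pre></div>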
-<b>Note:</b> Currently <tt>cl::ParseEnvironmentOptions</tt> does not support -quoting, so an environment variable containing <tt>-option "foo bar"</tt> will -be parsed as three words, <tt>-option</tt>, <tt>"foo</tt>, and <tt>bar"</tt>, -which is different from what you would get from the shell with the same -input.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="cl::SetVersionPrinter">The <tt>cl::SetVersionPrinter</tt> - function</a> -</h4> - -<div> - -<p>The <tt>cl::SetVersionPrinter</tt> function is designed to be called -directly from <tt>main</tt> and <i>before</i> -<tt>cl::ParseCommandLineOptions</tt>. Its use is optional. It simply arranges -for a function to be called in response to the <tt>--version</tt> option instead -of having the <tt>CommandLine</tt> library print out the usual version string -for LLVM. This is useful for programs that are not part of LLVM but wish to use -the <tt>CommandLine</tt> facilities. Such programs should just define a small -function that takes no arguments and returns <tt>void</tt> and that prints out -whatever version information is appropriate for the program. Pass the address -of that function to <tt>cl::SetVersionPrinter</tt> to arrange for it to be -called when the <tt>--version</tt> option is given by the user.</p> - -</div> -<!-- _______________________________________________________________________ --> -<h4> - <a name="cl::opt">The <tt>cl::opt</tt> class</a> -</h4> - -<div> - -<p>The <tt>cl::opt</tt> class is the class used to represent scalar command line -options, and is the one used most of the time. It is a templated class which -can take up to three arguments (all except for the first have default values -though):</p> - -<div class="doc_code"><pre> -<b>namespace</b> cl { - <b>template</b> <<b>class</b> DataType, <b>bool</b> ExternalStorage = <b>false</b>, - <b>class</b> ParserClass = parser<DataType> > - <b>class</b> opt; -} -</pre></div> - -<p>The first template argument specifies what underlying data type the command -line argument is, and is used to select a default parser implementation. The -second template argument is used to specify whether the option should contain -the storage for the option (the default) or whether external storage should be -used to contain the value parsed for the option (see <a href="#storage">Internal -vs External Storage</a> for more information).</p> - -<p>The third template argument specifies which parser to use. The default value -selects an instantiation of the <tt>parser</tt> class based on the underlying -data type of the option. In general, this default works well for most -applications, so this option is only used when using a <a -href="#customparser">custom parser</a>.</p> - -</div> - -<!-- _______________________________________________________________________ --> -<h4> - <a name="cl::list">The <tt>cl::list</tt> class</a> -</h4> - -<div> - -<p>The <tt>cl::list</tt> class is the class used to represent a list of command -line options. It too is a templated class which can take up to three -arguments:</p> - -<div class="doc_code"><pre> -<b>namespace</b> cl { - <b>template</b> <<b>class</b> DataType, <b>class</b> Storage = <b>bool</b>, - <b>class</b> ParserClass = parser<DataType> > - <b>class</b> list; -} -</pre></div> - -<p>This class works the exact same as the <a -href="#cl::opt"><tt>cl::opt</tt></a> class, except that the second argument is -the <b>type</b> of the external storage, not a boolean value. 
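-
-<p>A minimal sketch of a <tt>cl::list</tt> with external storage (this assumes
-<tt>cl::location</tt> applies to lists the same way it does to scalar options;
-the names here are hypothetical):</p>
-
-<div class="doc_code"><pre>
-std::vector<std::string> InputFiles;                 <i>// the actual storage</i>
-static cl::list<std::string, std::vector<std::string> >
-Inputs(cl::Positional, cl::location(InputFiles), cl::desc("<input files>"));
-</pre></div>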
For this class,
-the marker type '<tt>bool</tt>' is used to indicate that internal storage should
-be used.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="cl::bits">The <tt>cl::bits</tt> class</a>
-</h4>
-
-<div>
-
-<p>The <tt>cl::bits</tt> class is the class used to represent a list of command
-line options in the form of a bit vector.  It is also a templated class which
-can take up to three arguments:</p>
-
-<div class="doc_code"><pre>
-<b>namespace</b> cl {
-  <b>template</b> <<b>class</b> DataType, <b>class</b> Storage = <b>bool</b>,
-            <b>class</b> ParserClass = parser<DataType> >
-  <b>class</b> bits;
-}
-</pre></div>
-
-<p>This class works exactly the same as the <a
-href="#cl::list"><tt>cl::list</tt></a> class, except that the second argument
-must be of <b>type</b> <tt>unsigned</tt> if external storage is used.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="cl::alias">The <tt>cl::alias</tt> class</a>
-</h4>
-
-<div>
-
-<p>The <tt>cl::alias</tt> class is a nontemplated class that is used to form
-aliases for other arguments.</p>
-
-<div class="doc_code"><pre>
-<b>namespace</b> cl {
-  <b>class</b> alias;
-}
-</pre></div>
-
-<p>The <a href="#cl::aliasopt"><tt>cl::aliasopt</tt></a> attribute should be
-used to specify which option this is an alias for.  Alias arguments default to
-being <a href="#cl::Hidden">Hidden</a>, and use the aliased option's parser to do
-the conversion from string to data.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="cl::extrahelp">The <tt>cl::extrahelp</tt> class</a>
-</h4>
-
-<div>
-
-<p>The <tt>cl::extrahelp</tt> class is a nontemplated class that allows extra
-help text to be printed out for the <tt>-help</tt> option.</p>
-
-<div class="doc_code"><pre>
-<b>namespace</b> cl {
-  <b>struct</b> extrahelp;
-}
-</pre></div>
-
-<p>To use extrahelp, simply construct one with a <tt>const char*</tt>
-parameter to the constructor.  The text passed to the constructor will be printed
-at the bottom of the help message, verbatim.  Note that multiple
-<tt>cl::extrahelp</tt> instances <b>can</b> be used, but this practice is
-discouraged.  If your tool needs to print additional help information, put all
-that help into a single <tt>cl::extrahelp</tt> instance.</p>
-<p>For example:</p>
-<div class="doc_code"><pre>
-  cl::extrahelp("\nADDITIONAL HELP:\n\n  This is the extra help\n");
-</pre></div>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="builtinparsers">Builtin parsers</a>
-</h3>
-
-<div>
-
-<p>Parsers control how the string value taken from the command line is
-translated into a typed value, suitable for use in a C++ program.  By default,
-the CommandLine library uses an instance of <tt>parser<type></tt> if the
-command line option specifies that it uses values of type '<tt>type</tt>'.
-Because of this, custom option processing is specified with specializations of
-the '<tt>parser</tt>' class.</p>
-
-<p>The CommandLine library provides the following builtin parser
-specializations, which are sufficient for most applications.  It can, however,
-also be extended to work with new data types and new ways of interpreting the
-same data.
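-
-<p>For instance, the value type given to <tt>cl::opt</tt> is what selects the
-parser (a small sketch):</p>
-
-<div class="doc_code"><pre>
-static cl::opt<bool>        Verbose("verbose");  <i>// parsed by parser<bool></i>
-static cl::opt<int>         Level("level");      <i>// parsed by parser<int></i>
-static cl::opt<std::string> Name("name");        <i>// parsed by parser<std::string></i>
-</pre></div>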
See the <a href="#customparser">Writing a Custom Parser</a> section for more
-details on this type of library extension.</p>
-
-<ul>
-
-<li><a name="genericparser">The <b>generic <tt>parser<t></tt> parser</b></a>
-can be used to map string values to any data type, through the use of the <a
-href="#cl::values">cl::values</a> property, which specifies the mapping
-information.  The most common use of this parser is for parsing enum values,
-which allows you to use the CommandLine library for all of the error checking to
-make sure that only valid enum values are specified (as opposed to accepting
-arbitrary strings).  Despite this, however, the generic parser class can be used
-for any data type.</li>
-
-<li><a name="boolparser">The <b><tt>parser<bool></tt> specialization</b></a>
-is used to convert boolean strings to a boolean value.  Currently accepted
-strings are "<tt>true</tt>", "<tt>TRUE</tt>", "<tt>True</tt>", "<tt>1</tt>",
-"<tt>false</tt>", "<tt>FALSE</tt>", "<tt>False</tt>", and "<tt>0</tt>".</li>
-
-<li><a name="boolOrDefaultparser">The <b><tt>parser<boolOrDefault></tt>
- specialization</b></a> is used for cases where the value is boolean,
-but we also need to know whether the option was specified at all.  boolOrDefault
-is an enum with 3 values, BOU_UNSET, BOU_TRUE and BOU_FALSE.  This parser accepts
-the same strings as <b><tt>parser<bool></tt></b>.</li>
-
-<li><a name="stringparser">The <b><tt>parser<string></tt>
-specialization</b></a> simply stores the parsed string into the string value
-specified.  No conversion or modification of the data is performed.</li>
-
-<li><a name="intparser">The <b><tt>parser<int></tt> specialization</b></a>
-uses the C <tt>strtol</tt> function to parse the string input.  As such, it will
-accept a decimal number (with an optional '+' or '-' prefix) which must start
-with a non-zero digit.  It accepts octal numbers, which are identified with a
-'<tt>0</tt>' prefix digit, and hexadecimal numbers with a prefix of
-'<tt>0x</tt>' or '<tt>0X</tt>'.</li>
-
-<li><a name="doubleparser">The <b><tt>parser<double></tt></b></a> and
-<b><tt>parser<float></tt> specializations</b> use the standard C
-<tt>strtod</tt> function to convert floating point strings into floating point
-values.  As such, a broad range of string formats is supported, including
-exponential notation (e.g. <tt>1.7e15</tt>), and locales are handled
-properly.</li>
-
-</ul>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="extensionguide">Extension Guide</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Although the CommandLine library has a lot of functionality built into it
-already (as discussed previously), one of its true strengths lies in its
-extensibility.  This section discusses how the CommandLine library works under
-the covers and illustrates how to do some simple, common extensions.</p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="customparser">Writing a custom parser</a>
-</h3>
-
-<div>
-
-<p>One of the simplest and most common extensions is the use of a custom parser.
-As <a href="#builtinparsers">discussed previously</a>, parsers are the portion
-of the CommandLine library that turns string input from the user into a
-particular parsed data type, validating the input in the process.</p>
-
-<p>There are two ways to use a new parser:</p>
-
-<ol>
-
-<li>
-
-<p>Specialize the <a href="#genericparser"><tt>cl::parser</tt></a> template for
-your custom data type.</p>
-
-<p>This approach has the advantage that users of your custom data type will
-automatically use your custom parser whenever they define an option with a value
-type of your data type.  The disadvantage of this approach is that it doesn't
-work if your fundamental data type is something that is already supported.</p>
-
-</li>
-
-<li>
-
-<p>Write an independent class, using it explicitly from options that need
-it.</p>
-
-<p>This approach works well in situations where you would like to parse an
-option using special syntax for a not-very-special data type.  The drawback of
-this approach is that users of your parser have to be aware that they are using
-your parser instead of the builtin ones.</p>
-
-</li>
-
-</ol>
-
-<p>To guide the discussion, we will discuss a custom parser that accepts file
-sizes, specified with an optional unit after the numeric size.  For example, we
-would like to parse "102kb", "41M", "1G" into the appropriate integer value.  In
-this case, the underlying data type we want to parse into is
-'<tt>unsigned</tt>'.  We choose approach #2 above because we don't want to make
-this the default for all <tt>unsigned</tt> options.</p>
-
-<p>To start out, we declare our new <tt>FileSizeParser</tt> class:</p>
-
-<div class="doc_code"><pre>
-<b>struct</b> FileSizeParser : <b>public</b> cl::basic_parser<<b>unsigned</b>> {
-  <i>// parse - Return true on error.</i>
-  <b>bool</b> parse(cl::Option &O, <b>const char</b> *ArgName, <b>const</b> std::string &ArgValue,
-             <b>unsigned</b> &Val);
-};
-</pre></div>
-
-<p>Our new class inherits from the <tt>cl::basic_parser</tt> template class to
-fill in the default, boilerplate code for us.  We give it the data type that
-we parse into (which is also the type of the last argument to the <tt>parse</tt>
-method), so that clients of our custom parser know what object type to pass in
-to the parse method.  (Here we declare that we parse into '<tt>unsigned</tt>'
-variables.)</p>
-
-<p>For most purposes, the only method that must be implemented in a custom
-parser is the <tt>parse</tt> method.  The <tt>parse</tt> method is called
-whenever the option is invoked, passing in the option itself, the option name,
-the string to parse, and a reference to a return value.  If the string to parse
-is not well-formed, the parser should output an error message and return true.
-Otherwise it should return false and set '<tt>Val</tt>' to the parsed value.
In
-our example, we implement <tt>parse</tt> as:</p>
-
-<div class="doc_code"><pre>
-<b>bool</b> FileSizeParser::parse(cl::Option &O, <b>const char</b> *ArgName,
-                           <b>const</b> std::string &Arg, <b>unsigned</b> &Val) {
-  <b>const char</b> *ArgStart = Arg.c_str();
-  <b>char</b> *End;
-
-  <i>// Parse integer part, leaving 'End' pointing to the first non-integer char</i>
-  Val = (unsigned)strtol(ArgStart, &End, 0);
-
-  <b>while</b> (1) {
-    <b>switch</b> (*End++) {
-    <b>case</b> 0: <b>return</b> false;   <i>// No error</i>
-    <b>case</b> 'i':               <i>// Ignore the 'i' in KiB if people use that</i>
-    <b>case</b> 'b': <b>case</b> 'B':     <i>// Ignore B suffix</i>
-      <b>break</b>;
-
-    <b>case</b> 'g': <b>case</b> 'G': Val *= 1024*1024*1024; <b>break</b>;
-    <b>case</b> 'm': <b>case</b> 'M': Val *= 1024*1024;      <b>break</b>;
-    <b>case</b> 'k': <b>case</b> 'K': Val *= 1024;           <b>break</b>;
-
-    default:
-      <i>// Print an error message if unrecognized character!</i>
-      <b>return</b> O.error("'" + Arg + "' value invalid for file size argument!");
-    }
-  }
-}
-</pre></div>
-
-<p>This function implements a very simple parser for the kinds of strings we are
-interested in.  Although it has some holes (it allows "<tt>123KKK</tt>" for
-example), it is good enough for this example.  Note that we use the option
-itself to print out the error message (the <tt>error</tt> method always returns
-true) in order to get a nice error message (shown below).  Now that we have our
-parser class, we can use it like this:</p>
-
-<div class="doc_code"><pre>
-<b>static</b> <a href="#cl::opt">cl::opt</a><<b>unsigned</b>, <b>false</b>, FileSizeParser>
-MFS(<i>"max-file-size"</i>, <a href="#cl::desc">cl::desc</a>(<i>"Maximum file size to accept"</i>),
-    <a href="#cl::value_desc">cl::value_desc</a>("<i>size</i>"));
-</pre></div>
-
-<p>This adds the following to the output of our program:</p>
-
-<div class="doc_code"><pre>
-OPTIONS:
-  -help                 - display available options (-help-hidden for more)
-  ...
-  <b>-max-file-size=<size> - Maximum file size to accept</b>
-</pre></div>
-
-<p>And we can test that our parser works correctly now (the test program just
-prints out the max-file-size argument value):</p>
-
-<div class="doc_code"><pre>
-$ ./test
-MFS: 0
-$ ./test -max-file-size=123MB
-MFS: 128974848
-$ ./test -max-file-size=3G
-MFS: 3221225472
-$ ./test -max-file-size=dog
--max-file-size option: 'dog' value invalid for file size argument!
-</pre></div>
-
-<p>It looks like it works.  The error message that we get is nice and helpful,
-and we seem to accept reasonable file sizes.  This wraps up the "custom parser"
-tutorial.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="explotingexternal">Exploiting external storage</a>
-</h3>
-
-<div>
-  <p>Several of the LLVM libraries define static <tt>cl::opt</tt> instances that
-  will automatically be included in any program that links with that library.
-  This is a feature. However, sometimes it is necessary to know the value of the
-  command line option outside of the library. In these cases the library does or
-  should provide an external storage location that is accessible to users of the
-  library.
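-
-  <p>The pattern is the same as in the <a href="#storage">Internal vs External
-  Storage</a> section above: the library exports the flag from a header and
-  ties it to a (usually hidden) option with <tt>cl::location</tt>.  A minimal
-  sketch (the names here are hypothetical):</p>
-
-  <div class="doc_code"><pre>
-  <i>// In the library's public header:</i>
-  extern bool MyLibVerbose;
-
-  <i>// In one of the library's .cpp files:</i>
-  bool MyLibVerbose;
-  static cl::opt<bool, true>
-  VerboseOpt("mylib-verbose", cl::location(MyLibVerbose), cl::Hidden);
-  </pre></div>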
Examples of this include the <tt>llvm::DebugFlag</tt> exported by the - <tt>lib/Support/Debug.cpp</tt> file and the <tt>llvm::TimePassesIsEnabled</tt> - flag exported by the <tt>lib/VMCore/Pass.cpp</tt> file.</p> - -<p>TODO: complete this section</p> - -</div> - -<!-- ======================================================================= --> -<h3> - <a name="dynamicopts">Dynamically adding command line options</a> -</h3> - -<div> - -<p>TODO: fill in this section</p> - -</div> - -</div> - -<!-- *********************************************************************** --> - -<hr> -<address> - <a href="http://jigsaw.w3.org/css-validator/check/referer"><img - src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a> - <a href="http://validator.w3.org/check/referer"><img - src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a> - - <a href="mailto:sabre@nondot.org">Chris Lattner</a><br> - <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br> - Last modified: $Date$ -</address> - -</body> -</html> diff --git a/docs/CommandLine.rst b/docs/CommandLine.rst new file mode 100644 index 0000000..302f5a4 --- /dev/null +++ b/docs/CommandLine.rst @@ -0,0 +1,1615 @@ +.. _commandline: + +============================== +CommandLine 2.0 Library Manual +============================== + +Introduction +============ + +This document describes the CommandLine argument processing library. It will +show you how to use it, and what it can do. The CommandLine library uses a +declarative approach to specifying the command line options that your program +takes. By default, these options declarations implicitly hold the value parsed +for the option declared (of course this `can be changed`_). + +Although there are a **lot** of command line argument parsing libraries out +there in many different languages, none of them fit well with what I needed. By +looking at the features and problems of other libraries, I designed the +CommandLine library to have the following features: + +#. Speed: The CommandLine library is very quick and uses little resources. The + parsing time of the library is directly proportional to the number of + arguments parsed, not the number of options recognized. Additionally, + command line argument values are captured transparently into user defined + global variables, which can be accessed like any other variable (and with the + same performance). + +#. Type Safe: As a user of CommandLine, you don't have to worry about + remembering the type of arguments that you want (is it an int? a string? a + bool? an enum?) and keep casting it around. Not only does this help prevent + error prone constructs, it also leads to dramatically cleaner source code. + +#. No subclasses required: To use CommandLine, you instantiate variables that + correspond to the arguments that you would like to capture, you don't + subclass a parser. This means that you don't have to write **any** + boilerplate code. + +#. Globally accessible: Libraries can specify command line arguments that are + automatically enabled in any tool that links to the library. This is + possible because the application doesn't have to keep a list of arguments to + pass to the parser. This also makes supporting `dynamically loaded options`_ + trivial. + +#. Cleaner: CommandLine supports enum and other types directly, meaning that + there is less error and more security built into the library. 
You don't have + to worry about whether your integral command line argument accidentally got + assigned a value that is not valid for your enum type. + +#. Powerful: The CommandLine library supports many different types of arguments, + from simple `boolean flags`_ to `scalars arguments`_ (`strings`_, + `integers`_, `enums`_, `doubles`_), to `lists of arguments`_. This is + possible because CommandLine is... + +#. Extensible: It is very simple to add a new argument type to CommandLine. + Simply specify the parser that you want to use with the command line option + when you declare it. `Custom parsers`_ are no problem. + +#. Labor Saving: The CommandLine library cuts down on the amount of grunt work + that you, the user, have to do. For example, it automatically provides a + ``-help`` option that shows the available command line options for your tool. + Additionally, it does most of the basic correctness checking for you. + +#. Capable: The CommandLine library can handle lots of different forms of + options often found in real programs. For example, `positional`_ arguments, + ``ls`` style `grouping`_ options (to allow processing '``ls -lad``' + naturally), ``ld`` style `prefix`_ options (to parse '``-lmalloc + -L/usr/lib``'), and interpreter style options. + +This document will hopefully let you jump in and start using CommandLine in your +utility quickly and painlessly. Additionally it should be a simple reference +manual to figure out how stuff works. If it is failing in some area (or you +want an extension to the library), nag the author, `Chris +Lattner <mailto:sabre@nondot.org>`_. + +Quick Start Guide +================= + +This section of the manual runs through a simple CommandLine'ification of a +basic compiler tool. This is intended to show you how to jump into using the +CommandLine library in your own program, and show you some of the cool things it +can do. + +To start out, you need to include the CommandLine header file into your program: + +.. code-block:: c++ + + #include "llvm/Support/CommandLine.h" + +Additionally, you need to add this as the first line of your main program: + +.. code-block:: c++ + + int main(int argc, char **argv) { + cl::ParseCommandLineOptions(argc, argv); + ... + } + +... which actually parses the arguments and fills in the variable declarations. + +Now that you are ready to support command line arguments, we need to tell the +system which ones we want, and what type of arguments they are. The CommandLine +library uses a declarative syntax to model command line arguments with the +global variable declarations that capture the parsed values. This means that +for every command line option that you would like to support, there should be a +global variable declaration to capture the result. For example, in a compiler, +we would like to support the Unix-standard '``-o <filename>``' option to specify +where to put the output. With the CommandLine library, this is represented like +this: + +.. _scalars arguments: +.. _here: + +.. code-block:: c++ + + cl::opt<string> OutputFilename("o", cl::desc("Specify output filename"), cl::value_desc("filename")); + +This declares a global variable "``OutputFilename``" that is used to capture the +result of the "``o``" argument (first parameter). We specify that this is a +simple scalar option by using the "``cl::opt``" template (as opposed to the +"``cl::list``" template), and tell the CommandLine library that the data +type that we are parsing is a string. 
+ +The second and third parameters (which are optional) are used to specify what to +output for the "``-help``" option. In this case, we get a line that looks like +this: + +:: + + USAGE: compiler [options] + + OPTIONS: + -help - display available options (-help-hidden for more) + -o <filename> - Specify output filename + +Because we specified that the command line option should parse using the +``string`` data type, the variable declared is automatically usable as a real +string in all contexts that a normal C++ string object may be used. For +example: + +.. code-block:: c++ + + ... + std::ofstream Output(OutputFilename.c_str()); + if (Output.good()) ... + ... + +There are many different options that you can use to customize the command line +option handling library, but the above example shows the general interface to +these options. The options can be specified in any order, and are specified +with helper functions like `cl::desc(...)`_, so there are no positional +dependencies to remember. The available options are discussed in detail in the +`Reference Guide`_. + +Continuing the example, we would like to have our compiler take an input +filename as well as an output filename, but we do not want the input filename to +be specified with a hyphen (ie, not ``-filename.c``). To support this style of +argument, the CommandLine library allows for `positional`_ arguments to be +specified for the program. These positional arguments are filled with command +line parameters that are not in option form. We use this feature like this: + +.. code-block:: c++ + + + cl::opt<string> InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-")); + +This declaration indicates that the first positional argument should be treated +as the input filename. Here we use the `cl::init`_ option to specify an initial +value for the command line option, which is used if the option is not specified +(if you do not specify a `cl::init`_ modifier for an option, then the default +constructor for the data type is used to initialize the value). Command line +options default to being optional, so if we would like to require that the user +always specify an input filename, we would add the `cl::Required`_ flag, and we +could eliminate the `cl::init`_ modifier, like this: + +.. code-block:: c++ + + cl::opt<string> InputFilename(cl::Positional, cl::desc("<input file>"), cl::Required); + +Again, the CommandLine library does not require the options to be specified in +any particular order, so the above declaration is equivalent to: + +.. code-block:: c++ + + cl::opt<string> InputFilename(cl::Positional, cl::Required, cl::desc("<input file>")); + +By simply adding the `cl::Required`_ flag, the CommandLine library will +automatically issue an error if the argument is not specified, which shifts all +of the command line option verification code out of your application into the +library. This is just one example of how using flags can alter the default +behaviour of the library, on a per-option basis. By adding one of the +declarations above, the ``-help`` option synopsis is now extended to: + +:: + + USAGE: compiler [options] <input file> + + OPTIONS: + -help - display available options (-help-hidden for more) + -o <filename> - Specify output filename + +... indicating that an input filename is expected. 
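+
+Pulling the pieces together, a complete (if trivial) version of the compiler
+driver so far might look like the following sketch.  Here ``compileFile`` is an
+invented stand-in for the real work of the tool; the two option declarations
+are the ones from above:
+
+.. code-block:: c++
+
+  #include "llvm/Support/CommandLine.h"
+
+  using namespace llvm;
+
+  static cl::opt<std::string> OutputFilename("o",
+      cl::desc("Specify output filename"), cl::value_desc("filename"));
+
+  static cl::opt<std::string> InputFilename(cl::Positional,
+      cl::desc("<input file>"), cl::Required);
+
+  static void compileFile(const std::string &In, const std::string &Out) {
+    // Invented placeholder: a real tool would do its work here.
+  }
+
+  int main(int argc, char **argv) {
+    // Fills in InputFilename and OutputFilename from argv.
+    cl::ParseCommandLineOptions(argc, argv);
+    compileFile(InputFilename, OutputFilename);
+    return 0;
+  }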
+ +Boolean Arguments +----------------- + +In addition to input and output filenames, we would like the compiler example to +support three boolean flags: "``-f``" to force writing binary output to a +terminal, "``--quiet``" to enable quiet mode, and "``-q``" for backwards +compatibility with some of our users. We can support these by declaring options +of boolean type like this: + +.. code-block:: c++ + + cl::opt<bool> Force ("f", cl::desc("Enable binary output on terminals")); + cl::opt<bool> Quiet ("quiet", cl::desc("Don't print informational messages")); + cl::opt<bool> Quiet2("q", cl::desc("Don't print informational messages"), cl::Hidden); + +This does what you would expect: it declares three boolean variables +("``Force``", "``Quiet``", and "``Quiet2``") to recognize these options. Note +that the "``-q``" option is specified with the "`cl::Hidden`_" flag. This +modifier prevents it from being shown by the standard "``-help``" output (note +that it is still shown in the "``-help-hidden``" output). + +The CommandLine library uses a `different parser`_ for different data types. +For example, in the string case, the argument passed to the option is copied +literally into the content of the string variable... we obviously cannot do that +in the boolean case, however, so we must use a smarter parser. In the case of +the boolean parser, it allows no options (in which case it assigns the value of +true to the variable), or it allows the values "``true``" or "``false``" to be +specified, allowing any of the following inputs: + +:: + + compiler -f # No value, 'Force' == true + compiler -f=true # Value specified, 'Force' == true + compiler -f=TRUE # Value specified, 'Force' == true + compiler -f=FALSE # Value specified, 'Force' == false + +... you get the idea. The `bool parser`_ just turns the string values into +boolean values, and rejects things like '``compiler -f=foo``'. Similarly, the +`float`_, `double`_, and `int`_ parsers work like you would expect, using the +'``strtol``' and '``strtod``' C library calls to parse the string value into the +specified data type. + +With the declarations above, "``compiler -help``" emits this: + +:: + + USAGE: compiler [options] <input file> + + OPTIONS: + -f - Enable binary output on terminals + -o - Override output filename + -quiet - Don't print informational messages + -help - display available options (-help-hidden for more) + +and "``compiler -help-hidden``" prints this: + +:: + + USAGE: compiler [options] <input file> + + OPTIONS: + -f - Enable binary output on terminals + -o - Override output filename + -q - Don't print informational messages + -quiet - Don't print informational messages + -help - display available options (-help-hidden for more) + +This brief example has shown you how to use the '`cl::opt`_' class to parse +simple scalar command line arguments. In addition to simple scalar arguments, +the CommandLine library also provides primitives to support CommandLine option +`aliases`_, and `lists`_ of options. + +.. _aliases: + +Argument Aliases +---------------- + +So far, the example works well, except for the fact that we need to check the +quiet condition like this now: + +.. code-block:: c++ + + ... + if (!Quiet && !Quiet2) printInformationalMessage(...); + ... + +... which is a real pain! Instead of defining two values for the same +condition, we can use the "`cl::alias`_" class to make the "``-q``" option an +**alias** for the "``-quiet``" option, instead of providing a value itself: + +.. 
code-block:: c++
+
+  cl::opt<bool> Force ("f", cl::desc("Enable binary output on terminals"));
+  cl::opt<bool> Quiet ("quiet", cl::desc("Don't print informational messages"));
+  cl::alias     QuietA("q", cl::desc("Alias for -quiet"), cl::aliasopt(Quiet));
+
+The third line (which is the only one we modified from above) defines a "``-q``"
+alias that updates the "``Quiet``" variable (as specified by the `cl::aliasopt`_
+modifier) whenever it is specified.  Because aliases do not hold state, the only
+thing the program has to query is the ``Quiet`` variable now.  Another nice
+feature of aliases is that they automatically hide themselves from the ``-help``
+output (although, again, they are still visible in the ``-help-hidden`` output).
+
+Now the application code can simply use:
+
+.. code-block:: c++
+
+  ...
+  if (!Quiet) printInformationalMessage(...);
+  ...
+
+... which is much nicer!  The "`cl::alias`_" can be used to specify an
+alternative name for any variable type, and has many uses.
+
+.. _unnamed alternatives using the generic parser:
+
+Selecting an alternative from a set of possibilities
+----------------------------------------------------
+
+So far we have seen how the CommandLine library handles builtin types like
+``std::string``, ``bool`` and ``int``, but how does it handle things it doesn't
+know about, like enums or '``int*``'s?
+
+The answer is that it uses a table-driven generic parser (unless you specify
+your own parser, as described in the `Extension Guide`_).  This parser maps
+literal strings to whatever type is required, and requires you to tell it what
+this mapping should be.
+
+Let's say that we would like to add four optimization levels to our optimizer,
+using the standard flags "``-g``", "``-O1``", "``-O2``", and "``-O3``".  We
+could easily implement this with boolean options like above, but there are
+several problems with this strategy:
+
+#. A user could specify more than one of the options at a time, for example,
+   "``compiler -O3 -O2``".  The CommandLine library would not be able to catch
+   this erroneous input for us.
+
+#. We would have to test 4 different variables to see which ones are set.
+
+#. This doesn't map to the numeric levels that we want...  so we cannot easily
+   see if some level >= "``-O1``" is enabled.
+
+To cope with these problems, we can use an enum value, and have the CommandLine
+library fill it in with the appropriate level directly, which is used like this:
+
+.. code-block:: c++
+
+  enum OptLevel {
+    g, O1, O2, O3
+  };
+
+  cl::opt<OptLevel> OptimizationLevel(cl::desc("Choose optimization level:"),
+    cl::values(
+      clEnumVal(g , "No optimizations, enable debugging"),
+      clEnumVal(O1, "Enable trivial optimizations"),
+      clEnumVal(O2, "Enable default optimizations"),
+      clEnumVal(O3, "Enable expensive optimizations"),
+      clEnumValEnd));
+
+  ...
+  if (OptimizationLevel >= O2) doPartialRedundancyElimination(...);
+  ...
+
+This declaration defines a variable "``OptimizationLevel``" of the
+"``OptLevel``" enum type.  This variable can be assigned any of the values that
+are listed in the declaration (note that the declaration list must be terminated
+with the "``clEnumValEnd``" argument!).  The CommandLine library enforces that
+the user can only specify one of the options, and it ensures that only valid
+enum values can be specified.  The "``clEnumVal``" macros ensure that the
+command line arguments match the enum values.  With this option added, our help
+output now is:
+
+::
+
+  USAGE: compiler [options] <input file>
+
+  OPTIONS:
+    Choose optimization level:
+      -g          - No optimizations, enable debugging
+      -O1         - Enable trivial optimizations
+      -O2         - Enable default optimizations
+      -O3         - Enable expensive optimizations
+    -f            - Enable binary output on terminals
+    -help         - display available options (-help-hidden for more)
+    -o <filename> - Specify output filename
+    -quiet        - Don't print informational messages
+
+In this case, it is sort of awkward that flag names correspond directly to enum
+names, because we probably don't want an enum definition named "``g``" in our
+program.  Because of this, we can alternatively write this example like this:
+
+.. code-block:: c++
+
+  enum OptLevel {
+    Debug, O1, O2, O3
+  };
+
+  cl::opt<OptLevel> OptimizationLevel(cl::desc("Choose optimization level:"),
+    cl::values(
+      clEnumValN(Debug, "g", "No optimizations, enable debugging"),
+      clEnumVal(O1, "Enable trivial optimizations"),
+      clEnumVal(O2, "Enable default optimizations"),
+      clEnumVal(O3, "Enable expensive optimizations"),
+      clEnumValEnd));
+
+  ...
+  if (OptimizationLevel == Debug) outputDebugInfo(...);
+  ...
+
+By using the "``clEnumValN``" macro instead of "``clEnumVal``", we can directly
+specify the name that the flag should get.  In general, a direct mapping is
+nice, but sometimes you can't or don't want to preserve the mapping, which is
+when you would use it.
+
+Named Alternatives
+------------------
+
+Another useful argument form is a named alternative style.  We shall use this
+style in our compiler to specify different debug levels that can be used.
+Instead of each debug level being its own switch, we want to support the
+following options, of which only one can be specified at a time:
+"``--debug_level=none``", "``--debug_level=quick``",
+"``--debug_level=detailed``".  To do this, we use the exact same format as our
+optimization level flags, but we also specify an option name.  For this case,
+the code looks like this:
+
+.. code-block:: c++
+
+  enum DebugLev {
+    nodebuginfo, quick, detailed
+  };
+
+  // Enable Debug Options to be specified on the command line
+  cl::opt<DebugLev> DebugLevel("debug_level", cl::desc("Set the debugging level:"),
+    cl::values(
+      clEnumValN(nodebuginfo, "none", "disable debug information"),
+      clEnumVal(quick,    "enable quick debug information"),
+      clEnumVal(detailed, "enable detailed debug information"),
+      clEnumValEnd));
+
+This definition defines an enumerated command line variable of type "``enum
+DebugLev``", which works exactly the same way as before.  The difference here is
+just the interface exposed to the user of your program and the help output by
+the "``-help``" option:
+
+::
+
+  USAGE: compiler [options] <input file>
+
+  OPTIONS:
+    Choose optimization level:
+      -g          - No optimizations, enable debugging
+      -O1         - Enable trivial optimizations
+      -O2         - Enable default optimizations
+      -O3         - Enable expensive optimizations
+    -debug_level  - Set the debugging level:
+      =none       - disable debug information
+      =quick      - enable quick debug information
+      =detailed   - enable detailed debug information
+    -f            - Enable binary output on terminals
+    -help         - display available options (-help-hidden for more)
+    -o <filename> - Specify output filename
+    -quiet        - Don't print informational messages
+
+Again, the only structural difference between the debug level declaration and
+the optimization level declaration is that the debug level declaration includes
+an option name (``"debug_level"``), which automatically changes how the library
+processes the argument.  The CommandLine library supports both forms so that you
+can choose the form most appropriate for your application.
+
+.. _lists:
+
+Parsing a list of options
+-------------------------
+
+Now that we have the standard run-of-the-mill argument types out of the way,
+let's get a little wild and crazy.  Let's say that we want our optimizer to
+accept a **list** of optimizations to perform, allowing duplicates.  For
+example, we might want to run: "``compiler -dce -constprop -inline -dce
+-strip``".  In this case, the order of the arguments and the number of
+appearances are very important.  This is what the "``cl::list``" template is
+for.  First, start by defining an enum of the optimizations that you would like
+to perform:
+
+.. code-block:: c++
+
+  enum Opts {
+    // 'inline' is a C++ keyword, so name it 'inlining'
+    dce, constprop, inlining, strip
+  };
+
+Then define your "``cl::list``" variable:
+
+.. code-block:: c++
+
+  cl::list<Opts> OptimizationList(cl::desc("Available Optimizations:"),
+    cl::values(
+      clEnumVal(dce,       "Dead Code Elimination"),
+      clEnumVal(constprop, "Constant Propagation"),
+      clEnumValN(inlining, "inline", "Procedure Integration"),
+      clEnumVal(strip,     "Strip Symbols"),
+      clEnumValEnd));
+
+This defines a variable that is conceptually of the type
+"``std::vector<enum Opts>``".  Thus, you can access it with standard vector
+methods:
+
+.. code-block:: c++
+
+  for (unsigned i = 0; i != OptimizationList.size(); ++i)
+    switch (OptimizationList[i])
+      ...
+
+... to iterate through the list of options specified.
+
+Note that the "``cl::list``" template is completely general and may be used with
+any data types or other arguments that you can use with the "``cl::opt``"
+template.  One especially useful way to use a list is to capture all of the
+positional arguments together if there may be more than one specified.  In the
+case of a linker, for example, the linker takes several '``.o``' files, and
+needs to capture them into a list.  This is naturally specified as:
+
+.. code-block:: c++
+
+  ...
+  cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<Input files>"), cl::OneOrMore);
+  ...
+
+This variable works just like a "``vector<string>``" object.  As such, accessing
+the list is simple, just like above.  In this example, we used the
+`cl::OneOrMore`_ modifier to inform the CommandLine library that it is an error
+if the user does not specify any ``.o`` files on our command line.  Again, this
+just reduces the amount of checking we have to do.
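+
+For instance, a complete (if simplified) sketch of such a linker-style driver
+could look like this (the tool name and loop body are invented for
+illustration; only the ``cl::list`` declaration comes from the example above):
+
+.. code-block:: c++
+
+  #include "llvm/Support/CommandLine.h"
+  #include <iostream>
+
+  using namespace llvm;
+
+  // One or more positional '.o' file arguments, collected in command line order.
+  static cl::list<std::string> InputFilenames(cl::Positional,
+                                              cl::desc("<Input files>"),
+                                              cl::OneOrMore);
+
+  int main(int argc, char **argv) {
+    cl::ParseCommandLineOptions(argc, argv, " toy linker\n");
+
+    // Once parsing is done, the list behaves just like a vector<string>.
+    for (unsigned i = 0; i != InputFilenames.size(); ++i)
+      std::cout << "linking: " << InputFilenames[i] << "\n";
+    return 0;
+  }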
+
+Collecting options as a set of flags
+------------------------------------
+
+Instead of collecting sets of options in a list, it is also possible to gather
+information for enum values in a **bit vector**.  The representation used by the
+`cl::bits`_ class is an ``unsigned`` integer.  An enum value is represented by a
+0/1 in the enum's ordinal value bit position, with 1 indicating that the enum
+was specified and 0 otherwise.  As each specified value is parsed, the resulting
+enum's bit is set in the option's bit vector:
+
+.. code-block:: c++
+
+  bits |= 1 << (unsigned)enum;
+
+Options that are specified multiple times are redundant.  Any instances after
+the first are discarded.
+
+Reworking the above list example, we could replace `cl::list`_ with `cl::bits`_:
+
+.. code-block:: c++
+
+  cl::bits<Opts> OptimizationBits(cl::desc("Available Optimizations:"),
+    cl::values(
+      clEnumVal(dce,       "Dead Code Elimination"),
+      clEnumVal(constprop, "Constant Propagation"),
+      clEnumValN(inlining, "inline", "Procedure Integration"),
+      clEnumVal(strip,     "Strip Symbols"),
+      clEnumValEnd));
+
+To test whether ``constprop`` was specified, we can use the ``cl::bits::isSet``
+function:
+
+.. code-block:: c++
+
+  if (OptimizationBits.isSet(constprop)) {
+    ...
+  }
+
+It's also possible to get the raw bit vector using the ``cl::bits::getBits``
+function:
+
+.. code-block:: c++
+
+  unsigned bits = OptimizationBits.getBits();
+
+Finally, if external storage is used, then the location specified must be of
+**type** ``unsigned``.  In all other ways a `cl::bits`_ option is equivalent to
+a `cl::list`_ option.
+
+.. _additional extra text:
+
+Adding freeform text to help output
+-----------------------------------
+
+As our program grows and becomes more mature, we may decide to put summary
+information about what it does into the help output.  The help output is styled
+to look similar to a Unix ``man`` page, providing concise information about a
+program.  Unix ``man`` pages, however, often have a description about what the
+program does.  To add this to your CommandLine program, simply pass a third
+argument to the `cl::ParseCommandLineOptions`_ call in main.  This additional
+argument is then printed as the overview information for your program, allowing
+you to include any additional information that you want.  For example:
+
+.. code-block:: c++
+
+  int main(int argc, char **argv) {
+    cl::ParseCommandLineOptions(argc, argv, " CommandLine compiler example\n\n"
+                                "  This program blah blah blah...\n");
+    ...
+  }
+
+would yield the help output:
+
+::
+
+  OVERVIEW: CommandLine compiler example
+
+    This program blah blah blah...
+
+  USAGE: compiler [options] <input file>
+
+  OPTIONS:
+    ...
+    -help             - display available options (-help-hidden for more)
+    -o <filename>     - Specify output filename
+
+.. _Reference Guide:
+
+Reference Guide
+===============
+
+Now that you know the basics of how to use the CommandLine library, this section
+will give you the detailed information you need to tune how command line options
+work, as well as information on more "advanced" command line option processing
+capabilities.
+
+.. _positional:
+.. _positional argument:
+.. _Positional Arguments:
+.. _Positional arguments section:
+.. _positional options:
+
+Positional Arguments
+--------------------
+
+Positional arguments are those arguments that are not named, and are not
+specified with a hyphen.  Positional arguments should be used when an option is
+specified by its position alone.  For example, the standard Unix ``grep`` tool
+takes a regular expression argument, and an optional filename to search through
+(which defaults to standard input if a filename is not specified).  Using the
+CommandLine library, this would be specified as:
+
+.. code-block:: c++
+
+  cl::opt<string> Regex   (cl::Positional, cl::desc("<regular expression>"), cl::Required);
+  cl::opt<string> Filename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+
+Given these two option declarations, the ``-help`` output for our grep
+replacement would look like this:
+
+::
+
+  USAGE: spiffygrep [options] <regular expression> <input file>
+
+  OPTIONS:
+    -help - display available options (-help-hidden for more)
+
+... and the resultant program could be used just like the standard ``grep``
+tool.
+
+Positional arguments are sorted by their order of construction.  This means that
+command line options will be ordered according to how they are listed in a .cpp
+file, but will not have an ordering defined if the positional arguments are
+defined in multiple .cpp files.  The fix for this problem is simply to define
+all of your positional arguments in one .cpp file.
+
+Specifying positional options with hyphens
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sometimes you may want to specify a value to your positional argument that
+starts with a hyphen (for example, searching for '``-foo``' in a file).  At
+first, you will have trouble doing this, because the parser will try to find an
+argument named '``-foo``', and will fail (and single quotes will not save you).
+Note that the system ``grep`` has the same problem:
+
+::
+
+  $ spiffygrep '-foo' test.txt
+  Unknown command line argument '-foo'.  Try: 'spiffygrep -help'
+
+  $ grep '-foo' test.txt
+  grep: illegal option -- f
+  grep: illegal option -- o
+  grep: illegal option -- o
+  Usage: grep -hblcnsviw pattern file . . .
+
+The solution for this problem is the same for both your tool and the system
+version: use the '``--``' marker.  When the user specifies '``--``' on the
+command line, it is telling the program that all options after the '``--``'
+should be treated as positional arguments, not options.  Thus, we can use it
+like this:
+
+::
+
+  $ spiffygrep -- -foo test.txt
+  ...output...
+
+Determining absolute position with getPosition()
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sometimes an option can affect or modify the meaning of another option.  For
+example, consider ``gcc``'s ``-x LANG`` option.  This tells ``gcc`` to ignore
+the suffix of subsequent positional arguments and force the file to be
+interpreted as if it contained source code in language ``LANG``.  In order to
+handle this properly, you need to know the absolute position of each argument,
+especially those in lists, so their interaction(s) can be applied correctly.
+This is also useful for options like ``-llibname`` which is actually a
+positional argument that starts with a dash.
+
+So, generally, the problem is that you have two ``cl::list`` variables that
+interact in some way.  To ensure the correct interaction, you can use the
+``cl::list::getPosition(optnum)`` method.  This method returns the absolute
+position (as found on the command line) of the ``optnum`` item in the
+``cl::list``.
+
+The idiom for usage is like this:
+
+.. code-block:: c++
+
+  static cl::list<std::string> Files(cl::Positional, cl::OneOrMore);
+  static cl::list<std::string> Libraries("l", cl::ZeroOrMore);
+
+  int main(int argc, char**argv) {
+    // ...
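+    // (Assumes cl::ParseCommandLineOptions(argc, argv) has already been called
+    // above, so Files and Libraries now hold the parsed values.)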
+
+    std::vector<std::string>::iterator fileIt = Files.begin();
+    std::vector<std::string>::iterator libIt  = Libraries.begin();
+    unsigned libPos = 0, filePos = 0;
+    while ( 1 ) {
+      if ( libIt != Libraries.end() )
+        libPos = Libraries.getPosition( libIt - Libraries.begin() );
+      else
+        libPos = 0;
+      if ( fileIt != Files.end() )
+        filePos = Files.getPosition( fileIt - Files.begin() );
+      else
+        filePos = 0;
+
+      if ( filePos != 0 && (libPos == 0 || filePos < libPos) ) {
+        // Source File Is next
+        ++fileIt;
+      }
+      else if ( libPos != 0 && (filePos == 0 || libPos < filePos) ) {
+        // Library is next
+        ++libIt;
+      }
+      else
+        break; // we're done with the list
+    }
+  }
+
+Note that, for compatibility reasons, the ``cl::opt`` also supports an
+``unsigned getPosition()`` option that will provide the absolute position of
+that option.  You can apply the same approach as above with a ``cl::opt`` and a
+``cl::list`` option as you can with two lists.
+
+.. _interpreter style options:
+.. _cl::ConsumeAfter:
+.. _this section for more information:
+
+The ``cl::ConsumeAfter`` modifier
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``cl::ConsumeAfter`` `formatting option`_ is used to construct programs that
+use "interpreter style" option processing.  With this style of option
+processing, all arguments specified after the last positional argument are
+treated as special interpreter arguments that are not interpreted by the command
+line option parser.
+
+As a concrete example, let's say we are developing a replacement for the
+standard Unix Bourne shell (``/bin/sh``).  To run ``/bin/sh``, first you specify
+options to the shell itself (like ``-x`` which turns on trace output), then you
+specify the name of the script to run, then you specify arguments to the script.
+These arguments to the script are parsed by the Bourne shell command line option
+processor, but are not interpreted as options to the shell itself.  Using the
+CommandLine library, we would specify this as:
+
+.. code-block:: c++
+
+  cl::opt<string> Script(cl::Positional, cl::desc("<input script>"), cl::init("-"));
+  cl::list<string>  Argv(cl::ConsumeAfter, cl::desc("<program arguments>..."));
+  cl::opt<bool>    Trace("x", cl::desc("Enable trace output"));
+
+which automatically provides the help output:
+
+::
+
+  USAGE: spiffysh [options] <input script> <program arguments>...
+
+  OPTIONS:
+    -help - display available options (-help-hidden for more)
+    -x    - Enable trace output
+
+At runtime, if we run our new shell replacement as '``spiffysh -x test.sh -a -x
+-y bar``', the ``Trace`` variable will be set to true, the ``Script`` variable
+will be set to "``test.sh``", and the ``Argv`` list will contain ``["-a", "-x",
+"-y", "bar"]``, because they were specified after the last positional argument
+(which is the script name).
+
+There are several limitations to when ``cl::ConsumeAfter`` options can be
+specified.  For example, only one ``cl::ConsumeAfter`` can be specified per
+program, there must be at least one `positional argument`_ specified, there must
+not be any `cl::list`_ positional arguments, and the ``cl::ConsumeAfter`` option
+should be a `cl::list`_ option.
+
+.. _can be changed:
+.. _Internal vs External Storage:
+
+Internal vs External Storage
+----------------------------
+
+By default, all command line options automatically hold the value that they
+parse from the command line.  This is very convenient in the common case,
+especially when combined with the ability to define command line options in the
+files that use them.  This is called the internal storage model.
+
+Sometimes, however, it is nice to separate the command line option processing
+code from the storage of the value parsed.  For example, let's say that we have
+a '``-debug``' option that we would like to use to enable debug information
+across the entire body of our program.  In this case, the boolean value
+controlling the debug code should be globally accessible (in a header file, for
+example) yet the command line option processing code should not be exposed to
+all of these clients (requiring lots of .cpp files to ``#include
+CommandLine.h``).
+
+To do this, set up your .h file with your option, like this for example:
+
+.. code-block:: c++
+
+  // DebugFlag.h - Get access to the '-debug' command line option
+  //
+
+  // DebugFlag - This boolean is set to true if the '-debug' command line option
+  // is specified.  This should probably not be referenced directly, instead,
+  // use the DEBUG macro below.
+  //
+  extern bool DebugFlag;
+
+  // DEBUG macro - This macro should be used by code to emit debug information.
+  // If the '-debug' option is specified on the command line, and if this is a
+  // debug build, then the code specified as the option to the macro will be
+  // executed.  Otherwise it will not be.
+  #ifdef NDEBUG
+  #define DEBUG(X)
+  #else
+  #define DEBUG(X) do { if (DebugFlag) { X; } } while (0)
+  #endif
+
+This allows clients to blissfully use the ``DEBUG()`` macro, or the
+``DebugFlag`` explicitly if they want to.  Now we just need to be able to set
+the ``DebugFlag`` boolean when the option is set.  To do this, we pass an
+additional argument to our command line argument processor, and we specify where
+to fill in with the `cl::location`_ attribute:
+
+.. code-block:: c++
+
+  bool DebugFlag;                  // the actual value
+  static cl::opt<bool, true>       // The parser
+  Debug("debug", cl::desc("Enable debug output"), cl::Hidden, cl::location(DebugFlag));
+
+In the above example, we specify "``true``" as the second argument to the
+`cl::opt`_ template, indicating that the template should not maintain a copy of
+the value itself.  In addition to this, we specify the `cl::location`_
+attribute, so that ``DebugFlag`` is automatically set.
+
+Option Attributes
+-----------------
+
+This section describes the basic attributes that you can specify on options.
+
+* The option name attribute (which is required for all options, except
+  `positional options`_) specifies what the option name is.  This option is
+  specified in simple double quotes:
+
+  .. code-block:: c++
+
+    cl::opt<bool> Quiet("quiet");
+
+.. _cl::desc(...):
+
+* The **cl::desc** attribute specifies a description for the option to be
+  shown in the ``-help`` output for the program.
+
+.. _cl::value_desc:
+
+* The **cl::value_desc** attribute specifies a string that can be used to
+  fine tune the ``-help`` output for a command line option.  Look `here`_ for an
+  example.
+
+.. _cl::init:
+
+* The **cl::init** attribute specifies an initial value for a `scalar`_
+  option.  If this attribute is not specified then the command line option
+  value defaults to the value created by the default constructor for the type.
+
+  .. warning::
+
+    If you specify both **cl::init** and **cl::location** for an option, you
+    must specify **cl::location** first, so that when the command-line parser
+    sees **cl::init**, it knows where to put the initial value.  (You will get
+    an error at runtime if you don't put them in the right order.)
+
+.. _cl::location:
+
+* The **cl::location** attribute specifies where to store the value for a
+  parsed command line option if using external storage.  See the section on
+  `Internal vs External Storage`_ for more information.
+
+.. _cl::aliasopt:
+
+* The **cl::aliasopt** attribute specifies which option a `cl::alias`_ option is
+  an alias for.
+
+.. _cl::values:
+
+* The **cl::values** attribute specifies the string-to-value mapping to be used
+  by the generic parser.  It takes a **clEnumValEnd**-terminated list of
+  (option, value, description) triplets that specify the option name, the value
+  mapped to, and the description shown in the ``-help`` for the tool.  Because
+  the generic parser is used most frequently with enum values, two macros are
+  often useful:
+
+  #. The **clEnumVal** macro is used as a nice simple way to specify a triplet
+     for an enum.  This macro automatically makes the option name be the same as
+     the enum name.  The first option to the macro is the enum, the second is
+     the description for the command line option.
+
+  #. The **clEnumValN** macro is used to specify macro options where the option
+     name doesn't equal the enum name.  For this macro, the first argument is
+     the enum value, the second is the flag name, and the third is the
+     description.
+
+  You will get a compile time error if you try to use cl::values with a parser
+  that does not support it.
+
+.. _cl::multi_val:
+
+* The **cl::multi_val** attribute specifies that this option takes multiple
+  values (example: ``-sectalign segname sectname sectvalue``).  This attribute
+  takes one unsigned argument - the number of values for the option.  This
+  attribute is valid only on ``cl::list`` options (and will fail with a compile
+  error if you try to use it with other option types).  It is allowed to use all
+  of the usual modifiers on multi-valued options (besides
+  ``cl::ValueDisallowed``, obviously).
+
+Option Modifiers
+----------------
+
+Option modifiers are the flags and expressions that you pass into the
+constructors for `cl::opt`_ and `cl::list`_.  These modifiers give you the
+ability to tweak how options are parsed and how ``-help`` output is generated to
+fit your application well.
+
+These options fall into five main categories:
+
+#. Hiding an option from ``-help`` output
+
+#. Controlling the number of occurrences required and allowed
+
+#. Controlling whether or not a value must be specified
+
+#. Controlling other formatting options
+
+#. Miscellaneous option modifiers
+
+It is not possible to apply two modifiers from the same category to a single
+option (you'll get a runtime error), except for modifiers in the miscellaneous
+category.  The CommandLine library specifies defaults for all of these settings
+that are the most useful in practice and the most common, which means that you
+usually shouldn't have to worry about these.
+
+Hiding an option from ``-help`` output
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``cl::NotHidden``, ``cl::Hidden``, and ``cl::ReallyHidden`` modifiers are
+used to control whether or not an option appears in the ``-help`` and
+``-help-hidden`` output for the compiled program:
+
+.. _cl::NotHidden:
+
+* The **cl::NotHidden** modifier (which is the default for `cl::opt`_ and
+  `cl::list`_ options) indicates the option is to appear in both help
+  listings.
+
+.. _cl::Hidden:
+
+* The **cl::Hidden** modifier (which is the default for `cl::alias`_ options)
+  indicates that the option should not appear in the ``-help`` output, but
+  should appear in the ``-help-hidden`` output.
+
+.. _cl::ReallyHidden:
+
+* The **cl::ReallyHidden** modifier indicates that the option should not appear
+  in any help output.
+
+Controlling the number of occurrences required and allowed
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This group of options is used to control how many times an option is allowed (or
+required) to be specified on the command line of your program.  Specifying a
+value for this setting allows the CommandLine library to do error checking for
+you.
+
+The allowed values for this option group are:
+
+.. _cl::Optional:
+
+* The **cl::Optional** modifier (which is the default for the `cl::opt`_ and
+  `cl::alias`_ classes) indicates that your program will allow either zero or
+  one occurrence of the option to be specified.
+
+.. _cl::ZeroOrMore:
+
+* The **cl::ZeroOrMore** modifier (which is the default for the `cl::list`_
+  class) indicates that your program will allow the option to be specified zero
+  or more times.
+
+.. _cl::Required:
+
+* The **cl::Required** modifier indicates that the specified option must be
+  specified exactly one time.
+
+.. _cl::OneOrMore:
+
+* The **cl::OneOrMore** modifier indicates that the option must be specified at
+  least one time.
+
+* The **cl::ConsumeAfter** modifier is described in the `Positional arguments
+  section`_.
+
+If an option is not specified, then the value of the option is equal to the
+value specified by the `cl::init`_ attribute.  If the ``cl::init`` attribute is
+not specified, the option value is initialized with the default constructor for
+the data type.
+
+If an option of the `cl::opt`_ class is specified multiple times, only the last
+value will be retained.
+
+Controlling whether or not a value must be specified
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This group of options is used to control whether or not the option allows a
+value to be present.  In the case of the CommandLine library, a value is either
+specified with an equal sign (e.g. '``-index-depth=17``') or as a trailing
+string (e.g. '``-o a.out``').
+
+The allowed values for this option group are:
+
+.. _cl::ValueOptional:
+
+* The **cl::ValueOptional** modifier (which is the default for ``bool`` typed
+  options) specifies that it is acceptable to have a value, or not.  A boolean
+  argument can be enabled just by appearing on the command line, or it can have
+  an explicit '``-foo=true``'.  If an option is specified with this mode, it is
+  illegal for the value to be provided without the equal sign.  Therefore
+  '``-foo true``' is illegal.  To get this behavior, you must use
+  the `cl::ValueRequired`_ modifier.
+
+.. _cl::ValueRequired:
+
+* The **cl::ValueRequired** modifier (which is the default for all other types
+  except for `unnamed alternatives using the generic parser`_) specifies that a
+  value must be provided.  This mode informs the command line library that if an
+  option is not provided with an equal sign, the next argument provided must be
+  the value.  This allows things like '``-o a.out``' to work.
+
+.. _cl::ValueDisallowed:
+
+* The **cl::ValueDisallowed** modifier (which is the default for `unnamed
+  alternatives using the generic parser`_) indicates that it is a runtime error
+  for the user to specify a value.  This can be provided to disallow users from
+  providing options to boolean options (like '``-foo=true``').
+
+In general, the default values for this option group work just like you would
+want them to.  As mentioned above, you can specify the `cl::ValueDisallowed`_
+modifier to a boolean argument to restrict your command line parser.  These
+options are mostly useful when `extending the library`_.
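+
+As an illustration (a hypothetical sketch, not part of the compiler example
+above), a ``bool`` option can opt into the '``-foo true``' form by overriding
+its default with `cl::ValueRequired`_:
+
+.. code-block:: c++
+
+  // With cl::ValueRequired, "-foo=true" and "-foo true" are both accepted,
+  // but a bare "-foo" becomes an error, since the value may no longer be
+  // omitted.
+  static cl::opt<bool> Foo("foo", cl::desc("Example boolean option"),
+                           cl::ValueRequired);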
+
+.. _formatting option:
+
+Controlling other formatting options
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The formatting option group is used to specify that the command line option has
+special abilities and is otherwise different from other command line arguments.
+As usual, you can specify at most one of these arguments.
+
+.. _cl::NormalFormatting:
+
+* The **cl::NormalFormatting** modifier (which is the default for all options)
+  specifies that this option is "normal".
+
+.. _cl::Positional:
+
+* The **cl::Positional** modifier specifies that this is a positional argument
+  that does not have a command line option associated with it.  See the
+  `Positional Arguments`_ section for more information.
+
+* The **cl::ConsumeAfter** modifier specifies that this option is used to
+  capture "interpreter style" arguments.  See `this section for more
+  information`_.
+
+.. _prefix:
+.. _cl::Prefix:
+
+* The **cl::Prefix** modifier specifies that this option prefixes its value.
+  With 'Prefix' options, the equal sign does not separate the value from the
+  option name specified.  Instead, the value is everything after the prefix,
+  including any equal sign if present.  This is useful for processing odd
+  arguments like ``-lmalloc`` and ``-L/usr/lib`` in a linker tool or
+  ``-DNAME=value`` in a compiler tool.  Here, the '``l``', '``D``' and '``L``'
+  options are normal string (or list) options, that have the **cl::Prefix**
+  modifier added to allow the CommandLine library to recognize them.  Note that
+  **cl::Prefix** options must not have the **cl::ValueDisallowed** modifier
+  specified.
+
+.. _grouping:
+.. _cl::Grouping:
+
+* The **cl::Grouping** modifier is used to implement Unix-style tools (like
+  ``ls``) that have lots of single letter arguments, but only require a single
+  dash.  For example, the '``ls -labF``' command actually enables four different
+  options, all of which are single letters.  Note that **cl::Grouping** options
+  cannot have values.
+
+The CommandLine library does not restrict how you use the **cl::Prefix** or
+**cl::Grouping** modifiers, but it is possible to specify ambiguous argument
+settings.  Thus, it is possible to have multiple letter options that are prefix
+or grouping options, and they will still work as designed.
+
+To do this, the CommandLine library uses a greedy algorithm to parse the input
+option into (potentially multiple) prefix and grouping options.  The strategy
+basically looks like this:
+
+::
+
+  parse(string OrigInput) {
+
+  1. string input = OrigInput;
+  2. if (isOption(input)) return getOption(input).parse();      // Normal option
+  3. while (!isOption(input) && !input.empty()) input.pop_back();  // Remove the last letter
+  4. if (input.empty()) return error();                         // No matching option
+  5. if (getOption(input).isPrefix())
+       return getOption(input).parse(input);
+  6. while (!input.empty()) {                                   // Must be grouping options
+       getOption(input).parse();
+       OrigInput.erase(OrigInput.begin(), OrigInput.begin()+input.length());
+       input = OrigInput;
+       while (!isOption(input) && !input.empty()) input.pop_back();
+     }
+  7. if (!OrigInput.empty()) error();
+
+  }
+
+Miscellaneous option modifiers
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The miscellaneous option modifiers are the only flags where you can specify more
+than one flag from the set: they are not mutually exclusive.  These flags
+specify boolean properties that modify the option.
+
+.. _cl::CommaSeparated:
+
+* The **cl::CommaSeparated** modifier indicates that any commas specified for an
+  option's value should be used to split the value up into multiple values for
+  the option.  For example, these two options are equivalent when
+  ``cl::CommaSeparated`` is specified: "``-foo=a -foo=b -foo=c``" and
+  "``-foo=a,b,c``".  This option only makes sense to be used in a case where the
+  option is allowed to accept one or more values (i.e. it is a `cl::list`_
+  option).
+
+.. _cl::PositionalEatsArgs:
+
+* The **cl::PositionalEatsArgs** modifier (which only applies to positional
+  arguments, and only makes sense for lists) indicates that the positional
+  argument should consume any strings after it (including strings that start
+  with a "-") up until another recognized positional argument.  For example, if
+  you have two "eating" positional arguments, "``pos1``" and "``pos2``", the
+  string "``-pos1 -foo -bar baz -pos2 -bork``" would cause the "``-foo -bar
+  baz``" strings to be applied to the "``-pos1``" option and the "``-bork``"
+  string to be applied to the "``-pos2``" option.
+
+.. _cl::Sink:
+
+* The **cl::Sink** modifier is used to handle unknown options.  If there is at
+  least one option with the ``cl::Sink`` modifier specified, the parser passes
+  unrecognized option strings to it as values instead of signaling an error.  As
+  with ``cl::CommaSeparated``, this modifier only makes sense with a `cl::list`_
+  option.
+
+So far, these are the only three miscellaneous option modifiers.
+
+.. _response files:
+
+Response files
+^^^^^^^^^^^^^^
+
+Some systems, such as certain variants of Microsoft Windows and some older
+Unices, have a relatively low limit on command-line length.  It is therefore
+customary to use the so-called 'response files' to circumvent this
+restriction.  These files are mentioned on the command line (using the "@file"
+syntax).  The program reads these files and inserts the contents into argv,
+thereby working around the command-line length limits.  Response files are
+enabled by an optional fourth argument to `cl::ParseEnvironmentOptions`_ and
+`cl::ParseCommandLineOptions`_.
+
+Top-Level Classes and Functions
+-------------------------------
+
+Despite all of the built-in flexibility, the CommandLine option library really
+only consists of one function (`cl::ParseCommandLineOptions`_) and three main
+classes: `cl::opt`_, `cl::list`_, and `cl::alias`_.  This section describes
+these three classes in detail.
+
+.. _cl::ParseCommandLineOptions:
+
+The ``cl::ParseCommandLineOptions`` function
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``cl::ParseCommandLineOptions`` function is designed to be called directly
+from ``main``, and is used to fill in the values of all of the command line
+option variables once ``argc`` and ``argv`` are available.
+
+The ``cl::ParseCommandLineOptions`` function requires two parameters (``argc``
+and ``argv``), but may also take an optional third parameter which holds
+`additional extra text`_ to emit when the ``-help`` option is invoked, and a
+fourth boolean parameter that enables `response files`_.
+
+.. _cl::ParseEnvironmentOptions:
+
+The ``cl::ParseEnvironmentOptions`` function
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``cl::ParseEnvironmentOptions`` function has mostly the same effects as
+`cl::ParseCommandLineOptions`_, except that it is designed to take values for
+options from an environment variable, for those cases in which reading the
+command line is not convenient or desired.  It fills in the values of all the
+command line option variables just like `cl::ParseCommandLineOptions`_ does.
+
+It takes four parameters: the name of the program (since ``argv`` may not be
+available, it can't just look in ``argv[0]``), the name of the environment
+variable to examine, the optional `additional extra text`_ to emit when the
+``-help`` option is invoked, and the boolean switch that controls whether
+`response files`_ should be read.
+
+``cl::ParseEnvironmentOptions`` will break the environment variable's value up
+into words and then process them using `cl::ParseCommandLineOptions`_.
+**Note:** Currently ``cl::ParseEnvironmentOptions`` does not support quoting, so
+an environment variable containing ``-option "foo bar"`` will be parsed as three
+words, ``-option``, ``"foo``, and ``bar"``, which is different from what you
+would get from the shell with the same input.
+
+The ``cl::SetVersionPrinter`` function
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``cl::SetVersionPrinter`` function is designed to be called directly from
+``main`` and *before* ``cl::ParseCommandLineOptions``.  Its use is optional.  It
+simply arranges for a function to be called in response to the ``--version``
+option instead of having the ``CommandLine`` library print out the usual version
+string for LLVM.  This is useful for programs that are not part of LLVM but wish
+to use the ``CommandLine`` facilities.  Such programs should just define a small
+function that takes no arguments and returns ``void`` and that prints out
+whatever version information is appropriate for the program.  Pass the address
+of that function to ``cl::SetVersionPrinter`` to arrange for it to be called
+when the ``--version`` option is given by the user.
+
+.. _cl::opt:
+.. _scalar:
+
+The ``cl::opt`` class
+^^^^^^^^^^^^^^^^^^^^^
+
+The ``cl::opt`` class is the class used to represent scalar command line
+options, and is the one used most of the time.  It is a templated class which
+can take up to three arguments (all except for the first have default values
+though):
+
+.. code-block:: c++
+
+  namespace cl {
+    template <class DataType, bool ExternalStorage = false,
+              class ParserClass = parser<DataType> >
+    class opt;
+  }
+
+The first template argument specifies what underlying data type the command line
+argument is, and is used to select a default parser implementation.  The second
+template argument is used to specify whether the option should contain the
+storage for the option (the default) or whether external storage should be used
+to contain the value parsed for the option (see `Internal vs External Storage`_
+for more information).
+
+The third template argument specifies which parser to use.  The default value
+selects an instantiation of the ``parser`` class based on the underlying data
+type of the option.  In general, this default works well for most applications,
+so this option is only used when using a `custom parser`_.
+
+.. _lists of arguments:
+.. _cl::list:
+
+The ``cl::list`` class
+^^^^^^^^^^^^^^^^^^^^^^
+
+The ``cl::list`` class is the class used to represent a list of command line
+options.
+It too is a templated class which can take up to three arguments:
+
+.. code-block:: c++
+
+  namespace cl {
+    template <class DataType, class Storage = bool,
+              class ParserClass = parser<DataType> >
+    class list;
+  }
+
+This class works exactly the same as the `cl::opt`_ class, except that the
+second argument is the **type** of the external storage, not a boolean value.
+For this class, the marker type '``bool``' is used to indicate that internal
+storage should be used.
+
+.. _cl::bits:
+
+The ``cl::bits`` class
+^^^^^^^^^^^^^^^^^^^^^^
+
+The ``cl::bits`` class is the class used to represent a list of command line
+options in the form of a bit vector.  It is also a templated class which can
+take up to three arguments:
+
+.. code-block:: c++
+
+  namespace cl {
+    template <class DataType, class Storage = bool,
+              class ParserClass = parser<DataType> >
+    class bits;
+  }
+
+This class works exactly the same as the `cl::list`_ class, except that the
+second argument must be of **type** ``unsigned`` if external storage is used.
+
+.. _cl::alias:
+
+The ``cl::alias`` class
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``cl::alias`` class is a nontemplated class that is used to form aliases for
+other arguments.
+
+.. code-block:: c++
+
+  namespace cl {
+    class alias;
+  }
+
+The `cl::aliasopt`_ attribute should be used to specify which option this is an
+alias for.  Alias arguments default to being `cl::Hidden`_, and use the aliased
+option's parser to do the conversion from string to data.
+
+.. _cl::extrahelp:
+
+The ``cl::extrahelp`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``cl::extrahelp`` class is a nontemplated class that allows extra help text
+to be printed out for the ``-help`` option.
+
+.. code-block:: c++
+
+  namespace cl {
+    struct extrahelp;
+  }
+
+To use the extrahelp, simply construct one with a ``const char*`` parameter to
+the constructor.  The text passed to the constructor will be printed at the
+bottom of the help message, verbatim.  Note that multiple ``cl::extrahelp``
+**can** be used, but this practice is discouraged.  If your tool needs to print
+additional help information, put all that help into a single ``cl::extrahelp``
+instance.
+
+For example:
+
+.. code-block:: c++
+
+  cl::extrahelp("\nADDITIONAL HELP:\n\n  This is the extra help\n");
+
+.. _different parser:
+.. _discussed previously:
+
+Builtin parsers
+---------------
+
+Parsers control how the string value taken from the command line is translated
+into a typed value, suitable for use in a C++ program.  By default, the
+CommandLine library uses an instance of ``parser<type>`` if the command line
+option specifies that it uses values of type '``type``'.  Because of this,
+custom option processing is specified with specializations of the '``parser``'
+class.
+
+The CommandLine library provides the following builtin parser specializations,
+which are sufficient for most applications.  It can, however, also be extended
+to work with new data types and new ways of interpreting the same data.  See
+`Writing a Custom Parser`_ for more details on this type of library extension.
+
+.. _enums:
+.. _cl::parser:
+
+* The generic ``parser<t>`` parser can be used to map string values to any data
+  type, through the use of the `cl::values`_ property, which specifies the
+  mapping information.  The most common use of this parser is for parsing enum
+  values, which allows you to use the CommandLine library for all of the error
+  checking to make sure that only valid enum values are specified (as opposed to
+  accepting arbitrary strings).  Despite this, however, the generic parser class
+  can be used for any data type.
+
+.. _boolean flags:
+.. _bool parser:
+
+* The **parser<bool> specialization** is used to convert boolean strings to a
+  boolean value.  Currently accepted strings are "``true``", "``TRUE``",
+  "``True``", "``1``", "``false``", "``FALSE``", "``False``", and "``0``".
+
+* The **parser<boolOrDefault> specialization** is used for cases where the value
+  is boolean, but we also need to know whether the option was specified at all.
+  boolOrDefault is an enum with 3 values, BOU_UNSET, BOU_TRUE and BOU_FALSE.
+  This parser accepts the same strings as ``parser<bool>``.
+
+.. _strings:
+
+* The **parser<string> specialization** simply stores the parsed string into the
+  string value specified.  No conversion or modification of the data is
+  performed.
+
+.. _integers:
+.. _int:
+
+* The **parser<int> specialization** uses the C ``strtol`` function to parse the
+  string input.  As such, it will accept a decimal number (with an optional '+'
+  or '-' prefix) which must start with a non-zero digit.  It accepts octal
+  numbers, which are identified with a '``0``' prefix digit, and hexadecimal
+  numbers with a prefix of '``0x``' or '``0X``'.
+
+.. _doubles:
+.. _float:
+.. _double:
+
+* The **parser<double>** and **parser<float> specializations** use the standard
+  C ``strtod`` function to convert floating point strings into floating point
+  values.  As such, a broad range of string formats is supported, including
+  exponential notation (ex: ``1.7e15``), and locales are properly supported.
+
+.. _Extension Guide:
+.. _extending the library:
+
+Extension Guide
+===============
+
+Although the CommandLine library has a lot of functionality built into it
+already (as discussed previously), one of its true strengths lies in its
+extensibility.  This section discusses how the CommandLine library works under
+the covers and illustrates how to do some simple, common extensions.
+
+.. _Custom parsers:
+.. _custom parser:
+.. _Writing a Custom Parser:
+
+Writing a custom parser
+-----------------------
+
+One of the simplest and most common extensions is the use of a custom parser.
+As `discussed previously`_, parsers are the portion of the CommandLine library
+that turns string input from the user into a particular parsed data type,
+validating the input in the process.
+
+There are two ways to use a new parser:
+
+#. Specialize the `cl::parser`_ template for your custom data type.
+
+   This approach has the advantage that users of your custom data type will
+   automatically use your custom parser whenever they define an option with a
+   value type of your data type.  The disadvantage of this approach is that it
+   doesn't work if your fundamental data type is something that is already
+   supported.
+
+#. Write an independent class, using it explicitly from options that need it.
+
+   This approach works well in situations where you would like to parse an
+   option using special syntax for a not-very-special data-type.  The drawback
+   of this approach is that users of your parser have to be aware that they are
+   using your parser instead of the builtin ones.
+
+To guide the discussion, we will discuss a custom parser that accepts file
+sizes, specified with an optional unit after the numeric size.  For example, we
+would like to parse "102kb", "41M", "1G" into the appropriate integer value.  In
+this case, the underlying data type we want to parse into is '``unsigned``'.  We
+choose approach #2 above because we don't want to make this the default for all
+``unsigned`` options.
+
+To start out, we declare our new ``FileSizeParser`` class:
+
+.. code-block:: c++
+
+  struct FileSizeParser : public cl::basic_parser<unsigned> {
+    // parse - Return true on error.
+    bool parse(cl::Option &O, const char *ArgName, const std::string &Arg,
+               unsigned &Val);
+  };
+
+Our new class inherits from the ``cl::basic_parser`` template class to fill in
+the default, boilerplate code for us.  We give it the data type that we parse
+into, the last argument to the ``parse`` method, so that clients of our custom
+parser know what object type to pass in to the parse method.  (Here we declare
+that we parse into '``unsigned``' variables.)
+
+For most purposes, the only method that must be implemented in a custom parser
+is the ``parse`` method.  The ``parse`` method is called whenever the option is
+invoked, passing in the option itself, the option name, the string to parse, and
+a reference to a return value.  If the string to parse is not well-formed, the
+parser should output an error message and return true.  Otherwise it should
+return false and set '``Val``' to the parsed value.  In our example, we
+implement ``parse`` as:
+
+.. code-block:: c++
+
+  bool FileSizeParser::parse(cl::Option &O, const char *ArgName,
+                             const std::string &Arg, unsigned &Val) {
+    const char *ArgStart = Arg.c_str();
+    char *End;
+
+    // Parse integer part, leaving 'End' pointing to the first non-integer char
+    Val = (unsigned)strtol(ArgStart, &End, 0);
+
+    while (1) {
+      switch (*End++) {
+      case 0: return false;   // No error
+      case 'i':               // Ignore the 'i' in KiB if people use that
+      case 'b': case 'B':     // Ignore B suffix
+        break;
+
+      case 'g': case 'G': Val *= 1024*1024*1024; break;
+      case 'm': case 'M': Val *= 1024*1024;      break;
+      case 'k': case 'K': Val *= 1024;           break;
+
+      default:
+        // Print an error message if unrecognized character!
+        return O.error("'" + Arg + "' value invalid for file size argument!");
+      }
+    }
+  }
+
+This function implements a very simple parser for the kinds of strings we are
+interested in.  Although it has some holes (it allows "``123KKK``" for example),
+it is good enough for this example.  Note that we use the option itself to print
+out the error message (the ``error`` method always returns true) in order to get
+a nice error message (shown below).  Now that we have our parser class, we can
+use it like this:
+
+.. code-block:: c++
+
+  static cl::opt<unsigned, false, FileSizeParser>
+  MFS("max-file-size", cl::desc("Maximum file size to accept"),
+      cl::value_desc("size"));
+
+Which adds this to the output of our program:
+
+::
+
+  OPTIONS:
+    -help                 - display available options (-help-hidden for more)
+    ...
+    -max-file-size=<size> - Maximum file size to accept
+
+And we can test that our parser works correctly now (the test program just
+prints out the max-file-size argument value):
+
+::
+
+  $ ./test
+  MFS: 0
+  $ ./test -max-file-size=123MB
+  MFS: 128974848
+  $ ./test -max-file-size=3G
+  MFS: 3221225472
+  $ ./test -max-file-size=dog
+  -max-file-size option: 'dog' value invalid for file size argument!
+
+It looks like it works.  The error message that we get is nice and helpful, and
+we seem to accept reasonable file sizes.  This wraps up the "custom parser"
+tutorial.
+
+Exploiting external storage
+---------------------------
+
+Several of the LLVM libraries define static ``cl::opt`` instances that will
+automatically be included in any program that links with that library.
+
+.. todo::
+
+   TODO: complete this section
+
+.. _dynamically loaded options:
+
+Dynamically adding command line options
+---------------------------------------
+
+.. todo::
+
+   TODO: fill in this section
diff --git a/docs/DeveloperPolicy.rst b/docs/DeveloperPolicy.rst
index 96e4af3..cda281a 100644
--- a/docs/DeveloperPolicy.rst
+++ b/docs/DeveloperPolicy.rst
@@ -152,33 +152,10 @@
 committed are reviewed after they go in: you don't want everyone to assume
 someone else will review it, allowing the patch to go unreviewed.  To solve
 this problem, we have a notion of an 'owner' for a piece of the code.  The
 sole responsibility of a code owner is to ensure that a commit to their area
 of the
-code is appropriately reviewed, either by themself or by someone else.  The
-current code owners are:
-
-* **Evan Cheng**: Code generator and all targets
-
-* **Greg Clayton**: LLDB
-
-* **Doug Gregor**: Clang Frontend Libraries
-
-* **Howard Hinnant**: libc++
-
-* **Anton Korobeynikov**: Exception handling, debug information, and Windows
-  codegen
-
-* **Ted Kremenek**: Clang Static Analyzer
-
-* **Chris Lattner**: Everything not covered by someone else
-
-* **John McCall**: Clang LLVM IR generation
-
-* **Jakob Olesen**: Register allocators and TableGen
-
-* **Duncan Sands**: dragonegg and llvm-gcc 4.2
-
-* **Peter Collingbourne**: libclc
-
-* **Tobias Grosser**: polly
+code is appropriately reviewed, either by themself or by someone else.  The
+list of current code owners can be found in the file
+`CODE_OWNERS.TXT <http://llvm.org/viewvc/llvm-project/llvm/trunk/CODE_OWNERS.TXT?view=markup>`_
+in the root of the LLVM source tree.
 
 Note that code ownership is completely different than reviewers: anyone can
 review a piece of code, and we welcome code review from anyone who is
diff --git a/docs/HowToReleaseLLVM.html b/docs/HowToReleaseLLVM.html
index 382e18f..30c3d5d 100644
--- a/docs/HowToReleaseLLVM.html
+++ b/docs/HowToReleaseLLVM.html
@@ -476,7 +476,7 @@ $ tar -cvf - llvm-test-<i>X.Y</i>rc1 | gzip > llvm-test-<i>X.Y</i>rc1.src.t
 <p>Review the documentation and ensure that it is up to date.  The "Release
    Notes" must be updated to reflect new features, bug fixes, new known issues,
    and changes in the list of supported platforms.  The "Getting Started Guide"
-   should be updated to reflect the new release version number tag avaiable from
+   should be updated to reflect the new release version number tag available from
   Subversion and changes in basic system requirements.
Merge both changes from mainline into the release branch.</p> diff --git a/docs/LangRef.html b/docs/LangRef.html index 04c1bf8..810fce5 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -25,7 +25,6 @@ <li><a href="#linkage_private">'<tt>private</tt>' Linkage</a></li> <li><a href="#linkage_linker_private">'<tt>linker_private</tt>' Linkage</a></li> <li><a href="#linkage_linker_private_weak">'<tt>linker_private_weak</tt>' Linkage</a></li> - <li><a href="#linkage_linker_private_weak_def_auto">'<tt>linker_private_weak_def_auto</tt>' Linkage</a></li> <li><a href="#linkage_internal">'<tt>internal</tt>' Linkage</a></li> <li><a href="#linkage_available_externally">'<tt>available_externally</tt>' Linkage</a></li> <li><a href="#linkage_linkonce">'<tt>linkonce</tt>' Linkage</a></li> @@ -34,6 +33,7 @@ <li><a href="#linkage_appending">'<tt>appending</tt>' Linkage</a></li> <li><a href="#linkage_externweak">'<tt>extern_weak</tt>' Linkage</a></li> <li><a href="#linkage_linkonce_odr">'<tt>linkonce_odr</tt>' Linkage</a></li> + <li><a href="#linkage_linkonce_odr_auto_hide">'<tt>linkonce_odr_auto_hide</tt>' Linkage</a></li> <li><a href="#linkage_weak">'<tt>weak_odr</tt>' Linkage</a></li> <li><a href="#linkage_external">'<tt>external</tt>' Linkage</a></li> <li><a href="#linkage_dllimport">'<tt>dllimport</tt>' Linkage</a></li> @@ -258,6 +258,7 @@ <li><a href="#int_log">'<tt>llvm.log.*</tt>' Intrinsic</a></li> <li><a href="#int_fma">'<tt>llvm.fma.*</tt>' Intrinsic</a></li> <li><a href="#int_fabs">'<tt>llvm.fabs.*</tt>' Intrinsic</a></li> + <li><a href="#int_floor">'<tt>llvm.floor.*</tt>' Intrinsic</a></li> </ol> </li> <li><a href="#int_manip">Bit Manipulation Intrinsics</a> @@ -575,15 +576,6 @@ define i32 @main() { <i>; i32()* </i> linker. The symbols are removed by the linker from the final linked image (executable or dynamic library).</dd> - <dt><tt><b><a name="linkage_linker_private_weak_def_auto">linker_private_weak_def_auto</a></b></tt></dt> - <dd>Similar to "<tt>linker_private_weak</tt>", but it's known that the address - of the object is not taken. For instance, functions that had an inline - definition, but the compiler decided not to inline it. Note, - unlike <tt>linker_private</tt> and <tt>linker_private_weak</tt>, - <tt>linker_private_weak_def_auto</tt> may have only <tt>default</tt> - visibility. The symbols are removed by the linker from the final linked - image (executable or dynamic library).</dd> - <dt><tt><b><a name="linkage_internal">internal</a></b></tt></dt> <dd>Similar to private, but the value shows as a local symbol (<tt>STB_LOCAL</tt> in the case of ELF) in the object file. This @@ -652,6 +644,14 @@ define i32 @main() { <i>; i32()* </i> be merged with equivalent globals. These linkage types are otherwise the same as their non-<tt>odr</tt> versions.</dd> + <dt><tt><b><a name="linkage_linkonce_odr_auto_hide">linkonce_odr_auto_hide</a></b></tt></dt> + <dd>Similar to "<tt>linkonce_odr</tt>", but nothing in the translation unit + takes the address of this definition. For instance, functions that had an + inline definition, but the compiler decided not to inline it. + <tt>linkonce_odr_auto_hide</tt> may have only <tt>default</tt> visibility. 
+ The symbols are removed by the linker from the final linked image + (executable or dynamic library).</dd> + <dt><tt><b><a name="linkage_external">external</a></b></tt></dt> <dd>If none of the above identifiers are used, the global is externally visible, meaning that it participates in linkage and can be used to @@ -1207,6 +1207,13 @@ define void @f() optsize { ... } may make calls to the function faster, at the cost of extra program startup time if the function is not called during program startup.</dd> + <dt><tt><b>ia_nsdialect</b></tt></dt> + <dd>This attribute indicates the associated inline assembly call is using a + non-standard assembly dialect. The standard dialect is ATT, which is + assumed when this attribute is not present. When present, the dialect + is assumed to be Intel. Currently, ATT and Intel are the only supported + dialects.</dd> + <dt><tt><b>inlinehint</b></tt></dt> <dd>This attribute indicates that the source code contained a hint that inlining this function is desirable (such as the "inline" keyword in C/C++). It @@ -2780,7 +2787,7 @@ second_end: make it fit in <tt>TYPE</tt>.</dd> <dt><b><tt>inttoptr (CST to TYPE)</tt></b></dt> - <dd>Convert a integer constant to a pointer constant. TYPE must be a pointer + <dd>Convert an integer constant to a pointer constant. TYPE must be a pointer type. CST must be of integer type. The CST value is zero extended, truncated, or unchanged to make it fit in a pointer size. This one is <i>really</i> dangerous!</dd> @@ -7544,6 +7551,40 @@ LLVM</a>.</p> </div> +<!-- _______________________________________________________________________ --> +<h4> + <a name="int_floor">'<tt>llvm.floor.*</tt>' Intrinsic</a> +</h4> + +<div> + +<h5>Syntax:</h5> +<p>This is an overloaded intrinsic. You can use <tt>llvm.floor</tt> on any + floating point or vector of floating point type. 
Not all targets support all + types however.</p> + +<pre> + declare float @llvm.floor.f32(float %Val) + declare double @llvm.floor.f64(double %Val) + declare x86_fp80 @llvm.floor.f80(x86_fp80 %Val) + declare fp128 @llvm.floor.f128(fp128 %Val) + declare ppc_fp128 @llvm.floor.ppcf128(ppc_fp128 %Val) +</pre> + +<h5>Overview:</h5> +<p>The '<tt>llvm.floor.*</tt>' intrinsics return the floor of + the operand.</p> + +<h5>Arguments:</h5> +<p>The argument and return value are floating point numbers of the same + type.</p> + +<h5>Semantics:</h5> +<p>This function returns the same values as the libm <tt>floor</tt> functions + would, and handles error conditions in the same way.</p> + +</div> + </div> <!-- ======================================================================= --> diff --git a/docs/Passes.html b/docs/Passes.html index 2ebc53a..85292e3 100644 --- a/docs/Passes.html +++ b/docs/Passes.html @@ -1970,7 +1970,7 @@ if (X < 3) {</pre> <li>Verify that a function's argument list agrees with its declared type.</li> <li>It is illegal to specify a name for a void value.</li> - <li>It is illegal to have a internal global value with no initializer.</li> + <li>It is illegal to have an internal global value with no initializer.</li> <li>It is illegal to have a ret instruction that returns a value that does not agree with the function return value type.</li> <li>Function call argument types match the function prototype.</li> diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html index cfcce4d..036c387 100644 --- a/docs/ProgrammersManual.html +++ b/docs/ProgrammersManual.html @@ -507,8 +507,9 @@ small and pervasive enough in LLVM that it should always be passed by value.</p> <div> -<p>The <tt>Twine</tt> class is an efficient way for APIs to accept concatenated -strings. For example, a common LLVM paradigm is to name one instruction based on +<p>The <tt><a href="/doxygen/classllvm_1_1Twine.html">Twine</a></tt> class is an +efficient way for APIs to accept concatenated strings. For example, a common +LLVM paradigm is to name one instruction based on the name of another instruction with a suffix, for example:</p> <div class="doc_code"> @@ -517,17 +518,17 @@ the name of another instruction with a suffix, for example:</p> </pre> </div> -<p>The <tt>Twine</tt> class is effectively a -lightweight <a href="http://en.wikipedia.org/wiki/Rope_(computer_science)">rope</a> +<p>The <tt>Twine</tt> class is effectively a lightweight +<a href="http://en.wikipedia.org/wiki/Rope_(computer_science)">rope</a> which points to temporary (stack allocated) objects. Twines can be implicitly constructed as the result of the plus operator applied to strings (i.e., a C -strings, an <tt>std::string</tt>, or a <tt>StringRef</tt>). The twine delays the -actual concatenation of strings until it is actually required, at which point -it can be efficiently rendered directly into a character array. This avoids -unnecessary heap allocation involved in constructing the temporary results of -string concatenation. See -"<tt><a href="/doxygen/classllvm_1_1Twine_8h-source.html">llvm/ADT/Twine.h</a></tt>" -for more information.</p> +strings, an <tt>std::string</tt>, or a <tt>StringRef</tt>). The twine delays +the actual concatenation of strings until it is actually required, at which +point it can be efficiently rendered directly into a character array. This +avoids unnecessary heap allocation involved in constructing the temporary +results of string concatenation. 
See +"<tt><a href="/doxygen/Twine_8h_source.html">llvm/ADT/Twine.h</a></tt>" +and <a href="#dss_twine">here</a> for more information.</p> <p>As with a <tt>StringRef</tt>, <tt>Twine</tt> objects point to external memory and should almost never be stored or mentioned directly. They are intended diff --git a/docs/Projects.rst b/docs/Projects.rst index c4be6da..6313288 100644 --- a/docs/Projects.rst +++ b/docs/Projects.rst @@ -102,7 +102,7 @@ Follow these simple steps to start your project: ``--prefix=<directory>`` Tell your project where it should get installed. -That's it! Now all you have to do is type ``gmake`` (or ``make`` if your on a +That's it! Now all you have to do is type ``gmake`` (or ``make`` if you're on a GNU/Linux system) in the root of your object directory, and your project should build. diff --git a/docs/TestSuiteMakefileGuide.html b/docs/TestSuiteMakefileGuide.html index 0a644d2..1b24250 100644 --- a/docs/TestSuiteMakefileGuide.html +++ b/docs/TestSuiteMakefileGuide.html @@ -238,12 +238,12 @@ LLVM.</p> simple one is simply running <tt>gmake</tt> with no arguments. This will compile and run all programs in the tree using a number of different methods and compare results. Any failures are reported in the output, but are likely - drowned in the other output. Passes are not reported explicitely.</p> + drowned in the other output. Passes are not reported explicitly.</p> <p>Somewhat better is running <tt>gmake TEST=sometest test</tt>, which runs the specified test and usually adds per-program summaries to the output (depending on which sometest you use). For example, the <tt>nightly</tt> test - explicitely outputs TEST-PASS or TEST-FAIL for every test after each program. + explicitly outputs TEST-PASS or TEST-FAIL for every test after each program. Though these lines are still drowned in the output, it's easy to grep the output logs in the Output directories.</p> diff --git a/docs/programming.rst b/docs/programming.rst index add923f..27e4301 100644 --- a/docs/programming.rst +++ b/docs/programming.rst @@ -7,6 +7,7 @@ Programming Documentation :hidden: CodingStandards + CommandLine * `LLVM Language Reference Manual <LangRef.html>`_ @@ -18,7 +19,7 @@ Programming Documentation Introduction to the general layout of the LLVM sourcebase, important classes and APIs, and some tips & tricks. -* `CommandLine library Reference Manual <CommandLine.html>`_ +* :ref:`commandline` Provides information on using the command line parsing library. diff --git a/docs/subsystems.rst b/docs/subsystems.rst index c4c3b6d..be33295 100644 --- a/docs/subsystems.rst +++ b/docs/subsystems.rst @@ -10,6 +10,7 @@ Subsystem Documentation BitCodeFormat BranchWeightMetadata Bugpoint + CodeGenerator ExceptionHandling LinkTimeOptimization SegmentedStacks @@ -22,9 +23,9 @@ Subsystem Documentation * `Writing an LLVM Backend <WritingAnLLVMBackend.html>`_ Information on how to write LLVM backends for machine targets. - -* `The LLVM Target-Independent Code Generator <CodeGenerator.html>`_ - + +* :ref:`code_generator` + The design and implementation of the LLVM code generator. Useful if you are working on retargetting LLVM to a new architecture, designing a new codegen pass, or enhancing existing components. 
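As an aside to the ``Twine`` hunk above: the idiom it documents looks roughly
like the following sketch (the function and its names are invented for
illustration; ``Twine::str`` and the mixed-type ``operator+`` overloads are
the documented API):

.. code-block:: c++

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/Twine.h"
  #include <string>

  // Build "<Base>.<Counter>" without heap-allocating the intermediate
  // pieces; the Twine only records the operands, and str() renders the
  // final string in one pass.
  std::string makeSuffixedName(llvm::StringRef Base, unsigned Counter) {
    return (Base + "." + llvm::Twine(Counter)).str();
  }
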
diff --git a/docs/tutorial/LangImpl6.html b/docs/tutorial/LangImpl6.html
index 9c606ae..a762980 100644
--- a/docs/tutorial/LangImpl6.html
+++ b/docs/tutorial/LangImpl6.html
@@ -636,7 +636,7 @@ def mandelhelp(xmin xmax xstep ymin ymax ystep) :
       putchard(10)
 )
 
-# mandel - This is a convenient helper function for ploting the mandelbrot set
+# mandel - This is a convenient helper function for plotting the mandelbrot set
 # from the specified position with the specified Magnification.
 def mandel(realstart imagstart realmag imagmag)
   mandelhelp(realstart, realstart+realmag*78, realmag,
diff --git a/docs/tutorial/OCamlLangImpl6.html b/docs/tutorial/OCamlLangImpl6.html
index 0d1d280..2ee5089 100644
--- a/docs/tutorial/OCamlLangImpl6.html
+++ b/docs/tutorial/OCamlLangImpl6.html
@@ -611,7 +611,7 @@ def mandelhelp(xmin xmax xstep ymin ymax ystep) :
      putchard(10)
 )
 
-# mandel - This is a convenient helper function for ploting the mandelbrot set
+# mandel - This is a convenient helper function for plotting the mandelbrot set
 # from the specified position with the specified Magnification.
 def mandel(realstart imagstart realmag imagmag)
   mandelhelp(realstart, realstart+realmag*78, realmag,
diff --git a/docs/yaml2obj.rst b/docs/yaml2obj.rst
new file mode 100644
index 0000000..cb59162
--- /dev/null
+++ b/docs/yaml2obj.rst
@@ -0,0 +1,222 @@
+.. _yaml2obj:
+
+yaml2obj
+========
+
+yaml2obj takes a YAML description of an object file and converts it to a binary
+file.
+
+    $ yaml2obj input-file
+
+.. program:: yaml2obj
+
+Outputs the binary to stdout.
+
+COFF Syntax
+-----------
+
+Here's a sample COFF file.
+
+.. code-block:: yaml
+
+  header:
+    Machine: IMAGE_FILE_MACHINE_I386 # (0x14C)
+
+  sections:
+    - Name: .text
+      Characteristics: [ IMAGE_SCN_CNT_CODE
+                       , IMAGE_SCN_ALIGN_16BYTES
+                       , IMAGE_SCN_MEM_EXECUTE
+                       , IMAGE_SCN_MEM_READ
+                       ] # 0x60500020
+      SectionData:
+        "\x83\xEC\x0C\xC7\x44\x24\x08\x00\x00\x00\x00\xC7\x04\x24\x00\x00\x00\x00\xE8\x00\x00\x00\x00\xE8\x00\x00\x00\x00\x8B\x44\x24\x08\x83\xC4\x0C\xC3" # |....D$.......$...............D$.....|
+
+  symbols:
+    - Name: .text
+      Value: 0
+      SectionNumber: 1
+      SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+      ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+      StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+      NumberOfAuxSymbols: 1
+      AuxillaryData:
+        "\x24\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00" # |$.................|
+
+    - Name: _main
+      Value: 0
+      SectionNumber: 1
+      SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+      ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+      StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
+Here's a simplified Kwalify_ schema with an extension to allow alternate
+types.
+
+.. _Kwalify: http://www.kuwata-lab.com/kwalify/ruby/users-guide.html
+
+.. 
code-block:: yaml + + type: map + mapping: + header: + type: map + mapping: + Machine: [ {type: str, enum: + [ IMAGE_FILE_MACHINE_UNKNOWN + , IMAGE_FILE_MACHINE_AM33 + , IMAGE_FILE_MACHINE_AMD64 + , IMAGE_FILE_MACHINE_ARM + , IMAGE_FILE_MACHINE_ARMV7 + , IMAGE_FILE_MACHINE_EBC + , IMAGE_FILE_MACHINE_I386 + , IMAGE_FILE_MACHINE_IA64 + , IMAGE_FILE_MACHINE_M32R + , IMAGE_FILE_MACHINE_MIPS16 + , IMAGE_FILE_MACHINE_MIPSFPU + , IMAGE_FILE_MACHINE_MIPSFPU16 + , IMAGE_FILE_MACHINE_POWERPC + , IMAGE_FILE_MACHINE_POWERPCFP + , IMAGE_FILE_MACHINE_R4000 + , IMAGE_FILE_MACHINE_SH3 + , IMAGE_FILE_MACHINE_SH3DSP + , IMAGE_FILE_MACHINE_SH4 + , IMAGE_FILE_MACHINE_SH5 + , IMAGE_FILE_MACHINE_THUMB + , IMAGE_FILE_MACHINE_WCEMIPSV2 + ]} + , {type: int} + ] + Characteristics: + - type: seq + sequence: + - type: str + enum: [ IMAGE_FILE_RELOCS_STRIPPED + , IMAGE_FILE_EXECUTABLE_IMAGE + , IMAGE_FILE_LINE_NUMS_STRIPPED + , IMAGE_FILE_LOCAL_SYMS_STRIPPED + , IMAGE_FILE_AGGRESSIVE_WS_TRIM + , IMAGE_FILE_LARGE_ADDRESS_AWARE + , IMAGE_FILE_BYTES_REVERSED_LO + , IMAGE_FILE_32BIT_MACHINE + , IMAGE_FILE_DEBUG_STRIPPED + , IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP + , IMAGE_FILE_NET_RUN_FROM_SWAP + , IMAGE_FILE_SYSTEM + , IMAGE_FILE_DLL + , IMAGE_FILE_UP_SYSTEM_ONLY + , IMAGE_FILE_BYTES_REVERSED_HI + ] + - type: int + sections: + type: seq + sequence: + - type: map + mapping: + Name: {type: str} + Characteristics: + - type: seq + sequence: + - type: str + enum: [ IMAGE_SCN_TYPE_NO_PAD + , IMAGE_SCN_CNT_CODE + , IMAGE_SCN_CNT_INITIALIZED_DATA + , IMAGE_SCN_CNT_UNINITIALIZED_DATA + , IMAGE_SCN_LNK_OTHER + , IMAGE_SCN_LNK_INFO + , IMAGE_SCN_LNK_REMOVE + , IMAGE_SCN_LNK_COMDAT + , IMAGE_SCN_GPREL + , IMAGE_SCN_MEM_PURGEABLE + , IMAGE_SCN_MEM_16BIT + , IMAGE_SCN_MEM_LOCKED + , IMAGE_SCN_MEM_PRELOAD + , IMAGE_SCN_ALIGN_1BYTES + , IMAGE_SCN_ALIGN_2BYTES + , IMAGE_SCN_ALIGN_4BYTES + , IMAGE_SCN_ALIGN_8BYTES + , IMAGE_SCN_ALIGN_16BYTES + , IMAGE_SCN_ALIGN_32BYTES + , IMAGE_SCN_ALIGN_64BYTES + , IMAGE_SCN_ALIGN_128BYTES + , IMAGE_SCN_ALIGN_256BYTES + , IMAGE_SCN_ALIGN_512BYTES + , IMAGE_SCN_ALIGN_1024BYTES + , IMAGE_SCN_ALIGN_2048BYTES + , IMAGE_SCN_ALIGN_4096BYTES + , IMAGE_SCN_ALIGN_8192BYTES + , IMAGE_SCN_LNK_NRELOC_OVFL + , IMAGE_SCN_MEM_DISCARDABLE + , IMAGE_SCN_MEM_NOT_CACHED + , IMAGE_SCN_MEM_NOT_PAGED + , IMAGE_SCN_MEM_SHARED + , IMAGE_SCN_MEM_EXECUTE + , IMAGE_SCN_MEM_READ + , IMAGE_SCN_MEM_WRITE + ] + - type: int + SectionData: {type: str} + symbols: + type: seq + sequence: + - type: map + mapping: + Name: {type: str} + Value: {type: int} + SectionNumber: {type: int} + SimpleType: [ {type: str, enum: [ IMAGE_SYM_TYPE_NULL + , IMAGE_SYM_TYPE_VOID + , IMAGE_SYM_TYPE_CHAR + , IMAGE_SYM_TYPE_SHORT + , IMAGE_SYM_TYPE_INT + , IMAGE_SYM_TYPE_LONG + , IMAGE_SYM_TYPE_FLOAT + , IMAGE_SYM_TYPE_DOUBLE + , IMAGE_SYM_TYPE_STRUCT + , IMAGE_SYM_TYPE_UNION + , IMAGE_SYM_TYPE_ENUM + , IMAGE_SYM_TYPE_MOE + , IMAGE_SYM_TYPE_BYTE + , IMAGE_SYM_TYPE_WORD + , IMAGE_SYM_TYPE_UINT + , IMAGE_SYM_TYPE_DWORD + ]} + , {type: int} + ] + ComplexType: [ {type: str, enum: [ IMAGE_SYM_DTYPE_NULL + , IMAGE_SYM_DTYPE_POINTER + , IMAGE_SYM_DTYPE_FUNCTION + , IMAGE_SYM_DTYPE_ARRAY + ]} + , {type: int} + ] + StorageClass: [ {type: str, enum: + [ IMAGE_SYM_CLASS_END_OF_FUNCTION + , IMAGE_SYM_CLASS_NULL + , IMAGE_SYM_CLASS_AUTOMATIC + , IMAGE_SYM_CLASS_EXTERNAL + , IMAGE_SYM_CLASS_STATIC + , IMAGE_SYM_CLASS_REGISTER + , IMAGE_SYM_CLASS_EXTERNAL_DEF + , IMAGE_SYM_CLASS_LABEL + , IMAGE_SYM_CLASS_UNDEFINED_LABEL + , IMAGE_SYM_CLASS_MEMBER_OF_STRUCT + , 
IMAGE_SYM_CLASS_ARGUMENT + , IMAGE_SYM_CLASS_STRUCT_TAG + , IMAGE_SYM_CLASS_MEMBER_OF_UNION + , IMAGE_SYM_CLASS_UNION_TAG + , IMAGE_SYM_CLASS_TYPE_DEFINITION + , IMAGE_SYM_CLASS_UNDEFINED_STATIC + , IMAGE_SYM_CLASS_ENUM_TAG + , IMAGE_SYM_CLASS_MEMBER_OF_ENUM + , IMAGE_SYM_CLASS_REGISTER_PARAM + , IMAGE_SYM_CLASS_BIT_FIELD + , IMAGE_SYM_CLASS_BLOCK + , IMAGE_SYM_CLASS_FUNCTION + , IMAGE_SYM_CLASS_END_OF_STRUCT + , IMAGE_SYM_CLASS_FILE + , IMAGE_SYM_CLASS_SECTION + , IMAGE_SYM_CLASS_WEAK_EXTERNAL + , IMAGE_SYM_CLASS_CLR_TOKEN + ]} + , {type: int} + ] diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 23df52c..eceae5c 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -53,7 +53,7 @@ extern "C" { * The declared parameter names are descriptive and specify which type is * required. Additionally, each type hierarchy is documented along with the * functions that operate upon it. For more detail, refer to LLVM's C++ code. - * If in doubt, refer to Core.cpp, which performs paramter downcasts in the + * If in doubt, refer to Core.cpp, which performs parameter downcasts in the * form unwrap<RequiredType>(Param). * * Many exotic languages can interoperate with C code but have a harder time @@ -106,7 +106,7 @@ typedef struct LLVMOpaqueType *LLVMTypeRef; typedef struct LLVMOpaqueValue *LLVMValueRef; /** - * Represents a basic block of instruction in LLVM IR. + * Represents a basic block of instructions in LLVM IR. * * This models llvm::BasicBlock. */ @@ -282,6 +282,7 @@ typedef enum { LLVMLinkOnceAnyLinkage, /**< Keep one copy of function when linking (inline)*/ LLVMLinkOnceODRLinkage, /**< Same, but only replaced by something equivalent. */ + LLVMLinkOnceODRAutoHideLinkage, /**< Like LinkOnceODR, but possibly hidden. */ LLVMWeakAnyLinkage, /**< Keep one copy of function when linking (weak) */ LLVMWeakODRLinkage, /**< Same, but only replaced by something equivalent. */ @@ -295,9 +296,7 @@ typedef enum { LLVMGhostLinkage, /**< Obsolete */ LLVMCommonLinkage, /**< Tentative definitions */ LLVMLinkerPrivateLinkage, /**< Like Private, but linker removes. */ - LLVMLinkerPrivateWeakLinkage, /**< Like LinkerPrivate, but is weak. */ - LLVMLinkerPrivateWeakDefAutoLinkage /**< Like LinkerPrivateWeak, but possibly - hidden. */ + LLVMLinkerPrivateWeakLinkage /**< Like LinkerPrivate, but is weak. */ } LLVMLinkage; typedef enum { @@ -986,7 +985,7 @@ LLVMTypeRef LLVMX86MMXType(void); * * LLVMValueRef essentially represents llvm::Value. There is a rich * hierarchy of classes within this type. Depending on the instance - * obtain, not all APIs are available. + * obtained, not all APIs are available. * * Callers can determine the type of a LLVMValueRef by calling the * LLVMIsA* family of functions (e.g. LLVMIsAArgument()). These @@ -1162,7 +1161,7 @@ LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DECLARE_VALUE_CAST) * * Uses are obtained in an iterator fashion. First, call this function * to obtain a reference to the first use. Then, call LLVMGetNextUse() - * on that instance and all subsequently obtained instances untl + * on that instance and all subsequently obtained instances until * LLVMGetNextUse() returns NULL. * * @see llvm::Value::use_begin() diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h index 2b466f9..5a625a4 100644 --- a/include/llvm/ADT/APFloat.h +++ b/include/llvm/ADT/APFloat.h @@ -274,6 +274,7 @@ namespace llvm { /* C fmod, or llvm frem. 
*/ opStatus mod(const APFloat &, roundingMode); opStatus fusedMultiplyAdd(const APFloat &, const APFloat &, roundingMode); + opStatus roundToIntegral(roundingMode); /* Sign operations. */ void changeSign(); diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index 9498ef0..f30a6e3 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -357,13 +357,7 @@ public: /// @brief Check if this APInt has an N-bits unsigned integer value. bool isIntN(unsigned N) const { assert(N && "N == 0 ???"); - if (N >= getBitWidth()) - return true; - - if (isSingleWord()) - return isUIntN(N, VAL); - return APInt(N, makeArrayRef(pVal, getNumWords())).zext(getBitWidth()) - == (*this); + return getActiveBits() <= N; } /// @brief Check if this APInt has an N-bits signed integer value. diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h index d502be6..f60d688 100644 --- a/include/llvm/ADT/DenseMap.h +++ b/include/llvm/ADT/DenseMap.h @@ -617,8 +617,9 @@ public: this->destroyAll(); // Reduce the number of buckets. - unsigned NewNumBuckets - = std::max(64, 1 << (Log2_32_Ceil(OldNumEntries) + 1)); + unsigned NewNumBuckets = 0; + if (OldNumEntries) + NewNumBuckets = std::max(64, 1 << (Log2_32_Ceil(OldNumEntries) + 1)); if (NewNumBuckets == NumBuckets) { this->BaseT::initEmpty(); return; @@ -686,8 +687,7 @@ class SmallDenseMap /// A "union" of an inline bucket array and the struct representing /// a large bucket. This union will be discriminated by the 'Small' bit. - typename AlignedCharArray<BucketT[InlineBuckets], LargeRep>::union_type - storage; + AlignedCharArrayUnion<BucketT[InlineBuckets], LargeRep> storage; public: explicit SmallDenseMap(unsigned NumInitBuckets = 0) { @@ -833,8 +833,7 @@ public: return; // Nothing to do. // First move the inline buckets into a temporary storage. - typename AlignedCharArray<BucketT[InlineBuckets]>::union_type - TmpStorage; + AlignedCharArrayUnion<BucketT[InlineBuckets]> TmpStorage; BucketT *TmpBegin = reinterpret_cast<BucketT *>(TmpStorage.buffer); BucketT *TmpEnd = TmpBegin; diff --git a/include/llvm/ADT/IntrusiveRefCntPtr.h b/include/llvm/ADT/IntrusiveRefCntPtr.h index 0c02a8f..a9724ee 100644 --- a/include/llvm/ADT/IntrusiveRefCntPtr.h +++ b/include/llvm/ADT/IntrusiveRefCntPtr.h @@ -34,7 +34,7 @@ namespace llvm { /// RefCountedBase - A generic base class for objects that wish to /// have their lifetimes managed using reference counts. Classes /// subclass RefCountedBase to obtain such functionality, and are -/// typically handled with IntrusivePtr "smart pointers" (see below) +/// typically handled with IntrusiveRefCntPtr "smart pointers" (see below) /// which automatically handle the management of reference counts. /// Objects that subclass RefCountedBase should not be allocated on /// the stack, as invoking "delete" (which is called when the diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h index d124091..9fbbbe4 100644 --- a/include/llvm/ADT/SmallVector.h +++ b/include/llvm/ADT/SmallVector.h @@ -463,6 +463,9 @@ public: } iterator erase(iterator I) { + assert(I >= this->begin() && "Iterator to erase is out of bounds."); + assert(I < this->end() && "Erasing at past-the-end iterator."); + iterator N = I; // Shift all elts down one. 
this->move(I+1, this->end(), I); @@ -472,6 +475,10 @@ public: } iterator erase(iterator S, iterator E) { + assert(S >= this->begin() && "Range to erase is out of bounds."); + assert(S <= E && "Trying to erase invalid range."); + assert(E <= this->end() && "Trying to erase past the end."); + iterator N = S; // Shift all elts down. iterator I = this->move(E, this->end(), S); @@ -488,6 +495,9 @@ public: return this->end()-1; } + assert(I >= this->begin() && "Insertion iterator is out of bounds."); + assert(I <= this->end() && "Inserting past the end of the vector."); + if (this->EndX < this->CapacityX) { Retry: ::new ((void*) this->end()) T(::std::move(this->back())); @@ -517,6 +527,9 @@ public: return this->end()-1; } + assert(I >= this->begin() && "Insertion iterator is out of bounds."); + assert(I <= this->end() && "Inserting past the end of the vector."); + if (this->EndX < this->CapacityX) { Retry: ::new ((void*) this->end()) T(this->back()); @@ -548,6 +561,9 @@ public: return this->begin()+InsertElt; } + assert(I >= this->begin() && "Insertion iterator is out of bounds."); + assert(I <= this->end() && "Inserting past the end of the vector."); + // Ensure there is enough space. reserve(static_cast<unsigned>(this->size() + NumToInsert)); @@ -596,6 +612,9 @@ public: return this->begin()+InsertElt; } + assert(I >= this->begin() && "Insertion iterator is out of bounds."); + assert(I <= this->end() && "Inserting past the end of the vector."); + size_t NumToInsert = std::distance(From, To); // Ensure there is enough space. @@ -899,7 +918,8 @@ public: template <typename T> class SmallVector<T,0> : public SmallVectorImpl<T> { public: - SmallVector() : SmallVectorImpl<T>(0) {} + SmallVector() : SmallVectorImpl<T>(0) { + } explicit SmallVector(unsigned Size, const T &Value = T()) : SmallVectorImpl<T>(0) { @@ -912,13 +932,26 @@ public: } SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(0) { + if (!RHS.empty()) + SmallVectorImpl<T>::operator=(RHS); + } + + const SmallVector &operator=(const SmallVector &RHS) { SmallVectorImpl<T>::operator=(RHS); + return *this; } - SmallVector &operator=(const SmallVectorImpl<T> &RHS) { - return SmallVectorImpl<T>::operator=(RHS); +#if LLVM_USE_RVALUE_REFERENCES + SmallVector(SmallVector &&RHS) : SmallVectorImpl<T>(0) { + if (!RHS.empty()) + SmallVectorImpl<T>::operator=(::std::move(RHS)); } + const SmallVector &operator=(SmallVector &&RHS) { + SmallVectorImpl<T>::operator=(::std::move(RHS)); + return *this; + } +#endif }; template<typename T, unsigned N> diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h index 1779bcb..cd84603 100644 --- a/include/llvm/ADT/StringRef.h +++ b/include/llvm/ADT/StringRef.h @@ -12,6 +12,7 @@ #include "llvm/Support/type_traits.h" +#include <algorithm> #include <cassert> #include <cstring> #include <limits> diff --git a/include/llvm/ADT/StringSwitch.h b/include/llvm/ADT/StringSwitch.h index 7480583..7fd6e27 100644 --- a/include/llvm/ADT/StringSwitch.h +++ b/include/llvm/ADT/StringSwitch.h @@ -48,8 +48,8 @@ class StringSwitch { const T *Result; public: - explicit StringSwitch(StringRef Str) - : Str(Str), Result(0) { } + explicit StringSwitch(StringRef S) + : Str(S), Result(0) { } template<unsigned N> StringSwitch& Case(const char (&S)[N], const T& Value) { diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h index 8f3925c..d3d33b8 100644 --- a/include/llvm/ADT/TinyPtrVector.h +++ b/include/llvm/ADT/TinyPtrVector.h @@ -11,8 +11,9 @@ #define LLVM_ADT_TINYPTRVECTOR_H #include 
"llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" namespace llvm { @@ -27,23 +28,78 @@ template <typename EltTy> class TinyPtrVector { public: typedef llvm::SmallVector<EltTy, 4> VecTy; + typedef typename VecTy::value_type value_type; + llvm::PointerUnion<EltTy, VecTy*> Val; - + TinyPtrVector() {} + ~TinyPtrVector() { + if (VecTy *V = Val.template dyn_cast<VecTy*>()) + delete V; + } + TinyPtrVector(const TinyPtrVector &RHS) : Val(RHS.Val) { if (VecTy *V = Val.template dyn_cast<VecTy*>()) Val = new VecTy(*V); } + TinyPtrVector &operator=(const TinyPtrVector &RHS) { + if (this == &RHS) + return *this; + if (RHS.empty()) { + this->clear(); + return *this; + } + + // Try to squeeze into the single slot. If it won't fit, allocate a copied + // vector. + if (Val.template is<EltTy>()) { + if (RHS.size() == 1) + Val = RHS.front(); + else + Val = new VecTy(*RHS.Val.template get<VecTy*>()); + return *this; + } + + // If we have a full vector allocated, try to re-use it. + if (RHS.Val.template is<EltTy>()) { + Val.template get<VecTy*>()->clear(); + Val.template get<VecTy*>()->push_back(RHS.front()); + } else { + *Val.template get<VecTy*>() = *RHS.Val.template get<VecTy*>(); + } + return *this; + } + #if LLVM_USE_RVALUE_REFERENCES TinyPtrVector(TinyPtrVector &&RHS) : Val(RHS.Val) { RHS.Val = (EltTy)0; } -#endif - ~TinyPtrVector() { - if (VecTy *V = Val.template dyn_cast<VecTy*>()) + TinyPtrVector &operator=(TinyPtrVector &&RHS) { + if (this == &RHS) + return *this; + if (RHS.empty()) { + this->clear(); + return *this; + } + + // If this vector has been allocated on the heap, re-use it if cheap. If it + // would require more copying, just delete it and we'll steal the other + // side. + if (VecTy *V = Val.template dyn_cast<VecTy*>()) { + if (RHS.Val.template is<EltTy>()) { + V->clear(); + V->push_back(RHS.front()); + return *this; + } delete V; + } + + Val = RHS.Val; + RHS.Val = (EltTy)0; + return *this; } - +#endif + // implicit conversion operator to ArrayRef. operator ArrayRef<EltTy>() const { if (Val.isNull()) @@ -52,7 +108,7 @@ public: return *Val.getAddrOfPtr1(); return *Val.template get<VecTy*>(); } - + bool empty() const { // This vector can be empty if it contains no element, or if it // contains a pointer to an empty vector. @@ -61,7 +117,7 @@ public: return Vec->empty(); return false; } - + unsigned size() const { if (empty()) return 0; @@ -69,27 +125,21 @@ public: return 1; return Val.template get<VecTy*>()->size(); } - + typedef const EltTy *const_iterator; typedef EltTy *iterator; iterator begin() { - if (empty()) - return 0; - if (Val.template is<EltTy>()) return Val.getAddrOfPtr1(); - + return Val.template get<VecTy *>()->begin(); } iterator end() { - if (empty()) - return 0; - if (Val.template is<EltTy>()) - return begin() + 1; - + return begin() + (Val.isNull() ? 
0 : 1); + return Val.template get<VecTy *>()->end(); } @@ -107,19 +157,19 @@ public: assert(i == 0 && "tinyvector index out of range"); return V; } - - assert(i < Val.template get<VecTy*>()->size() && + + assert(i < Val.template get<VecTy*>()->size() && "tinyvector index out of range"); return (*Val.template get<VecTy*>())[i]; } - + EltTy front() const { assert(!empty() && "vector empty"); if (EltTy V = Val.template dyn_cast<EltTy>()) return V; return Val.template get<VecTy*>()->front(); } - + EltTy back() const { assert(!empty() && "vector empty"); if (EltTy V = Val.template dyn_cast<EltTy>()) @@ -127,26 +177,25 @@ public: return Val.template get<VecTy*>()->back(); } - void push_back(EltTy NewVal) { assert(NewVal != 0 && "Can't add a null value"); - + // If we have nothing, add something. if (Val.isNull()) { Val = NewVal; return; } - + // If we have a single value, convert to a vector. if (EltTy V = Val.template dyn_cast<EltTy>()) { Val = new VecTy(); Val.template get<VecTy*>()->push_back(V); } - + // Add the new value, we know we have a vector. Val.template get<VecTy*>()->push_back(NewVal); } - + void pop_back() { // If we have a single value, convert to empty. if (Val.template is<EltTy>()) @@ -155,7 +204,6 @@ public: Vec->pop_back(); } - void clear() { // If we have a single value, convert to empty. if (Val.template is<EltTy>()) { @@ -168,6 +216,9 @@ public: } iterator erase(iterator I) { + assert(I >= begin() && "Iterator to erase is out of bounds."); + assert(I < end() && "Erasing at past-the-end iterator."); + // If we have a single value, convert to empty. if (Val.template is<EltTy>()) { if (I == begin()) @@ -177,15 +228,63 @@ public: // benefit to collapsing back to a pointer return Vec->erase(I); } + return end(); + } - return 0; + iterator erase(iterator S, iterator E) { + assert(S >= begin() && "Range to erase is out of bounds."); + assert(S <= E && "Trying to erase invalid range."); + assert(E <= end() && "Trying to erase past the end."); + + if (Val.template is<EltTy>()) { + if (S == begin() && S != E) + Val = (EltTy)0; + } else if (VecTy *Vec = Val.template dyn_cast<VecTy*>()) { + return Vec->erase(S, E); + } + return end(); + } + + iterator insert(iterator I, const EltTy &Elt) { + assert(I >= this->begin() && "Insertion iterator is out of bounds."); + assert(I <= this->end() && "Inserting past the end of the vector."); + if (I == end()) { + push_back(Elt); + return llvm::prior(end()); + } + assert(!Val.isNull() && "Null value with non-end insert iterator."); + if (EltTy V = Val.template dyn_cast<EltTy>()) { + assert(I == begin()); + Val = Elt; + push_back(V); + return begin(); + } + + return Val.template get<VecTy*>()->insert(I, Elt); + } + + template<typename ItTy> + iterator insert(iterator I, ItTy From, ItTy To) { + assert(I >= this->begin() && "Insertion iterator is out of bounds."); + assert(I <= this->end() && "Inserting past the end of the vector."); + if (From == To) + return I; + + // If we have a single value, convert to a vector. + ptrdiff_t Offset = I - begin(); + if (Val.isNull()) { + if (llvm::next(From) == To) { + Val = *From; + return begin(); + } + + Val = new VecTy(); + } else if (EltTy V = Val.template dyn_cast<EltTy>()) { + Val = new VecTy(); + Val.template get<VecTy*>()->push_back(V); + } + return Val.template get<VecTy*>()->insert(begin() + Offset, From, To); } - -private: - void operator=(const TinyPtrVector&); // NOT IMPLEMENTED YET. -#if LLVM_USE_RVALUE_REFERENCES - void operator=(TinyPtrVector&&); // NOT IMPLEMENTED YET. 
-#endif }; } // end namespace llvm diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index a080200..7f7061a 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -98,7 +98,8 @@ public: Minix, RTEMS, NativeClient, - CNK // BG/P Compute-Node Kernel + CNK, // BG/P Compute-Node Kernel + Bitrig }; enum EnvironmentType { UnknownEnvironment, diff --git a/include/llvm/ADT/VariadicFunction.h b/include/llvm/ADT/VariadicFunction.h index a9a0dc6..a7f83a6 100644 --- a/include/llvm/ADT/VariadicFunction.h +++ b/include/llvm/ADT/VariadicFunction.h @@ -206,7 +206,7 @@ struct VariadicFunction2 { ResultT operator()(Param0T P0, Param1T P1, \ LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \ const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \ - return Func(P0, P1, makeAraryRef(Args)); \ + return Func(P0, P1, makeArrayRef(Args)); \ } LLVM_DEFINE_OVERLOAD(1) LLVM_DEFINE_OVERLOAD(2) diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h index 2ced796..006daa0 100644 --- a/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/include/llvm/Analysis/BranchProbabilityInfo.h @@ -122,6 +122,7 @@ private: bool calcLoopBranchHeuristics(BasicBlock *BB); bool calcZeroHeuristics(BasicBlock *BB); bool calcFloatingPointHeuristics(BasicBlock *BB); + bool calcInvokeHeuristics(BasicBlock *BB); }; } diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h index 45be59b..a1cc196 100644 --- a/include/llvm/Analysis/Dominators.h +++ b/include/llvm/Analysis/Dominators.h @@ -705,6 +705,21 @@ DominatorTreeBase<NodeT>::properlyDominates(const NodeT *A, const NodeT *B) { EXTERN_TEMPLATE_INSTANTIATION(class DominatorTreeBase<BasicBlock>); +class BasicBlockEdge { + const BasicBlock *Start; + const BasicBlock *End; +public: + BasicBlockEdge(const BasicBlock *Start_, const BasicBlock *End_) : + Start(Start_), End(End_) { } + const BasicBlock *getStart() const { + return Start; + } + const BasicBlock *getEnd() const { + return End; + } + bool isSingleEdge() const; +}; + //===------------------------------------- /// DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to /// compute a normal dominator tree. 
@@ -778,6 +793,8 @@ public: bool dominates(const Instruction *Def, const Use &U) const; bool dominates(const Instruction *Def, const Instruction *User) const; bool dominates(const Instruction *Def, const BasicBlock *BB) const; + bool dominates(const BasicBlockEdge &BBE, const Use &U) const; + bool dominates(const BasicBlockEdge &BBE, const BasicBlock *BB) const; bool properlyDominates(const DomTreeNode *A, const DomTreeNode *B) const { return DT->properlyDominates(A, B); diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index e674e74..e16f389 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -146,6 +146,7 @@ class ObjectSizeOffsetVisitor bool RoundToAlign; unsigned IntTyBits; APInt Zero; + SmallPtrSet<Instruction *, 8> SeenInsts; APInt align(APInt Size, uint64_t Align); @@ -203,7 +204,6 @@ class ObjectSizeOffsetEvaluator const TargetData *TD; LLVMContext &Context; BuilderTy Builder; - ObjectSizeOffsetVisitor Visitor; IntegerType *IntTy; Value *Zero; CacheMapTy CacheMap; diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index 81ad3f0..7e049d6 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -124,11 +124,11 @@ namespace llvm { } /// isClobber - Return true if this MemDepResult represents a query that is - /// a instruction clobber dependency. + /// an instruction clobber dependency. bool isClobber() const { return Value.getInt() == Clobber; } /// isDef - Return true if this MemDepResult represents a query that is - /// a instruction definition dependency. + /// an instruction definition dependency. bool isDef() const { return Value.getInt() == Def; } /// isNonLocal - Return true if this MemDepResult represents a query that diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h index eae94e7..188d11c 100644 --- a/include/llvm/Analysis/RegionInfo.h +++ b/include/llvm/Analysis/RegionInfo.h @@ -500,50 +500,58 @@ public: /// Region. The iterator also iterates over BasicBlocks that are elements of /// a subregion of this Region. It is therefore called a flat iterator. //@{ - template <typename RegionNodeItT> + template <bool IsConst> class block_iterator_wrapper - : public std::iterator<std::forward_iterator_tag, BasicBlock, ptrdiff_t> { - typedef std::iterator<std::forward_iterator_tag, BasicBlock, ptrdiff_t> + : public df_iterator<typename conditional<IsConst, + const BasicBlock, + BasicBlock>::type*> { + typedef df_iterator<typename conditional<IsConst, + const BasicBlock, + BasicBlock>::type*> super; - - RegionNodeItT Iter; - public: - typedef block_iterator_wrapper<RegionNodeItT> Self; + typedef block_iterator_wrapper<IsConst> Self; typedef typename super::pointer pointer; - block_iterator_wrapper(RegionNodeItT Iter) : Iter(Iter) {} - - bool operator==(const Self &RHS) const { return Iter == RHS.Iter; } - bool operator!=(const Self &RHS) const { return Iter != RHS.Iter; } - pointer operator*() const { - return (*Iter)->template getNodeAs<BasicBlock>(); + // Construct the begin iterator. + block_iterator_wrapper(pointer Entry, pointer Exit) : super(df_begin(Entry)) + { + // Mark the exit of the region as visited, so that the children of the + // exit and the exit itself, i.e. the block outside the region will never + // be visited. 
+ super::Visited.insert(Exit); } - Self& operator++() { - ++Iter; - return *this; - } - Self operator++(int) { - Self tmp = *this; - ++*this; - return tmp; - } + // Construct the end iterator. + block_iterator_wrapper() : super(df_end<pointer>((BasicBlock *)0)) {} + + /*implicit*/ block_iterator_wrapper(super I) : super(I) {} - const Self &operator=(const Self &I) { - Iter = I.Iter; - return *this; + // FIXME: Even a const_iterator returns a non-const BasicBlock pointer. + // This was introduced for backwards compatibility, but should + // be removed as soon as all users are fixed. + BasicBlock *operator*() const { + return const_cast<BasicBlock*>(super::operator*()); } }; - typedef block_iterator_wrapper<block_node_iterator> block_iterator; - typedef block_iterator_wrapper<const_block_node_iterator> - const_block_iterator; - block_iterator block_begin(); - block_iterator block_end(); + typedef block_iterator_wrapper<false> block_iterator; + typedef block_iterator_wrapper<true> const_block_iterator; - const_block_iterator block_begin() const; - const_block_iterator block_end() const; + block_iterator block_begin() { + return block_iterator(getEntry(), getExit()); + } + + block_iterator block_end() { + return block_iterator(); + } + + const_block_iterator block_begin() const { + return const_block_iterator(getEntry(), getExit()); + } + const_block_iterator block_end() const { + return const_block_iterator(); + } //@} /// @name Element Iterators diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h index 59fdd99..223aa00 100644 --- a/include/llvm/Attributes.h +++ b/include/llvm/Attributes.h @@ -134,6 +134,9 @@ DECLARE_LLVM_ATTRIBUTE(NonLazyBind,1U<<31) ///< Function is called early and/or /// often, so lazy binding isn't /// worthwhile. DECLARE_LLVM_ATTRIBUTE(AddressSafety,1ULL<<32) ///< Address safety checking is on. +DECLARE_LLVM_ATTRIBUTE(IANSDialect,1ULL<<33) ///< Inline asm non-standard dialect. + /// When not set, ATT dialect assumed. + /// When set implies the Intel dialect. #undef DECLARE_LLVM_ATTRIBUTE @@ -159,14 +162,16 @@ const AttrConst FunctionOnly = {NoReturn_i | NoUnwind_i | ReadNone_i | ReadOnly_i | NoInline_i | AlwaysInline_i | OptimizeForSize_i | StackProtect_i | StackProtectReq_i | NoRedZone_i | NoImplicitFloat_i | Naked_i | InlineHint_i | StackAlignment_i | - UWTable_i | NonLazyBind_i | ReturnsTwice_i | AddressSafety_i}; + UWTable_i | NonLazyBind_i | ReturnsTwice_i | AddressSafety_i | + IANSDialect_i}; /// @brief Parameter attributes that do not apply to vararg call arguments. const AttrConst VarArgsIncompatible = {StructRet_i}; /// @brief Attributes that are mutually incompatible. -const AttrConst MutuallyIncompatible[4] = { - {ByVal_i | InReg_i | Nest_i | StructRet_i}, +const AttrConst MutuallyIncompatible[5] = { + {ByVal_i | Nest_i | StructRet_i}, + {ByVal_i | Nest_i | InReg_i }, {ZExt_i | SExt_i}, {ReadNone_i | ReadOnly_i}, {NoInline_i | AlwaysInline_i} diff --git a/include/llvm/Bitcode/Archive.h b/include/llvm/Bitcode/Archive.h index 86c44c7..3c75e58 100644 --- a/include/llvm/Bitcode/Archive.h +++ b/include/llvm/Bitcode/Archive.h @@ -47,14 +47,13 @@ class ArchiveMember : public ilist_node<ArchiveMember> { /// characteristics of the member. The various "is" methods below provide /// access to the flags. The flags are not user settable. 
enum Flags { - CompressedFlag = 1, ///< Member is a normal compressed file - SVR4SymbolTableFlag = 2, ///< Member is a SVR4 symbol table - BSD4SymbolTableFlag = 4, ///< Member is a BSD4 symbol table - LLVMSymbolTableFlag = 8, ///< Member is an LLVM symbol table - BitcodeFlag = 16, ///< Member is bitcode - HasPathFlag = 64, ///< Member has a full or partial path - HasLongFilenameFlag = 128, ///< Member uses the long filename syntax - StringTableFlag = 256 ///< Member is an ar(1) format string table + SVR4SymbolTableFlag = 1, ///< Member is a SVR4 symbol table + BSD4SymbolTableFlag = 2, ///< Member is a BSD4 symbol table + LLVMSymbolTableFlag = 4, ///< Member is an LLVM symbol table + BitcodeFlag = 8, ///< Member is bitcode + HasPathFlag = 16, ///< Member has a full or partial path + HasLongFilenameFlag = 32, ///< Member uses the long filename syntax + StringTableFlag = 64 ///< Member is an ar(1) format string table }; /// @} @@ -109,11 +108,6 @@ class ArchiveMember : public ilist_node<ArchiveMember> { /// @brief Get the data content of the archive member const char* getData() const { return data; } - /// This method determines if the member is a regular compressed file. - /// @returns true iff the archive member is a compressed regular file. - /// @brief Determine if the member is a compressed regular file. - bool isCompressed() const { return flags&CompressedFlag; } - /// @returns true iff the member is a SVR4 (non-LLVM) symbol table /// @brief Determine if this member is a SVR4 symbol table. bool isSVR4SymbolTable() const { return flags&SVR4SymbolTableFlag; } @@ -427,7 +421,6 @@ class Archive { bool writeToDisk( bool CreateSymbolTable=false, ///< Create Symbol table bool TruncateNames=false, ///< Truncate the filename to 15 chars - bool Compress=false, ///< Compress files std::string* ErrMessage=0 ///< If non-null, where error msg is set ); @@ -494,7 +487,6 @@ class Archive { std::ofstream& ARFile, ///< The file to write member onto bool CreateSymbolTable, ///< Should symbol table be created? bool TruncateNames, ///< Should names be truncated to 11 chars? - bool ShouldCompress, ///< Should the member be compressed? std::string* ErrMessage ///< If non-null, place were error msg is set ); diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h index 475da13..dea118f 100644 --- a/include/llvm/Bitcode/BitstreamWriter.h +++ b/include/llvm/Bitcode/BitstreamWriter.h @@ -155,6 +155,7 @@ public: } void EmitVBR(uint32_t Val, unsigned NumBits) { + assert(NumBits <= 32 && "Too many bits to emit!"); uint32_t Threshold = 1U << (NumBits-1); // Emit the bits with VBR encoding, NumBits-1 bits at a time. @@ -167,10 +168,11 @@ public: } void EmitVBR64(uint64_t Val, unsigned NumBits) { + assert(NumBits <= 32 && "Too many bits to emit!"); if ((uint32_t)Val == Val) return EmitVBR((uint32_t)Val, NumBits); - uint64_t Threshold = 1U << (NumBits-1); + uint32_t Threshold = 1U << (NumBits-1); // Emit the bits with VBR encoding, NumBits-1 bits at a time. 
while (Val >= Threshold) { diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index 9fe743e..7cb9695 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -34,6 +34,7 @@ class MachineFrameInfo; class MachineRegisterInfo; class TargetData; class TargetInstrInfo; +class TargetLibraryInfo; class TargetLowering; class TargetMachine; class TargetRegisterClass; @@ -57,6 +58,7 @@ protected: const TargetInstrInfo &TII; const TargetLowering &TLI; const TargetRegisterInfo &TRI; + const TargetLibraryInfo *LibInfo; /// The position of the last instruction for materializing constants /// for use in the current block. It resets to EmitStartPt when it @@ -144,7 +146,8 @@ public: virtual ~FastISel(); protected: - explicit FastISel(FunctionLoweringInfo &funcInfo); + explicit FastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo); /// TargetSelectInstruction - This method is called by target-independent /// code when the normal FastISel process fails to select an instruction. diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 83a848c..f387bd5 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -126,6 +126,11 @@ namespace ISD { TargetExternalSymbol, TargetBlockAddress, + /// TargetIndex - Like a constant pool entry, but with completely + /// target-dependent semantics. Holds target flags, a 32-bit index, and a + /// 64-bit index. Targets can use this however they like. + TargetIndex, + /// RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) /// This node represents a target intrinsic function with no side effects. /// The first operand is the ID number of the intrinsic from the diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index 3fe7c8d..a3ce47c 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -40,15 +40,6 @@ namespace llvm { /// definition and use points. /// class VNInfo { - private: - enum { - HAS_PHI_KILL = 1, - IS_PHI_DEF = 1 << 1, - IS_UNUSED = 1 << 2 - }; - - unsigned char flags; - public: typedef BumpPtrAllocator Allocator; @@ -60,60 +51,30 @@ namespace llvm { /// VNInfo constructor. VNInfo(unsigned i, SlotIndex d) - : flags(0), id(i), def(d) + : id(i), def(d) { } /// VNInfo construtor, copies values from orig, except for the value number. VNInfo(unsigned i, const VNInfo &orig) - : flags(orig.flags), id(i), def(orig.def) + : id(i), def(orig.def) { } /// Copy from the parameter into this VNInfo. void copyFrom(VNInfo &src) { - flags = src.flags; def = src.def; } - /// Used for copying value number info. - unsigned getFlags() const { return flags; } - void setFlags(unsigned flags) { this->flags = flags; } - - /// Merge flags from another VNInfo - void mergeFlags(const VNInfo *VNI) { - flags = (flags | VNI->flags) & ~IS_UNUSED; - } - - /// Returns true if one or more kills are PHI nodes. - /// Obsolete, do not use! - bool hasPHIKill() const { return flags & HAS_PHI_KILL; } - /// Set the PHI kill flag on this value. - void setHasPHIKill(bool hasKill) { - if (hasKill) - flags |= HAS_PHI_KILL; - else - flags &= ~HAS_PHI_KILL; - } - /// Returns true if this value is defined by a PHI instruction (or was, /// PHI instrucions may have been eliminated). - bool isPHIDef() const { return flags & IS_PHI_DEF; } - /// Set the "phi def" flag on this value. 
- void setIsPHIDef(bool phiDef) { - if (phiDef) - flags |= IS_PHI_DEF; - else - flags &= ~IS_PHI_DEF; - } + /// PHI-defs begin at a block boundary, all other defs begin at register or + /// EC slots. + bool isPHIDef() const { return def.isBlock(); } /// Returns true if this value is unused. - bool isUnused() const { return flags & IS_UNUSED; } - /// Set the "is unused" flag on this value. - void setIsUnused(bool unused) { - if (unused) - flags |= IS_UNUSED; - else - flags &= ~IS_UNUSED; - } + bool isUnused() const { return !def.isValid(); } + + /// Mark this value as unused. + void markUnused() { def = SlotIndex(); } }; /// LiveRange structure - This represents a simple register range in the diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h index a344b1f..da521db 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -241,18 +241,15 @@ namespace llvm { /// print - Implement the dump method. virtual void print(raw_ostream &O, const Module* = 0) const; - /// isReMaterializable - Returns true if every definition of MI of every - /// val# of the specified interval is re-materializable. Also returns true - /// by reference if all of the defs are load instructions. - bool isReMaterializable(const LiveInterval &li, - const SmallVectorImpl<LiveInterval*> *SpillIs, - bool &isLoad); - /// intervalIsInOneMBB - If LI is confined to a single basic block, return /// a pointer to that block. If LI is live in to or out of any block, /// return NULL. MachineBasicBlock *intervalIsInOneMBB(const LiveInterval &LI) const; + /// Returns true if VNI is killed by any PHI-def values in LI. + /// This may conservatively return true to avoid expensive computations. + bool hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const; + /// addKillFlags - Add kill flags to any instruction that kills a virtual /// register. void addKillFlags(); @@ -347,6 +344,12 @@ namespace llvm { /// computeIntervals - Compute live intervals. void computeIntervals(); + /// Compute live intervals for all virtual registers. + void computeVirtRegs(); + + /// Compute RegMaskSlots and RegMaskBits. + void computeRegMasks(); + /// handleRegisterDef - update intervals for a register def /// (calls handleVirtualRegisterDef) void handleRegisterDef(MachineBasicBlock *MBB, @@ -375,6 +378,7 @@ namespace llvm { void computeLiveInRegUnits(); void computeRegUnitInterval(LiveInterval*); + void computeVirtRegInterval(LiveInterval*); class HMEditor; }; diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 4371aa5..77ea4d0 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -351,6 +351,8 @@ public: /// parameter is stored in Weights list and it may be used by /// MachineBranchProbabilityInfo analysis to calculate branch probability. /// + /// Note that duplicate Machine CFG edges are not allowed. + /// void addSuccessor(MachineBasicBlock *succ, uint32_t weight = 0); /// removeSuccessor - Remove successor from the successors list of this @@ -379,6 +381,10 @@ public: /// which refer to fromMBB to refer to this. void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB); + /// isPredecessor - Return true if the specified MBB is a predecessor of this + /// block. + bool isPredecessor(const MachineBasicBlock *MBB) const; + /// isSuccessor - Return true if the specified MBB is a successor of this /// block. 
/// isSuccessor - Return true if the specified MBB is a successor of this
/// block.
bool isSuccessor(const MachineBasicBlock *MBB) const;
@@ -568,7 +574,7 @@ private:
/// getSuccWeight - Return weight of the edge from this block to MBB. This
/// method should NOT be called directly, but by using getEdgeWeight method
/// from MachineBranchProbabilityInfo class.
- uint32_t getSuccWeight(const MachineBasicBlock *succ) const;
+ uint32_t getSuccWeight(const_succ_iterator Succ) const;

// Methods used to maintain doubly linked list of blocks...
diff --git a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index af4db7d..12189ce 100644
--- a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -16,14 +16,12 @@
#define LLVM_CODEGEN_MACHINEBRANCHPROBABILITYINFO_H

#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Support/BranchProbability.h"

#include <climits>

namespace llvm {

-class raw_ostream;
-class MachineBasicBlock;
-
class MachineBranchProbabilityInfo : public ImmutablePass {
virtual void anchor();

@@ -52,6 +50,11 @@ public:
uint32_t getEdgeWeight(const MachineBasicBlock *Src,
const MachineBasicBlock *Dst) const;

+ // Same thing, but using a const_succ_iterator from Src. This is faster when
+ // the iterator is already available.
+ uint32_t getEdgeWeight(const MachineBasicBlock *Src,
+ MachineBasicBlock::const_succ_iterator Dst) const;
+
// Get sum of the block successors' weights, potentially scaling them to fit
// within 32-bits. If scaling is required, sets Scale based on the necessary
// adjustment. Any edge weights used with the sum should be divided by Scale.
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index 65093d7..27756ab 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -420,6 +420,12 @@ public:
return hasProperty(MCID::Bitcast, Type);
}

+ /// isSelect - Return true if this instruction is a select instruction.
+ ///
+ bool isSelect(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::Select, Type);
+ }
+
/// isNotDuplicable - Return true if this instruction cannot be safely
/// duplicated. For example, if the instruction has unique labels attached
/// to it, duplicating it would cause multiple definition errors.
@@ -635,6 +641,30 @@ public:
getOperand(0).getSubReg() == getOperand(1).getSubReg();
}

+ /// isTransient - Return true if this is a transient instruction that is
+ /// either very likely to be eliminated during register allocation (such as
+ /// copy-like instructions), or if this instruction doesn't have an
+ /// execution-time cost.
+ bool isTransient() const {
+ switch(getOpcode()) {
+ default: return false;
+ // Copy-like instructions are usually eliminated during register allocation.
+ case TargetOpcode::PHI:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ // Pseudo-instructions that don't produce any real output.
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::GC_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return true;
+ }
+ }
+
/// getBundleSize - Return the number of instructions inside the MI bundle.
unsigned getBundleSize() const;

@@ -912,12 +942,12 @@ private:
/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
/// this instruction from their respective use lists. This requires that the
/// operands already be on their use lists.
- void RemoveRegOperandsFromUseLists();
+ void RemoveRegOperandsFromUseLists(MachineRegisterInfo&);

/// AddRegOperandsToUseLists - Add all of the register operands in
/// this instruction to their respective use lists. This requires that the
/// operands not be on their use lists yet.
- void AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo);
+ void AddRegOperandsToUseLists(MachineRegisterInfo&);

/// hasPropertyInBundle - Slow path for hasProperty when we're dealing with a
/// bundle.
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index 9192474..654361f 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -108,6 +108,12 @@ public:
return *this;
}

+ const MachineInstrBuilder &addTargetIndex(unsigned Idx, int64_t Offset = 0,
+ unsigned char TargetFlags = 0) const {
+ MI->addOperand(MachineOperand::CreateTargetIndex(Idx, Offset, TargetFlags));
+ return *this;
+ }
+
const MachineInstrBuilder &addJumpTableIndex(unsigned Idx,
unsigned char TargetFlags = 0) const {
MI->addOperand(MachineOperand::CreateJTI(Idx, TargetFlags));
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index c3b4f7c..37d42b3 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -45,6 +45,7 @@ public:
MO_MachineBasicBlock, ///< MachineBasicBlock reference
MO_FrameIndex, ///< Abstract Stack Frame Index
MO_ConstantPoolIndex, ///< Address of indexed Constant in Constant Pool
+ MO_TargetIndex, ///< Target-dependent index+offset operand.
MO_JumpTableIndex, ///< Address of indexed Jump Table for switch
MO_ExternalSymbol, ///< Name of external global symbol
MO_GlobalAddress, ///< Address of a global value
@@ -149,7 +150,7 @@ private:
struct { // For MO_Register.
// Register number is in SmallContents.RegNo.
- MachineOperand **Prev; // Access list for register.
+ MachineOperand *Prev; // Access list for register. See MRI.
MachineOperand *Next;
} Reg;

@@ -215,6 +216,8 @@ public:
bool isFI() const { return OpKind == MO_FrameIndex; }
/// isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isCPI() const { return OpKind == MO_ConstantPoolIndex; }
+ /// isTargetIndex - Tests if this is a MO_TargetIndex operand.
+ bool isTargetIndex() const { return OpKind == MO_TargetIndex; }
/// isJTI - Tests if this is a MO_JumpTableIndex operand.
bool isJTI() const { return OpKind == MO_JumpTableIndex; }
/// isGlobal - Tests if this is a MO_GlobalAddress operand.
@@ -302,13 +305,6 @@ public:
return !isUndef() && !isInternalRead() && (isUse() || getSubReg());
}
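// Illustrative sketch (not from this commit): building and inspecting the new
// MO_TargetIndex operand. 'TI_GOT' and the BuildMI context (MBB, DL, TII,
// Opc, DstReg) are assumed for the example.
MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DL, TII->get(Opc), DstReg)
    .addTargetIndex(TI_GOT, /*Offset=*/16);
const MachineOperand &MO = MIB->getOperand(1);
assert(MO.isTargetIndex() && MO.getIndex() == (int)TI_GOT &&
       MO.getOffset() == 16); // index + 64-bit offset stored in one operand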
- /// getNextOperandForReg - Return the next MachineOperand in the function that
- /// uses or defines this register.
- MachineOperand *getNextOperandForReg() const {
- assert(isReg() && "This is not a register operand!");
- return Contents.Reg.Next;
- }
-
//===--------------------------------------------------------------------===//
// Mutators for Register Operands
//===--------------------------------------------------------------------===//
@@ -335,17 +331,9 @@ public:
///
void substPhysReg(unsigned Reg, const TargetRegisterInfo&);

- void setIsUse(bool Val = true) {
- assert(isReg() && "Wrong MachineOperand accessor");
- assert((Val || !isDebug()) && "Marking a debug operation as def");
- IsDef = !Val;
- }
+ void setIsUse(bool Val = true) { setIsDef(!Val); }

- void setIsDef(bool Val = true) {
- assert(isReg() && "Wrong MachineOperand accessor");
- assert((!Val || !isDebug()) && "Marking a debug operation as def");
- IsDef = Val;
- }
+ void setIsDef(bool Val = true);

void setImplicit(bool Val = true) {
assert(isReg() && "Wrong MachineOperand accessor");
@@ -408,7 +396,7 @@ public:
}

int getIndex() const {
- assert((isFI() || isCPI() || isJTI()) &&
+ assert((isFI() || isCPI() || isTargetIndex() || isJTI()) &&
"Wrong MachineOperand accessor");
return Contents.OffsetedInfo.Val.Index;
}
@@ -431,8 +419,8 @@ public:
/// getOffset - Return the offset from the symbol in this operand. This always
/// returns 0 for ExternalSymbol operands.
int64_t getOffset() const {
- assert((isGlobal() || isSymbol() || isCPI() || isBlockAddress()) &&
- "Wrong MachineOperand accessor");
+ assert((isGlobal() || isSymbol() || isCPI() || isTargetIndex() ||
+ isBlockAddress()) && "Wrong MachineOperand accessor");
return (int64_t(Contents.OffsetedInfo.OffsetHi) << 32) |
SmallContents.OffsetLo;
}
@@ -479,14 +467,14 @@ public:
}

void setOffset(int64_t Offset) {
- assert((isGlobal() || isSymbol() || isCPI() || isBlockAddress()) &&
- "Wrong MachineOperand accessor");
+ assert((isGlobal() || isSymbol() || isCPI() || isTargetIndex() ||
+ isBlockAddress()) && "Wrong MachineOperand accessor");
SmallContents.OffsetLo = unsigned(Offset);
Contents.OffsetedInfo.OffsetHi = int(Offset >> 32);
}

void setIndex(int Idx) {
- assert((isFI() || isCPI() || isJTI()) &&
+ assert((isFI() || isCPI() || isTargetIndex() || isJTI()) &&
"Wrong MachineOperand accessor");
Contents.OffsetedInfo.Val.Index = Idx;
}
@@ -587,6 +575,14 @@ public:
Op.setTargetFlags(TargetFlags);
return Op;
}
+ static MachineOperand CreateTargetIndex(unsigned Idx, int64_t Offset,
+ unsigned char TargetFlags = 0) {
+ MachineOperand Op(MachineOperand::MO_TargetIndex);
+ Op.setIndex(Idx);
+ Op.setOffset(Offset);
+ Op.setTargetFlags(TargetFlags);
+ return Op;
+ }
static MachineOperand CreateJTI(unsigned Idx,
unsigned char TargetFlags = 0) {
MachineOperand Op(MachineOperand::MO_JumpTableIndex);
@@ -662,15 +658,6 @@ private:
assert(isReg() && "Can only add reg operand to use lists");
return Contents.Reg.Prev != 0;
}
-
- /// AddRegOperandToRegInfo - Add this register operand to the specified
- /// MachineRegisterInfo. If it is null, then the next/prev fields should be
- /// explicitly nulled out.
- void AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo);
-
- /// RemoveRegOperandFromRegInfo - Remove this register operand from the
- /// MachineRegisterInfo it is linked with.
- void RemoveRegOperandFromRegInfo();
};

inline raw_ostream &operator<<(raw_ostream &OS, const MachineOperand& MO) {
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 2bcd1c7..42a8aa4 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -57,6 +57,26 @@ class MachineRegisterInfo {
/// physical registers.
MachineOperand **PhysRegUseDefLists;

+ /// getRegUseDefListHead - Return the head pointer for the register use/def
+ /// list for the specified virtual or physical register.
+ MachineOperand *&getRegUseDefListHead(unsigned RegNo) {
+ if (TargetRegisterInfo::isVirtualRegister(RegNo))
+ return VRegInfo[RegNo].second;
+ return PhysRegUseDefLists[RegNo];
+ }
+
+ MachineOperand *getRegUseDefListHead(unsigned RegNo) const {
+ if (TargetRegisterInfo::isVirtualRegister(RegNo))
+ return VRegInfo[RegNo].second;
+ return PhysRegUseDefLists[RegNo];
+ }
+
+ /// Get the next element in the use-def chain.
+ static MachineOperand *getNextOperandForReg(const MachineOperand *MO) {
+ assert(MO && MO->isReg() && "This is not a register operand!");
+ return MO->Contents.Reg.Next;
+ }
+
/// UsedPhysRegs - This is a bit vector that is computed and set by the
/// register allocator, and must be kept up to date by passes that run after
/// register allocation (though most don't modify this). This is used
@@ -129,12 +149,21 @@ public:
// Register Info
//===--------------------------------------------------------------------===//

+ // Strictly for use by MachineInstr.cpp.
+ void addRegOperandToUseList(MachineOperand *MO);
+
+ // Strictly for use by MachineInstr.cpp.
+ void removeRegOperandFromUseList(MachineOperand *MO);
+
/// reg_begin/reg_end - Provide iteration support to walk over all definitions
/// and uses of a register within the MachineFunction that corresponds to this
/// MachineRegisterInfo object.
template<bool Uses, bool Defs, bool SkipDebug>
class defusechain_iterator;

+ // Make it a friend so it can access getNextOperandForReg().
+ template<bool, bool, bool> friend class defusechain_iterator;
+
/// reg_iterator/reg_begin/reg_end - Walk all defs and uses of the specified
/// register.
typedef defusechain_iterator<true,true,false> reg_iterator;
@@ -172,6 +201,15 @@ public:
/// specified register (it may be live-in).
bool def_empty(unsigned RegNo) const { return def_begin(RegNo) == def_end(); }

+ /// hasOneDef - Return true if there is exactly one instruction defining the
+ /// specified register.
+ bool hasOneDef(unsigned RegNo) const {
+ def_iterator DI = def_begin(RegNo);
+ if (DI == def_end())
+ return false;
+ return ++DI == def_end();
+ }
+
/// use_iterator/use_begin/use_end - Walk all uses of the specified register.
typedef defusechain_iterator<true,false,false> use_iterator;
use_iterator use_begin(unsigned RegNo) const {
@@ -185,7 +223,12 @@ public:

/// hasOneUse - Return true if there is exactly one instruction using the
/// specified register.
- bool hasOneUse(unsigned RegNo) const;
+ bool hasOneUse(unsigned RegNo) const {
+ use_iterator UI = use_begin(RegNo);
+ if (UI == use_end())
+ return false;
+ return ++UI == use_end();
+ }

/// use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the
/// specified register, skipping those marked as Debug.
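// Illustrative sketch (not from this commit): the new hasOneDef() complements
// hasOneUse(); both are cheap because each register's operand chain now keeps
// all defs ahead of all uses, so the walks stop early. 'MRI' and 'Reg' are
// assumed from context.
if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) {
  MachineInstr *DefMI = MRI.getVRegDef(Reg); // the unique definition in SSA
  // ...DefMI may be foldable into its sole user...
}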
@@ -218,20 +261,6 @@ public:
/// constraints.
void replaceRegWith(unsigned FromReg, unsigned ToReg);

- /// getRegUseDefListHead - Return the head pointer for the register use/def
- /// list for the specified virtual or physical register.
- MachineOperand *&getRegUseDefListHead(unsigned RegNo) {
- if (TargetRegisterInfo::isVirtualRegister(RegNo))
- return VRegInfo[RegNo].second;
- return PhysRegUseDefLists[RegNo];
- }
-
- MachineOperand *getRegUseDefListHead(unsigned RegNo) const {
- if (TargetRegisterInfo::isVirtualRegister(RegNo))
- return VRegInfo[RegNo].second;
- return PhysRegUseDefLists[RegNo];
- }
-
/// getVRegDef - Return the machine instr that defines the specified virtual
/// register or null if none is found. This assumes that the code is in SSA
/// form, so there should only be one definition.
@@ -439,10 +468,6 @@ public:
const TargetRegisterInfo &TRI,
const TargetInstrInfo &TII);

-private:
- void HandleVRegListReallocation();
-
-public:
/// defusechain_iterator - This class provides iterator support for machine
/// operands in the function that use or define a specific register. If
/// ReturnUses is true it returns uses of registers, if ReturnDefs is true it
@@ -486,13 +511,22 @@ public:
// Iterator traversal: forward iteration only
defusechain_iterator &operator++() { // Preincrement
assert(Op && "Cannot increment end iterator!");
- Op = Op->getNextOperandForReg();
-
- // If this is an operand we don't care about, skip it.
- while (Op && ((!ReturnUses && Op->isUse()) ||
- (!ReturnDefs && Op->isDef()) ||
- (SkipDebug && Op->isDebug())))
- Op = Op->getNextOperandForReg();
+ Op = getNextOperandForReg(Op);
+
+ // All defs come before the uses, so stop def_iterator early.
+ if (!ReturnUses) {
+ if (Op) {
+ if (Op->isUse())
+ Op = 0;
+ else
+ assert(!Op->isDebug() && "Can't have debug defs");
+ }
+ } else {
+ // If this is an operand we don't care about, skip it.
+ while (Op && ((!ReturnDefs && Op->isDef()) ||
+ (SkipDebug && Op->isDebug())))
+ Op = getNextOperandForReg(Op);
+ }

return *this;
}
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index c80f6dc..07b3b45 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -315,6 +315,10 @@ namespace llvm {
/// This pass is still in development
extern char &StrongPHIEliminationID;

+ /// LiveIntervals - This analysis keeps track of the live ranges of virtual
+ /// and physical registers.
+ extern char &LiveIntervalsID;
+
/// LiveStacks pass. An analysis keeping track of the liveness of stack slots.
extern char &LiveStacksID;

@@ -392,6 +396,10 @@ namespace llvm {
/// into tails of their predecessors.
extern char &TailDuplicateID;

+ /// MachineTraceMetrics - This pass computes critical path and CPU resource
+ /// usage in an ensemble of traces.
+ extern char &MachineTraceMetricsID;
+
/// EarlyIfConverter - This pass performs if-conversion on SSA form by
/// inserting cmov instructions.
extern char &EarlyIfConverterID;
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index a5a912a..1ccfe54 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -422,6 +422,8 @@ public:
int Offset = 0, unsigned char TargetFlags=0) {
return getConstantPool(C, VT, Align, Offset, true, TargetFlags);
}
+ SDValue getTargetIndex(int Index, EVT VT, int64_t Offset = 0,
+ unsigned char TargetFlags = 0);
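// Illustrative sketch (not from this commit): a target's lowering code could
// wrap a target-dependent table slot as a TargetIndex node. 'DAG' is an
// assumed SelectionDAG reference; index 0 and the i64 type are arbitrary.
SDValue TI = DAG.getTargetIndex(/*Index=*/0, MVT::i64, /*Offset=*/8);
Ops.push_back(TI); // feed it to a target node like any other operand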
// When generating a branch to a BB, we don't in general know enough
// to provide debug info for the BB at that time, so keep this one around.
SDValue getBasicBlock(MachineBasicBlock *MBB);
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 41407f1..db361ee 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -146,7 +146,8 @@ public:
inline bool isMachineOpcode() const;
inline unsigned getMachineOpcode() const;
inline const DebugLoc getDebugLoc() const;
-
+ inline void dump() const;
+ inline void dumpr() const;

/// reachesChainWithoutSideEffects - Return true if this operand (which must
/// be a chain) reaches the specified operand without crossing any
@@ -806,7 +807,12 @@ inline bool SDValue::hasOneUse() const {
inline const DebugLoc SDValue::getDebugLoc() const {
return Node->getDebugLoc();
}
-
+inline void SDValue::dump() const {
+ return Node->dump();
+}
+inline void SDValue::dumpr() const {
+ return Node->dumpr();
+}

// Define inline functions from the SDUse class.

inline void SDUse::set(const SDValue &V) {
@@ -1343,6 +1349,29 @@ public:
}
};

+/// Completely target-dependent object reference.
+class TargetIndexSDNode : public SDNode {
+ unsigned char TargetFlags;
+ int Index;
+ int64_t Offset;
+ friend class SelectionDAG;
+public:
+
+ TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned char TF)
+ : SDNode(ISD::TargetIndex, DebugLoc(), getSDVTList(VT)),
+ TargetFlags(TF), Index(Idx), Offset(Ofs) {}
+public:
+
+ unsigned char getTargetFlags() const { return TargetFlags; }
+ int getIndex() const { return Index; }
+ int64_t getOffset() const { return Offset; }
+
+ static bool classof(const TargetIndexSDNode*) { return true; }
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::TargetIndex;
+ }
+};
+
class BasicBlockSDNode : public SDNode {
MachineBasicBlock *MBB;
friend class SelectionDAG;
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index bda43dc..eb38cd3 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -68,36 +68,38 @@ namespace llvm {
v2i32 = 22, // 2 x i32
v4i32 = 23, // 4 x i32
v8i32 = 24, // 8 x i32
- v1i64 = 25, // 1 x i64
- v2i64 = 26, // 2 x i64
- v4i64 = 27, // 4 x i64
- v8i64 = 28, // 8 x i64
-
- v2f16 = 29, // 2 x f16
- v2f32 = 30, // 2 x f32
- v4f32 = 31, // 4 x f32
- v8f32 = 32, // 8 x f32
- v2f64 = 33, // 2 x f64
- v4f64 = 34, // 4 x f64
+ v16i32 = 25, // 16 x i32
+ v1i64 = 26, // 1 x i64
+ v2i64 = 27, // 2 x i64
+ v4i64 = 28, // 4 x i64
+ v8i64 = 29, // 8 x i64
+ v16i64 = 30, // 16 x i64
+
+ v2f16 = 31, // 2 x f16
+ v2f32 = 32, // 2 x f32
+ v4f32 = 33, // 4 x f32
+ v8f32 = 34, // 8 x f32
+ v2f64 = 35, // 2 x f64
+ v4f64 = 36, // 4 x f64

FIRST_VECTOR_VALUETYPE = v2i8,
LAST_VECTOR_VALUETYPE = v4f64,
FIRST_INTEGER_VECTOR_VALUETYPE = v2i8,
- LAST_INTEGER_VECTOR_VALUETYPE = v8i64,
+ LAST_INTEGER_VECTOR_VALUETYPE = v16i64,
FIRST_FP_VECTOR_VALUETYPE = v2f16,
LAST_FP_VECTOR_VALUETYPE = v4f64,

- x86mmx = 35, // This is an X86 MMX value
+ x86mmx = 37, // This is an X86 MMX value

- Glue = 36, // This glues nodes together during pre-RA sched
+ Glue = 38, // This glues nodes together during pre-RA sched

- isVoid = 37, // This has no value
+ isVoid = 39, // This has no value

- Untyped = 38, // This value takes a register, but has
+ Untyped = 40, // This value takes a register, but has
// unspecified type. The register class
// will be determined by the opcode.

- LAST_VALUETYPE = 39, // This always remains at the end of the list.
+ LAST_VALUETYPE = 41, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
@@ -156,7 +158,7 @@ namespace llvm {
return ((SimpleTy >= MVT::FIRST_FP_VALUETYPE &&
SimpleTy <= MVT::LAST_FP_VALUETYPE) ||
(SimpleTy >= MVT::FIRST_FP_VECTOR_VALUETYPE &&
- SimpleTy <= MVT::LAST_FP_VECTOR_VALUETYPE));
+ SimpleTy <= MVT::LAST_FP_VECTOR_VALUETYPE));
}

/// isInteger - Return true if this is an integer, or a vector integer type.
@@ -173,6 +175,37 @@ namespace llvm {
SimpleTy <= MVT::LAST_VECTOR_VALUETYPE);
}

+ /// is64BitVector - Return true if this is a 64-bit vector type.
+ bool is64BitVector() const {
+ return (SimpleTy == MVT::v8i8 || SimpleTy == MVT::v4i16 ||
+ SimpleTy == MVT::v2i32 || SimpleTy == MVT::v1i64 ||
+ SimpleTy == MVT::v2f32);
+ }
+
+ /// is128BitVector - Return true if this is a 128-bit vector type.
+ bool is128BitVector() const {
+ return (SimpleTy == MVT::v16i8 || SimpleTy == MVT::v8i16 ||
+ SimpleTy == MVT::v4i32 || SimpleTy == MVT::v2i64 ||
+ SimpleTy == MVT::v4f32 || SimpleTy == MVT::v2f64);
+ }
+
+ /// is256BitVector - Return true if this is a 256-bit vector type.
+ bool is256BitVector() const {
+ return (SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 ||
+ SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 ||
+ SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64);
+ }
+
+ /// is512BitVector - Return true if this is a 512-bit vector type.
+ bool is512BitVector() const {
+ return (SimpleTy == MVT::v8i64 || SimpleTy == MVT::v16i32);
+ }
+
+ /// is1024BitVector - Return true if this is a 1024-bit vector type.
+ bool is1024BitVector() const {
+ return (SimpleTy == MVT::v16i64);
+ }
+
/// isPow2VectorType - Returns true if the given vector is a power of 2.
bool isPow2VectorType() const {
unsigned NElts = getVectorNumElements();
@@ -211,11 +244,13 @@ namespace llvm {
case v16i16: return i16;
case v2i32:
case v4i32:
- case v8i32: return i32;
+ case v8i32:
+ case v16i32: return i32;
case v1i64:
case v2i64:
case v4i64:
- case v8i64: return i64;
+ case v8i64:
+ case v16i64: return i64;
case v2f16: return f16;
case v2f32:
case v4f32:
@@ -231,7 +266,9 @@ namespace llvm {
llvm_unreachable("Not a vector MVT!");
case v32i8: return 32;
case v16i8:
- case v16i16: return 16;
+ case v16i16:
+ case v16i32:
+ case v16i64:return 16;
case v8i8 :
case v8i16:
case v8i32:
@@ -298,7 +335,9 @@ namespace llvm {
case v4i64:
case v8f32:
case v4f64: return 256;
+ case v16i32:
case v8i64: return 512;
+ case v16i64:return 1024;
}
}
@@ -371,12 +410,14 @@ namespace llvm {
if (NumElements == 2) return MVT::v2i32;
if (NumElements == 4) return MVT::v4i32;
if (NumElements == 8) return MVT::v8i32;
+ if (NumElements == 16) return MVT::v16i32;
break;
case MVT::i64:
if (NumElements == 1) return MVT::v1i64;
if (NumElements == 2) return MVT::v2i64;
if (NumElements == 4) return MVT::v4i64;
if (NumElements == 8) return MVT::v8i64;
+ if (NumElements == 16) return MVT::v16i64;
break;
case MVT::f16:
if (NumElements == 2) return MVT::v2f16;
@@ -490,32 +531,27 @@ namespace llvm {

/// is64BitVector - Return true if this is a 64-bit vector type.
bool is64BitVector() const {
- if (!isSimple())
- return isExtended64BitVector();
-
- return (V == MVT::v8i8 || V==MVT::v4i16 || V==MVT::v2i32 ||
- V == MVT::v1i64 || V==MVT::v2f32);
+ return isSimple() ? V.is64BitVector() : isExtended64BitVector();
}
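// Illustrative sketch (not from this commit): the new MVT width predicates
// subsume the old open-coded type lists and cover the 512/1024-bit additions.
assert(MVT(MVT::v16i32).is512BitVector());
assert(MVT(MVT::v16i64).is1024BitVector());
assert(EVT(MVT::v4i64).is256BitVector()); // EVT defers to MVT when simple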
/// is128BitVector - Return true if this is a 128-bit vector type.
bool is128BitVector() const {
- if (!isSimple())
- return isExtended128BitVector();
- return (V==MVT::v16i8 || V==MVT::v8i16 || V==MVT::v4i32 ||
- V==MVT::v2i64 || V==MVT::v4f32 || V==MVT::v2f64);
+ return isSimple() ? V.is128BitVector() : isExtended128BitVector();
}

/// is256BitVector - Return true if this is a 256-bit vector type.
bool is256BitVector() const {
- if (!isSimple())
- return isExtended256BitVector();
- return (V == MVT::v8f32 || V == MVT::v4f64 || V == MVT::v32i8 ||
- V == MVT::v16i16 || V == MVT::v8i32 || V == MVT::v4i64);
+ return isSimple() ? V.is256BitVector() : isExtended256BitVector();
}

/// is512BitVector - Return true if this is a 512-bit vector type.
bool is512BitVector() const {
- return isSimple() ? (V == MVT::v8i64) : isExtended512BitVector();
+ return isSimple() ? V.is512BitVector() : isExtended512BitVector();
+ }
+
+ /// is1024BitVector - Return true if this is a 1024-bit vector type.
+ bool is1024BitVector() const {
+ return isSimple() ? V.is1024BitVector() : isExtended1024BitVector();
}

/// isOverloaded - Return true if this is an overloaded type for TableGen.
@@ -708,6 +744,7 @@ namespace llvm {
bool isExtended128BitVector() const;
bool isExtended256BitVector() const;
bool isExtended512BitVector() const;
+ bool isExtended1024BitVector() const;
EVT getExtendedVectorElementType() const;
unsigned getExtendedVectorNumElements() const;
unsigned getExtendedSizeInBits() const;
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index 6c22690..f4b75bd 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -45,22 +45,24 @@ def v16i16 : ValueType<256, 21>; // 16 x i16 vector value
def v2i32 : ValueType<64 , 22>; // 2 x i32 vector value
def v4i32 : ValueType<128, 23>; // 4 x i32 vector value
def v8i32 : ValueType<256, 24>; // 8 x i32 vector value
-def v1i64 : ValueType<64 , 25>; // 1 x i64 vector value
-def v2i64 : ValueType<128, 26>; // 2 x i64 vector value
-def v4i64 : ValueType<256, 27>; // 4 x i64 vector value
-def v8i64 : ValueType<512, 28>; // 8 x i64 vector value
+def v16i32 : ValueType<512, 25>; // 16 x i32 vector value
+def v1i64 : ValueType<64 , 26>; // 1 x i64 vector value
+def v2i64 : ValueType<128, 27>; // 2 x i64 vector value
+def v4i64 : ValueType<256, 28>; // 4 x i64 vector value
+def v8i64 : ValueType<512, 29>; // 8 x i64 vector value
+def v16i64 : ValueType<1024,30>; // 16 x i64 vector value

-def v2f16 : ValueType<32 , 29>; // 2 x f16 vector value
-def v2f32 : ValueType<64 , 30>; // 2 x f32 vector value
-def v4f32 : ValueType<128, 31>; // 4 x f32 vector value
-def v8f32 : ValueType<256, 32>; // 8 x f32 vector value
-def v2f64 : ValueType<128, 33>; // 2 x f64 vector value
-def v4f64 : ValueType<256, 34>; // 4 x f64 vector value
+def v2f16 : ValueType<32 , 31>; // 2 x f16 vector value
+def v2f32 : ValueType<64 , 32>; // 2 x f32 vector value
+def v4f32 : ValueType<128, 33>; // 4 x f32 vector value
+def v8f32 : ValueType<256, 34>; // 8 x f32 vector value
+def v2f64 : ValueType<128, 35>; // 2 x f64 vector value
+def v4f64 : ValueType<256, 36>; // 4 x f64 vector value

-def x86mmx : ValueType<64 , 35>; // X86 MMX value
-def FlagVT : ValueType<0 , 36>; // Pre-RA sched glue
-def isVoid : ValueType<0 , 37>; // Produces no value
-def untyped: ValueType<8 , 38>; // Produces an untyped value
+def x86mmx : ValueType<64 , 37>; // X86 MMX value
+def FlagVT : ValueType<0 , 38>; // Pre-RA sched glue
+def isVoid : ValueType<0 , 39>; // Produces no value
+def untyped: ValueType<8 , 40>; // Produces an untyped value

def MetadataVT: ValueType<0, 250>; // Metadata
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index fcd57a4..5a60ba5 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -6,6 +6,9 @@
/* Bug report URL. */
#undef BUG_REPORT_URL

+/* Define if we have libxml2 */
+#undef CLANG_HAVE_LIBXML
+
/* Relative directory for resource files */
#undef CLANG_RESOURCE_DIR
diff --git a/include/llvm/GlobalValue.h b/include/llvm/GlobalValue.h
index 8b969f3..391b830 100644
--- a/include/llvm/GlobalValue.h
+++ b/include/llvm/GlobalValue.h
@@ -34,6 +34,7 @@ public:
AvailableExternallyLinkage, ///< Available for inspection, not emission.
LinkOnceAnyLinkage, ///< Keep one copy of function when linking (inline)
LinkOnceODRLinkage, ///< Same, but only replaced by something equivalent.
+ LinkOnceODRAutoHideLinkage, ///< Like LinkOnceODRLinkage but addr not taken.
WeakAnyLinkage, ///< Keep one copy of named function when linking (weak)
WeakODRLinkage, ///< Same, but only replaced by something equivalent.
AppendingLinkage, ///< Special purpose, only applies to global arrays
@@ -41,8 +42,6 @@ public:
PrivateLinkage, ///< Like Internal, but omit from symbol table.
LinkerPrivateLinkage, ///< Like Private, but linker removes.
LinkerPrivateWeakLinkage, ///< Like LinkerPrivate, but weak.
- LinkerPrivateWeakDefAutoLinkage, ///< Like LinkerPrivateWeak, but possibly
- /// hidden.
DLLImportLinkage, ///< Function to be imported from DLL
DLLExportLinkage, ///< Function to be accessible from DLL.
ExternalWeakLinkage,///< ExternalWeak linkage description.
@@ -123,7 +122,12 @@ public:
return Linkage == AvailableExternallyLinkage;
}
static bool isLinkOnceLinkage(LinkageTypes Linkage) {
- return Linkage == LinkOnceAnyLinkage || Linkage == LinkOnceODRLinkage;
+ return Linkage == LinkOnceAnyLinkage ||
+ Linkage == LinkOnceODRLinkage ||
+ Linkage == LinkOnceODRAutoHideLinkage;
+ }
+ static bool isLinkOnceODRAutoHideLinkage(LinkageTypes Linkage) {
+ return Linkage == LinkOnceODRAutoHideLinkage;
}
static bool isWeakLinkage(LinkageTypes Linkage) {
return Linkage == WeakAnyLinkage || Linkage == WeakODRLinkage;
@@ -143,13 +147,9 @@ public:
static bool isLinkerPrivateWeakLinkage(LinkageTypes Linkage) {
return Linkage == LinkerPrivateWeakLinkage;
}
- static bool isLinkerPrivateWeakDefAutoLinkage(LinkageTypes Linkage) {
- return Linkage == LinkerPrivateWeakDefAutoLinkage;
- }
static bool isLocalLinkage(LinkageTypes Linkage) {
return isInternalLinkage(Linkage) || isPrivateLinkage(Linkage) ||
- isLinkerPrivateLinkage(Linkage) || isLinkerPrivateWeakLinkage(Linkage) ||
- isLinkerPrivateWeakDefAutoLinkage(Linkage);
+ isLinkerPrivateLinkage(Linkage) || isLinkerPrivateWeakLinkage(Linkage);
}
static bool isDLLImportLinkage(LinkageTypes Linkage) {
return Linkage == DLLImportLinkage;
@@ -178,8 +178,7 @@ public:
Linkage == LinkOnceAnyLinkage ||
Linkage == CommonLinkage ||
Linkage == ExternalWeakLinkage ||
- Linkage == LinkerPrivateWeakLinkage ||
- Linkage == LinkerPrivateWeakDefAutoLinkage;
+ Linkage == LinkerPrivateWeakLinkage;
}

/// isWeakForLinker - Whether the definition of this global may be replaced at
@@ -192,10 +191,10 @@ public:
Linkage == WeakODRLinkage ||
Linkage == LinkOnceAnyLinkage ||
Linkage == LinkOnceODRLinkage ||
+ Linkage == LinkOnceODRAutoHideLinkage ||
Linkage == CommonLinkage ||
Linkage == ExternalWeakLinkage ||
- Linkage == LinkerPrivateWeakLinkage ||
- Linkage == LinkerPrivateWeakDefAutoLinkage;
+ Linkage == LinkerPrivateWeakLinkage;
}

bool hasExternalLinkage() const { return isExternalLinkage(Linkage); }
@@ -205,6 +204,9 @@ public:
bool hasLinkOnceLinkage() const {
return isLinkOnceLinkage(Linkage);
}
+ bool hasLinkOnceODRAutoHideLinkage() const {
+ return isLinkOnceODRAutoHideLinkage(Linkage);
+ }
bool hasWeakLinkage() const {
return isWeakLinkage(Linkage);
}
@@ -215,9 +217,6 @@ public:
bool hasLinkerPrivateWeakLinkage() const {
return isLinkerPrivateWeakLinkage(Linkage);
}
- bool hasLinkerPrivateWeakDefAutoLinkage() const {
- return isLinkerPrivateWeakDefAutoLinkage(Linkage);
- }
bool hasLocalLinkage() const { return isLocalLinkage(Linkage); }
bool hasDLLImportLinkage() const { return isDLLImportLinkage(Linkage); }
bool hasDLLExportLinkage() const { return isDLLExportLinkage(Linkage); }
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index e6fa8c3..de97957 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -172,6 +172,7 @@ void initializeMachineLoopRangesPass(PassRegistry&);
void initializeMachineModuleInfoPass(PassRegistry&);
void initializeMachineSchedulerPass(PassRegistry&);
void initializeMachineSinkingPass(PassRegistry&);
+void initializeMachineTraceMetricsPass(PassRegistry&);
void initializeMachineVerifierPassPass(PassRegistry&);
void initializeMemCpyOptPass(PassRegistry&);
void initializeMemDepPrinterPass(PassRegistry&);
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index 06850c9..d1a0fee 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -133,9 +133,12 @@ def llvm_v16i16_ty : LLVMType<v16i16>; // 16 x i16
def llvm_v2i32_ty : LLVMType<v2i32>; // 2 x i32
def llvm_v4i32_ty : LLVMType<v4i32>; // 4 x i32
def llvm_v8i32_ty : LLVMType<v8i32>; // 8 x i32
+def llvm_v16i32_ty : LLVMType<v16i32>; // 16 x i32
def llvm_v1i64_ty : LLVMType<v1i64>; // 1 x i64
def llvm_v2i64_ty : LLVMType<v2i64>; // 2 x i64
def llvm_v4i64_ty : LLVMType<v4i64>; // 4 x i64
+def llvm_v8i64_ty : LLVMType<v8i64>; // 8 x i64
+def llvm_v16i64_ty : LLVMType<v16i64>; // 16 x i64

def llvm_v2f32_ty : LLVMType<v2f32>; // 2 x float
def llvm_v4f32_ty : LLVMType<v4f32>; // 4 x float
@@ -261,6 +264,7 @@ let Properties = [IntrReadMem] in {
def int_exp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
def int_fabs : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+ def int_floor : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
}

let Properties = [IntrNoMem] in {
diff --git a/include/llvm/IntrinsicsHexagon.td b/include/llvm/IntrinsicsHexagon.td
index efd04f3..8a88729 100644
--- a/include/llvm/IntrinsicsHexagon.td
+++ b/include/llvm/IntrinsicsHexagon.td
@@ -15,7 +15,7 @@
//
// All Hexagon intrinsics start with "llvm.hexagon.".
let TargetPrefix = "hexagon" in {
- /// Hexagon_Intrinsic - Base class for all altivec intrinsics.
+ /// Hexagon_Intrinsic - Base class for all Hexagon intrinsics.
class Hexagon_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
list<LLVMType> param_types,
list<IntrinsicProperty> properties>
diff --git a/include/llvm/IntrinsicsMips.td b/include/llvm/IntrinsicsMips.td
index e260a37..4375ac2 100644
--- a/include/llvm/IntrinsicsMips.td
+++ b/include/llvm/IntrinsicsMips.td
@@ -22,26 +22,22 @@ let TargetPrefix = "mips" in { // All intrinsics start with "llvm.mips.".
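// Illustrative note on the property changes below (not from this commit):
// dropping IntrNoMem means the optimizer treats these DSP operations as
// having side effects (they may set ouflag bits in the DSPControl register),
// so identical calls are no longer merged. 'Builder', 'AddQ', 'X' and 'Y'
// are assumed for the example.
CallInst *A = Builder.CreateCall2(AddQ, X, Y); // may set DSPControl ouflag
CallInst *B = Builder.CreateCall2(AddQ, X, Y); // must be kept; not CSE'd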
// Addition/subtraction
def int_mips_addu_qb : GCCBuiltin<"__builtin_mips_addu_qb">,
- Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty],
- [IntrNoMem, Commutative]>;
+ Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
def int_mips_addu_s_qb : GCCBuiltin<"__builtin_mips_addu_s_qb">,
- Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty],
- [IntrNoMem, Commutative]>;
+ Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
def int_mips_subu_qb : GCCBuiltin<"__builtin_mips_subu_qb">,
- Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], []>;
def int_mips_subu_s_qb : GCCBuiltin<"__builtin_mips_subu_s_qb">,
- Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], []>;
def int_mips_addq_ph : GCCBuiltin<"__builtin_mips_addq_ph">,
- Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty],
- [IntrNoMem, Commutative]>;
+ Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>;
def int_mips_addq_s_ph : GCCBuiltin<"__builtin_mips_addq_s_ph">,
- Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty],
- [IntrNoMem, Commutative]>;
+ Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>;
def int_mips_subq_ph : GCCBuiltin<"__builtin_mips_subq_ph">,
- Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>;
+ Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], []>;
def int_mips_subq_s_ph : GCCBuiltin<"__builtin_mips_subq_s_ph">,
- Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>;
+ Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], []>;

def int_mips_madd: GCCBuiltin<"__builtin_mips_madd">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i32_ty],
@@ -58,17 +54,14 @@ def int_mips_msubu: GCCBuiltin<"__builtin_mips_msubu">,
[IntrNoMem]>;

def int_mips_addq_s_w: GCCBuiltin<"__builtin_mips_addq_s_w">,
- Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty],
- [IntrNoMem, Commutative]>;
+ Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [Commutative]>;
def int_mips_subq_s_w: GCCBuiltin<"__builtin_mips_subq_s_w">,
- Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], [IntrNoMem]>;
+ Intrinsic<[mips_q31_ty], [mips_q31_ty, mips_q31_ty], []>;

def int_mips_addsc: GCCBuiltin<"__builtin_mips_addsc">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [Commutative]>;
def int_mips_addwc: GCCBuiltin<"__builtin_mips_addwc">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [Commutative]>;

def int_mips_modsub: GCCBuiltin<"__builtin_mips_modsub">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -80,9 +73,9 @@ def int_mips_raddu_w_qb: GCCBuiltin<"__builtin_mips_raddu_w_qb">,

// Absolute value
def int_mips_absq_s_ph: GCCBuiltin<"__builtin_mips_absq_s_ph">,
- Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty], [IntrNoMem]>;
+ Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty], []>;
def int_mips_absq_s_w: GCCBuiltin<"__builtin_mips_absq_s_w">,
- Intrinsic<[mips_q31_ty], [mips_q31_ty], [IntrNoMem]>;
+ Intrinsic<[mips_q31_ty], [mips_q31_ty], []>;

//===----------------------------------------------------------------------===//
// Precision reduce/expand
@@ -90,11 +83,11 @@ def int_mips_absq_s_w: GCCBuiltin<"__builtin_mips_absq_s_w">,
def int_mips_precrq_qb_ph: GCCBuiltin<"__builtin_mips_precrq_qb_ph">,
Intrinsic<[llvm_v4i8_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>;
def int_mips_precrqu_s_qb_ph: GCCBuiltin<"__builtin_mips_precrqu_s_qb_ph">,
- Intrinsic<[llvm_v4i8_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i8_ty], [mips_v2q15_ty, mips_v2q15_ty], []>;
def int_mips_precrq_ph_w: GCCBuiltin<"__builtin_mips_precrq_ph_w">,
Intrinsic<[mips_v2q15_ty], [mips_q31_ty, mips_q31_ty], [IntrNoMem]>;
def int_mips_precrq_rs_ph_w: GCCBuiltin<"__builtin_mips_precrq_rs_ph_w">,
- Intrinsic<[mips_v2q15_ty], [mips_q31_ty, mips_q31_ty], [IntrNoMem]>;
+ Intrinsic<[mips_v2q15_ty], [mips_q31_ty, mips_q31_ty], []>;
def int_mips_preceq_w_phl: GCCBuiltin<"__builtin_mips_preceq_w_phl">,
Intrinsic<[mips_q31_ty], [mips_v2q15_ty], [IntrNoMem]>;
def int_mips_preceq_w_phr: GCCBuiltin<"__builtin_mips_preceq_w_phr">,
@@ -120,19 +113,19 @@ def int_mips_preceu_ph_qbra: GCCBuiltin<"__builtin_mips_preceu_ph_qbra">,

// Shift
def int_mips_shll_qb: GCCBuiltin<"__builtin_mips_shll_qb">,
- Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], []>;
def int_mips_shrl_qb: GCCBuiltin<"__builtin_mips_shrl_qb">,
Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_mips_shll_ph: GCCBuiltin<"__builtin_mips_shll_ph">,
- Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], []>;
def int_mips_shll_s_ph: GCCBuiltin<"__builtin_mips_shll_s_ph">,
- Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], []>;
def int_mips_shra_ph: GCCBuiltin<"__builtin_mips_shra_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], [IntrNoMem]>;
def int_mips_shra_r_ph: GCCBuiltin<"__builtin_mips_shra_r_ph">,
Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, llvm_i32_ty], [IntrNoMem]>;
def int_mips_shll_s_w: GCCBuiltin<"__builtin_mips_shll_s_w">,
- Intrinsic<[mips_q31_ty], [mips_q31_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[mips_q31_ty], [mips_q31_ty, llvm_i32_ty], []>;
def int_mips_shra_r_w: GCCBuiltin<"__builtin_mips_shra_r_w">,
Intrinsic<[mips_q31_ty], [mips_q31_ty, llvm_i32_ty], [IntrNoMem]>;
def int_mips_shilo: GCCBuiltin<"__builtin_mips_shilo">,
@@ -142,33 +135,25 @@ def int_mips_shilo: GCCBuiltin<"__builtin_mips_shilo">,

// Multiplication
def int_mips_muleu_s_ph_qbl: GCCBuiltin<"__builtin_mips_muleu_s_ph_qbl">,
- Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty, mips_v2q15_ty], [IntrNoMem]>;
+ Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty, mips_v2q15_ty], []>;
def int_mips_muleu_s_ph_qbr: GCCBuiltin<"__builtin_mips_muleu_s_ph_qbr">,
- Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty, mips_v2q15_ty], [IntrNoMem]>;
+ Intrinsic<[mips_v2q15_ty], [llvm_v4i8_ty, mips_v2q15_ty], []>;
def int_mips_mulq_rs_ph: GCCBuiltin<"__builtin_mips_mulq_rs_ph">,
- Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty],
- [IntrNoMem, Commutative]>;
+ Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>;
def int_mips_muleq_s_w_phl: GCCBuiltin<"__builtin_mips_muleq_s_w_phl">,
- Intrinsic<[mips_q31_ty], [mips_v2q15_ty, mips_v2q15_ty],
- [IntrNoMem, Commutative]>;
+ Intrinsic<[mips_q31_ty], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>;
def int_mips_muleq_s_w_phr: GCCBuiltin<"__builtin_mips_muleq_s_w_phr">,
- Intrinsic<[mips_q31_ty], [mips_v2q15_ty, mips_v2q15_ty],
- [IntrNoMem, Commutative]>;
+ Intrinsic<[mips_q31_ty], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>;
def int_mips_mulsaq_s_w_ph: GCCBuiltin<"__builtin_mips_mulsaq_s_w_ph">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty],
- [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
def int_mips_maq_s_w_phl: GCCBuiltin<"__builtin_mips_maq_s_w_phl">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty],
- [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
def int_mips_maq_s_w_phr: GCCBuiltin<"__builtin_mips_maq_s_w_phr">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty],
- [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
def int_mips_maq_sa_w_phl: GCCBuiltin<"__builtin_mips_maq_sa_w_phl">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty],
- [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
def int_mips_maq_sa_w_phr: GCCBuiltin<"__builtin_mips_maq_sa_w_phr">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty],
- [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
def int_mips_mult: GCCBuiltin<"__builtin_mips_mult">,
Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
@@ -192,69 +177,62 @@ def int_mips_dpsu_h_qbr: GCCBuiltin<"__builtin_mips_dpsu_h_qbr">,
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_v4i8_ty, llvm_v4i8_ty],
[IntrNoMem]>;
def int_mips_dpaq_s_w_ph: GCCBuiltin<"__builtin_mips_dpaq_s_w_ph">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty],
- [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
def int_mips_dpsq_s_w_ph: GCCBuiltin<"__builtin_mips_dpsq_s_w_ph">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty],
- [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_v2q15_ty, mips_v2q15_ty], []>;
def int_mips_dpaq_sa_l_w: GCCBuiltin<"__builtin_mips_dpaq_sa_l_w">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_q31_ty, mips_q31_ty],
- [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_q31_ty, mips_q31_ty], []>;
def int_mips_dpsq_sa_l_w: GCCBuiltin<"__builtin_mips_dpsq_sa_l_w">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_q31_ty, mips_q31_ty],
- [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, mips_q31_ty, mips_q31_ty], []>;

//===----------------------------------------------------------------------===//
// Comparison
def int_mips_cmpu_eq_qb: GCCBuiltin<"__builtin_mips_cmpu_eq_qb">,
- Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem, Commutative]>;
+ Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
def int_mips_cmpu_lt_qb: GCCBuiltin<"__builtin_mips_cmpu_lt_qb">,
- Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem, Commutative]>;
+ Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
def int_mips_cmpu_le_qb: GCCBuiltin<"__builtin_mips_cmpu_le_qb">,
- Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem, Commutative]>;
+ Intrinsic<[], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
def int_mips_cmpgu_eq_qb: GCCBuiltin<"__builtin_mips_cmpgu_eq_qb">,
- Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty],
- [IntrNoMem, Commutative]>;
+ Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
def int_mips_cmpgu_lt_qb: GCCBuiltin<"__builtin_mips_cmpgu_lt_qb">,
- Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty],
- [IntrNoMem, Commutative]>;
+ Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
def int_mips_cmpgu_le_qb: GCCBuiltin<"__builtin_mips_cmpgu_le_qb">,
- Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty],
- [IntrNoMem, Commutative]>;
+ Intrinsic<[llvm_i32_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [Commutative]>;
def int_mips_cmp_eq_ph: GCCBuiltin<"__builtin_mips_cmp_eq_ph">,
- Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem, Commutative]>;
+ Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>;
def int_mips_cmp_lt_ph: GCCBuiltin<"__builtin_mips_cmp_lt_ph">,
- Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem, Commutative]>;
+ Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>;
def int_mips_cmp_le_ph: GCCBuiltin<"__builtin_mips_cmp_le_ph">,
- Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem, Commutative]>;
+ Intrinsic<[], [mips_v2q15_ty, mips_v2q15_ty], [Commutative]>;

//===----------------------------------------------------------------------===//
// Extracting
def int_mips_extr_s_h: GCCBuiltin<"__builtin_mips_extr_s_h">,
- Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>;
def int_mips_extr_w: GCCBuiltin<"__builtin_mips_extr_w">,
- Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>;
def int_mips_extr_rs_w: GCCBuiltin<"__builtin_mips_extr_rs_w">,
- Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>;
def int_mips_extr_r_w: GCCBuiltin<"__builtin_mips_extr_r_w">,
- Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>;
def int_mips_extp: GCCBuiltin<"__builtin_mips_extp">,
- Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>;
def int_mips_extpdp: GCCBuiltin<"__builtin_mips_extpdp">,
- Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty], []>;

//===----------------------------------------------------------------------===//
// Misc
def int_mips_wrdsp: GCCBuiltin<"__builtin_mips_wrdsp">,
- Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
def int_mips_rddsp: GCCBuiltin<"__builtin_mips_rddsp">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem]>;

def int_mips_insv: GCCBuiltin<"__builtin_mips_insv">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
def int_mips_bitrev: GCCBuiltin<"__builtin_mips_bitrev">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
@@ -267,15 +245,15 @@ def int_mips_repl_ph: GCCBuiltin<"__builtin_mips_repl_ph">,
Intrinsic<[mips_v2q15_ty], [llvm_i32_ty], [IntrNoMem]>;

def int_mips_pick_qb: GCCBuiltin<"__builtin_mips_pick_qb">,
- Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v4i8_ty], [llvm_v4i8_ty, llvm_v4i8_ty], [IntrReadMem]>;
def int_mips_pick_ph: GCCBuiltin<"__builtin_mips_pick_ph">,
- Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrNoMem]>;
+ Intrinsic<[mips_v2q15_ty], [mips_v2q15_ty, mips_v2q15_ty], [IntrReadMem]>;

def int_mips_mthlip: GCCBuiltin<"__builtin_mips_mthlip">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], []>;

def int_mips_bposge32: GCCBuiltin<"__builtin_mips_bposge32">,
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+ Intrinsic<[llvm_i32_ty], [], [IntrReadMem]>;

def int_mips_lbux: GCCBuiltin<"__builtin_mips_lbux">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadArgMem]>;
diff --git a/include/llvm/MC/MCFixedLenDisassembler.h b/include/llvm/MC/MCFixedLenDisassembler.h
new file mode 100644
index 0000000..22b3c32
--- /dev/null
+++ b/include/llvm/MC/MCFixedLenDisassembler.h
@@ -0,0 +1,32 @@
+//===-- llvm/MC/MCFixedLenDisassembler.h - Decoder driver -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Fixed length disassembler decoder state machine driver.
+//===----------------------------------------------------------------------===//
+#ifndef MCFIXEDLENDISASSEMBLER_H
+#define MCFIXEDLENDISASSEMBLER_H
+
+namespace llvm {
+
+namespace MCD {
+// Disassembler state machine opcodes.
+enum DecoderOps {
+ OPC_ExtractField = 1, // OPC_ExtractField(uint8_t Start, uint8_t Len)
+ OPC_FilterValue, // OPC_FilterValue(uleb128 Val, uint16_t NumToSkip)
+ OPC_CheckField, // OPC_CheckField(uint8_t Start, uint8_t Len,
+ // uleb128 Val, uint16_t NumToSkip)
+ OPC_CheckPredicate, // OPC_CheckPredicate(uleb128 PIdx, uint16_t NumToSkip)
+ OPC_Decode, // OPC_Decode(uleb128 Opcode, uleb128 DIdx)
+ OPC_SoftFail, // OPC_SoftFail(uleb128 PMask, uleb128 NMask)
+ OPC_Fail // OPC_Fail()
+};
+
+} // namespace MCD
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h
index 186612d..dbf16d8 100644
--- a/include/llvm/MC/MCInstrDesc.h
+++ b/include/llvm/MC/MCInstrDesc.h
@@ -107,6 +107,7 @@ namespace MCID {
Compare,
MoveImm,
Bitcast,
+ Select,
DelaySlot,
FoldableAsLoad,
MayLoad,
@@ -282,6 +283,12 @@ public:
return Flags & (1 << MCID::Bitcast);
}

+ /// isSelect - Return true if this is a select instruction.
+ ///
+ bool isSelect() const {
+ return Flags & (1 << MCID::Select);
+ }
+
/// isNotDuplicable - Return true if this instruction cannot be safely
/// duplicated. For example, if the instruction has unique labels attached
/// to it, duplicating it would cause multiple definition errors.
diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h
index 6e44e6c..9591a00 100644
--- a/include/llvm/MC/MCObjectWriter.h
+++ b/include/llvm/MC/MCObjectWriter.h
@@ -182,11 +182,6 @@ public:

/// @}

- /// Utility function to encode a SLEB128 value.
- static void EncodeSLEB128(int64_t Value, raw_ostream &OS);
- /// Utility function to encode a ULEB128 value.
- static void EncodeULEB128(uint64_t Value, raw_ostream &OS,
- unsigned Padding = 0);
};

} // End llvm namespace
diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h
index c541f05..46a9d71 100644
--- a/include/llvm/MC/MCRegisterInfo.h
+++ b/include/llvm/MC/MCRegisterInfo.h
@@ -111,6 +111,10 @@ struct MCRegisterDesc {
uint32_t SubRegs; // Sub-register set, described above
uint32_t SuperRegs; // Super-register set, described above

+ // Offset into MCRI::SubRegIndices of a list of sub-register indices for each
+ // sub-register in SubRegs.
+ uint32_t SubRegIndices;
+
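// Illustrative sketch (not from this commit): generic code can now key off
// the Select flag added to MCInstrDesc above, e.g. to collect candidates for
// the EarlyIfConverter-style passes. 'MI' and 'Candidates' are assumed.
const MCInstrDesc &Desc = MI->getDesc();
if (Desc.isSelect())
  Candidates.push_back(MI); // cmov/select-like instruction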
// RegUnits - Points to the list of register units. The low 4 bits hold the
// Scale, the high bits hold an offset into DiffLists. See MCRegUnitIterator.
uint32_t RegUnits;
@@ -148,7 +152,6 @@ private:
unsigned NumClasses; // Number of entries in the array
unsigned NumRegUnits; // Number of regunits.
const uint16_t (*RegUnitRoots)[2]; // Pointer to regunit root table.
- const uint16_t *RegLists; // Pointer to the reglists array
const uint16_t *DiffLists; // Pointer to the difflists array
const char *RegStrings; // Pointer to the string table.
const uint16_t *SubRegIndices; // Pointer to the subreg lookup
@@ -168,25 +171,6 @@ private:
DenseMap<unsigned, int> L2SEHRegs; // LLVM to SEH regs mapping

public:
- /// RegListIterator. This iterator class is used to traverse lists of
- /// super-registers, sub-registers, and overlapping registers. Don't use it
- /// directly, use one of the sub-classes defined below.
- class RegListIterator {
- const uint16_t *Pos;
- public:
- explicit RegListIterator(const uint16_t *Table)
- : Pos(Table) {}
-
- /// isValid - Return false when the end of the list is reached.
- bool isValid() const { return *Pos; }
-
- /// Dereference the iterator to get the current register.
- unsigned operator*() const { return *Pos; }
-
- /// Pre-increment. Move to the next register.
- void operator++() { ++Pos; }
- };
-
/// DiffListIterator - Base iterator class that can traverse the
/// differentially encoded register and regunit lists in DiffLists.
/// Don't use this class directly, use one of the specialized sub-classes
@@ -233,8 +217,8 @@ public:
}
};

- // These iterators are allowed to sub-class RegListIterator and
- // DiffListIterator and access internal list pointers.
+ // These iterators are allowed to sub-class DiffListIterator and access
+ // internal list pointers.
friend class MCSubRegIterator;
friend class MCSuperRegIterator;
friend class MCRegAliasIterator;
@@ -247,7 +231,6 @@ public:
const MCRegisterClass *C, unsigned NC,
const uint16_t (*RURoots)[2],
unsigned NRU,
- const uint16_t *RL,
const uint16_t *DL,
const char *Strings,
const uint16_t *SubIndices,
@@ -257,7 +240,6 @@ public:
NumRegs = NR;
RAReg = RA;
Classes = C;
- RegLists = RL;
DiffLists = DL;
RegStrings = Strings;
NumClasses = NC;
@@ -327,9 +309,7 @@ public:
/// getSubReg - Returns the physical register number of sub-register "Index"
/// for physical register RegNo. Return zero if the sub-register does not
/// exist.
- unsigned getSubReg(unsigned Reg, unsigned Idx) const {
- return *(SubRegIndices + (Reg - 1) * NumSubRegIndices + Idx - 1);
- }
+ unsigned getSubReg(unsigned Reg, unsigned Idx) const;

/// getMatchingSuperReg - Return a super-register of the specified register
/// Reg so its sub-register of index SubIdx is Reg.
@@ -339,12 +319,7 @@ public:
/// getSubRegIndex - For a given register pair, return the sub-register index
/// if the second register is a sub-register of the first. Return zero
/// otherwise.
- unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const {
- for (unsigned I = 1; I <= NumSubRegIndices; ++I)
- if (getSubReg(RegNo, I) == SubRegNo)
- return I;
- return 0;
- }
+ unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const;

/// getName - Return the human-readable symbolic target-specific name for the
/// specified physical register.
@@ -369,36 +344,15 @@ public:
/// number. Returns -1 if there is no equivalent value. The second
/// parameter allows targets to use different numberings for EH info and
/// debugging info.
- int getDwarfRegNum(unsigned RegNum, bool isEH) const {
- const DwarfLLVMRegPair *M = isEH ? EHL2DwarfRegs : L2DwarfRegs;
- unsigned Size = isEH ? EHL2DwarfRegsSize : L2DwarfRegsSize;
-
- DwarfLLVMRegPair Key = { RegNum, 0 };
- const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key);
- if (I == M+Size || I->FromReg != RegNum)
- return -1;
- return I->ToReg;
- }
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;

/// getLLVMRegNum - Map a dwarf register back to a target register.
///
- int getLLVMRegNum(unsigned RegNum, bool isEH) const {
- const DwarfLLVMRegPair *M = isEH ? EHDwarf2LRegs : Dwarf2LRegs;
- unsigned Size = isEH ? EHDwarf2LRegsSize : Dwarf2LRegsSize;
-
- DwarfLLVMRegPair Key = { RegNum, 0 };
- const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key);
- assert(I != M+Size && I->FromReg == RegNum && "Invalid RegNum");
- return I->ToReg;
- }
+ int getLLVMRegNum(unsigned RegNum, bool isEH) const;

/// getSEHRegNum - Map a target register to an equivalent SEH register
/// number. Returns LLVM register number if there is no equivalent value.
- int getSEHRegNum(unsigned RegNum) const {
- const DenseMap<unsigned, int>::const_iterator I = L2SEHRegs.find(RegNum);
- if (I == L2SEHRegs.end()) return (int)RegNum;
- return I->second;
- }
+ int getSEHRegNum(unsigned RegNum) const;

regclass_iterator regclass_begin() const { return Classes; }
regclass_iterator regclass_end() const { return Classes+NumClasses; }
@@ -431,37 +385,36 @@ public:
// aliasing registers. Use these iterator classes to traverse the lists.

/// MCSubRegIterator enumerates all sub-registers of Reg.
-class MCSubRegIterator : public MCRegisterInfo::RegListIterator {
+class MCSubRegIterator : public MCRegisterInfo::DiffListIterator {
public:
- MCSubRegIterator(unsigned Reg, const MCRegisterInfo *MCRI)
- : RegListIterator(MCRI->RegLists + MCRI->get(Reg).SubRegs) {}
+ MCSubRegIterator(unsigned Reg, const MCRegisterInfo *MCRI) {
+ init(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs);
+ ++*this;
+ }
};

/// MCSuperRegIterator enumerates all super-registers of Reg.
-class MCSuperRegIterator : public MCRegisterInfo::RegListIterator {
+class MCSuperRegIterator : public MCRegisterInfo::DiffListIterator {
public:
- MCSuperRegIterator(unsigned Reg, const MCRegisterInfo *MCRI)
- : RegListIterator(MCRI->RegLists + MCRI->get(Reg).SuperRegs) {}
+ MCSuperRegIterator(unsigned Reg, const MCRegisterInfo *MCRI) {
+ init(Reg, MCRI->DiffLists + MCRI->get(Reg).SuperRegs);
+ ++*this;
+ }
};

/// MCRegAliasIterator enumerates all registers aliasing Reg.
/// If IncludeSelf is set, Reg itself is included in the list.
-class MCRegAliasIterator : public MCRegisterInfo::RegListIterator {
+class MCRegAliasIterator : public MCRegisterInfo::DiffListIterator {
public:
- MCRegAliasIterator(unsigned Reg, const MCRegisterInfo *MCRI, bool IncludeSelf)
- : RegListIterator(MCRI->RegLists + MCRI->get(Reg).Overlaps + !IncludeSelf)
- {}
+ MCRegAliasIterator(unsigned Reg, const MCRegisterInfo *MCRI,
+ bool IncludeSelf) {
+ init(Reg, MCRI->DiffLists + MCRI->get(Reg).Overlaps);
+ // Initially, the iterator points to Reg itself.
+ if (!IncludeSelf)
+ ++*this;
+ }
};

-inline
-unsigned MCRegisterInfo::getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
- const MCRegisterClass *RC) const {
- for (MCSuperRegIterator Supers(Reg, this); Supers.isValid(); ++Supers)
- if (Reg == getSubReg(*Supers, SubIdx) && RC->contains(*Supers))
- return *Supers;
- return 0;
-}
-
//===----------------------------------------------------------------------===//
// Register Units
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h
index 49e3fee..3b1cdf1 100644
--- a/include/llvm/MC/MCSchedule.h
+++ b/include/llvm/MC/MCSchedule.h
@@ -78,6 +78,11 @@ public:
unsigned HighLatency;
static const unsigned DefaultHighLatency = 10;

+ // MispredictPenalty is the typical number of extra cycles the processor
+ // takes to recover from a branch misprediction.
+ unsigned MispredictPenalty;
+ static const unsigned DefaultMispredictPenalty = 10;
+
private:
// TODO: Add a reference to proc resource types and sched resource tables.
@@ -90,17 +95,18 @@ public:
// target code can use it in static initializers. The defaults need to be
// initialized in this default ctor because some clients directly instantiate
// MCSchedModel instead of using a generated itinerary.
- MCSchedModel(): IssueWidth(DefaultMinLatency),
+ MCSchedModel(): IssueWidth(DefaultIssueWidth),
MinLatency(DefaultMinLatency),
LoadLatency(DefaultLoadLatency),
HighLatency(DefaultHighLatency),
+ MispredictPenalty(DefaultMispredictPenalty),
InstrItineraries(0) {}

// Table-gen driven ctor.
- MCSchedModel(unsigned iw, int ml, unsigned ll, unsigned hl,
+ MCSchedModel(unsigned iw, int ml, unsigned ll, unsigned hl, unsigned mp,
const InstrItinerary *ii):
IssueWidth(iw), MinLatency(ml), LoadLatency(ll), HighLatency(hl),
- InstrItineraries(ii){}
+ MispredictPenalty(mp), InstrItineraries(ii){}
};

} // End llvm namespace
diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCTargetAsmParser.h
index 4e3fd0d..91b604b 100644
--- a/include/llvm/MC/MCTargetAsmParser.h
+++ b/include/llvm/MC/MCTargetAsmParser.h
@@ -79,6 +79,22 @@ public:
/// \param DirectiveID - the identifier token of the directive.
virtual bool ParseDirective(AsmToken DirectiveID) = 0;

+ /// MatchInstruction - Recognize a series of operands of a parsed instruction
+ /// as an actual MCInst. This returns false on success and returns true on
+ /// failure to match.
+ ///
+ /// On failure, the target parser is responsible for emitting a diagnostic
+ /// explaining the match failure.
+ virtual bool
+ MatchInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ SmallVectorImpl<MCInst> &MCInsts,
+ unsigned &OrigErrorInfo,
+ bool matchingInlineAsm = false) {
+ OrigErrorInfo = ~0x0;
+ return true;
+ }
+
/// MatchAndEmitInstruction - Recognize a series of operands of a parsed
/// instruction as an actual MCInst and emit it to the specified MCStreamer.
/// This returns false on success and returns true on failure to match.
diff --git a/include/llvm/Module.h b/include/llvm/Module.h
index cb7c1dc..e6303ac 100644
--- a/include/llvm/Module.h
+++ b/include/llvm/Module.h
@@ -301,11 +301,6 @@ public:
typedef DenseMap<StructType*, unsigned, DenseMapInfo<StructType*> >
NumeredTypesMapTy;
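// Illustrative sketch (not from this commit): a speculation heuristic can
// weigh added critical-path length against the new misprediction field,
// which defaults to 10 cycles unless the target's scheduling model says
// otherwise.
static bool worthSpeculating(const llvm::MCSchedModel &Model,
                             unsigned ExtraDepthCycles) {
  return ExtraDepthCycles < Model.MispredictPenalty;
}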
- void findUsedStructTypes(std::vector<StructType*> &StructTypes, - bool OnlyNamed = false) const; - /// getTypeByName - Return the type with the specified name, or null if there /// is none by that name. StructType *getTypeByName(StringRef Name) const; diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h index 30a88dc..ba0030d 100644 --- a/include/llvm/Object/ELF.h +++ b/include/llvm/Object/ELF.h @@ -507,9 +507,6 @@ private: const Elf_Rela *getRela(DataRefImpl Rela) const; const char *getString(uint32_t section, uint32_t offset) const; const char *getString(const Elf_Shdr *section, uint32_t offset) const; - error_code getSymbolName(const Elf_Shdr *section, - const Elf_Sym *Symb, - StringRef &Res) const; error_code getSymbolVersion(const Elf_Shdr *section, const Elf_Sym *Symb, StringRef &Version, @@ -521,6 +518,11 @@ protected: void validateSymbol(DataRefImpl Symb) const; public: + error_code getSymbolName(const Elf_Shdr *section, + const Elf_Sym *Symb, + StringRef &Res) const; + error_code getSectionName(const Elf_Shdr *section, + StringRef &Res) const; const Elf_Dyn *getDyn(DataRefImpl DynData) const; error_code getSymbolVersion(SymbolRef Symb, StringRef &Version, bool &IsDefault) const; @@ -599,11 +601,15 @@ public: virtual StringRef getObjectType() const { return "ELF"; } virtual unsigned getArch() const; virtual StringRef getLoadName() const; + virtual error_code getSectionContents(const Elf_Shdr *sec, + StringRef &Res) const; uint64_t getNumSections() const; uint64_t getStringTableIndex() const; ELF::Elf64_Word getSymbolTableIndex(const Elf_Sym *symb) const; const Elf_Shdr *getSection(const Elf_Sym *symb) const; + const Elf_Shdr *getElfSection(section_iterator &It) const; + const Elf_Sym *getElfSymbol(symbol_iterator &It) const; // Methods for type inquiry through isa, cast, and dyn_cast bool isDyldType() const { return isDyldELFObject; } @@ -785,6 +791,21 @@ ELFObjectFile<target_endianness, is64Bits> } template<support::endianness target_endianness, bool is64Bits> +const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr * +ELFObjectFile<target_endianness, is64Bits> + ::getElfSection(section_iterator &It) const { + llvm::object::DataRefImpl ShdrRef = It->getRawDataRefImpl(); + return reinterpret_cast<const Elf_Shdr *>(ShdrRef.p); +} + +template<support::endianness target_endianness, bool is64Bits> +const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym * +ELFObjectFile<target_endianness, is64Bits> + ::getElfSymbol(symbol_iterator &It) const { + return getSymbol(It->getRawDataRefImpl()); +} + +template<support::endianness target_endianness, bool is64Bits> error_code ELFObjectFile<target_endianness, is64Bits> ::getSymbolFileOffset(DataRefImpl Symb, uint64_t &Result) const { @@ -1062,6 +1083,15 @@ error_code ELFObjectFile<target_endianness, is64Bits> template<support::endianness target_endianness, bool is64Bits> error_code ELFObjectFile<target_endianness, is64Bits> + ::getSectionContents(const Elf_Shdr *Sec, + StringRef &Result) const { + const char *start = (const char*)base() + Sec->sh_offset; + Result = StringRef(start, Sec->sh_size); + return object_error::success; +} + +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> ::getSectionAlignment(DataRefImpl Sec, uint64_t &Result) const { const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p); @@ -2016,6 +2046,9 @@ unsigned ELFObjectFile<target_endianness, is64Bits>::getArch() const { return Triple::arm; case 
ELF::EM_HEXAGON: return Triple::hexagon; + case ELF::EM_MIPS: + return (target_endianness == support::little) ? + Triple::mipsel : Triple::mips; default: return Triple::UnknownArch; } @@ -2155,6 +2188,14 @@ error_code ELFObjectFile<target_endianness, is64Bits> template<support::endianness target_endianness, bool is64Bits> error_code ELFObjectFile<target_endianness, is64Bits> + ::getSectionName(const Elf_Shdr *section, + StringRef &Result) const { + Result = StringRef(getString(dot_shstrtab_sec, section->sh_name)); + return object_error::success; +} + +template<support::endianness target_endianness, bool is64Bits> +error_code ELFObjectFile<target_endianness, is64Bits> ::getSymbolVersion(const Elf_Shdr *section, const Elf_Sym *symb, StringRef &Version, diff --git a/include/llvm/Object/MachOFormat.h b/include/llvm/Object/MachOFormat.h index f30d431..e4bfcc6 100644 --- a/include/llvm/Object/MachOFormat.h +++ b/include/llvm/Object/MachOFormat.h @@ -273,6 +273,10 @@ namespace macho { uint16_t Flags; uint32_t Value; }; + // Despite containing a uint64_t, this structure is only 4-byte aligned within + // a MachO file. +#pragma pack(push) +#pragma pack(4) struct Symbol64TableEntry { uint32_t StringIndex; uint8_t Type; @@ -280,6 +284,7 @@ namespace macho { uint16_t Flags; uint64_t Value; }; +#pragma pack(pop) /// @} /// @name Data-in-code Table Entry diff --git a/include/llvm/Support/AlignOf.h b/include/llvm/Support/AlignOf.h index 85607c8..cf71251 100644 --- a/include/llvm/Support/AlignOf.h +++ b/include/llvm/Support/AlignOf.h @@ -107,8 +107,8 @@ LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(8192); // Any larger and MSVC complains. #undef LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT -/// \brief This class template exposes a typedef for type containing a suitable -/// aligned character array to hold elements of any of up to four types. +/// \brief This union template exposes a suitably aligned and sized character +/// array member which can hold elements of any of up to four types. /// /// These types may be arrays, structs, or any other types. The goal is to /// produce a union type containing a character array which, when used, forms @@ -116,7 +116,8 @@ LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(8192); /// than four types can be added at the cost of more boiler plate. template <typename T1, typename T2 = char, typename T3 = char, typename T4 = char> -class AlignedCharArray { +union AlignedCharArrayUnion { +private: class AlignerImpl { T1 t1; T2 t2; T3 t3; T4 t4; @@ -127,6 +128,12 @@ class AlignedCharArray { }; public: + /// \brief The character array buffer for use by clients. + /// + /// No other member of this union should be referenced. They exist purely to + /// constrain the layout of this character array. + char buffer[sizeof(SizerImpl)]; + // Sadly, Clang and GCC both fail to align a character array properly even // with an explicit alignment attribute. To work around this, we union // the character array that will actually be used with a struct that contains @@ -134,16 +141,10 @@ public: // and GCC will properly register the alignment of a struct containing an // aligned member, and this alignment should carry over to the character // array in the union. - union union_type { - // This is the only member of the union which should be used by clients: - char buffer[sizeof(SizerImpl)]; - - // This member of the union only exists to force the alignment.
- struct { - typename llvm::AlignedCharArrayImpl<AlignOf<AlignerImpl>::Alignment>::type - nonce_inner_member; - } nonce_member; - }; + struct { + typename llvm::AlignedCharArrayImpl<AlignOf<AlignerImpl>::Alignment>::type + nonce_inner_member; + } nonce_member; }; } // end namespace llvm diff --git a/include/llvm/Support/COFF.h b/include/llvm/Support/COFF.h index b18ba2b..ba8adb0 100644 --- a/include/llvm/Support/COFF.h +++ b/include/llvm/Support/COFF.h @@ -50,6 +50,8 @@ namespace COFF { }; enum MachineTypes { + MT_Invalid = 0xffff, + IMAGE_FILE_MACHINE_UNKNOWN = 0x0, IMAGE_FILE_MACHINE_AM33 = 0x13, IMAGE_FILE_MACHINE_AMD64 = 0x8664, @@ -74,6 +76,8 @@ namespace COFF { }; enum Characteristics { + C_Invalid = 0, + /// The file does not contain base relocations and must be loaded at its /// preferred base. If this cannot be done, the loader will error. IMAGE_FILE_RELOCS_STRIPPED = 0x0001, @@ -138,6 +142,8 @@ namespace COFF { /// Storage class tells where and what the symbol represents enum SymbolStorageClass { + SSC_Invalid = 0xff, + IMAGE_SYM_CLASS_END_OF_FUNCTION = -1, ///< Physical end of function IMAGE_SYM_CLASS_NULL = 0, ///< No symbol IMAGE_SYM_CLASS_AUTOMATIC = 1, ///< Stack variable @@ -214,6 +220,8 @@ namespace COFF { }; enum SectionCharacteristics { + SC_Invalid = 0xffffffff, + IMAGE_SCN_TYPE_NO_PAD = 0x00000008, IMAGE_SCN_CNT_CODE = 0x00000020, IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040, diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h index f654f32..ea0a4da 100644 --- a/include/llvm/Support/Compiler.h +++ b/include/llvm/Support/Compiler.h @@ -38,6 +38,25 @@ #define llvm_move(value) (value) #endif +/// LLVM_DELETED_FUNCTION - Expands to = delete if the compiler supports it. +/// Use to mark functions as uncallable. Member functions with this should +/// be declared private so that some behavior is kept in C++03 mode. +/// +/// class DontCopy { +/// private: +/// DontCopy(const DontCopy&) LLVM_DELETED_FUNCTION; +/// DontCopy &operator =(const DontCopy&) LLVM_DELETED_FUNCTION; +/// public: +/// ... +/// }; +#if (__has_feature(cxx_deleted_functions) \ + || defined(__GXX_EXPERIMENTAL_CXX0X__)) + // No version of MSVC currently supports this. +#define LLVM_DELETED_FUNCTION = delete +#else +#define LLVM_DELETED_FUNCTION +#endif + /// LLVM_LIBRARY_VISIBILITY - If a class marked with this attribute is linked /// into a shared library, then the class should be private to the library and /// not accessible from outside it. Can also be used to mark variables and @@ -168,4 +187,13 @@ # define LLVM_BUILTIN_UNREACHABLE __builtin_unreachable() #endif +// LLVM_BUILTIN_TRAP - On compilers which support it, expands to an expression +// which causes the program to exit abnormally.
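A short sketch of how the two new macros above compose in practice; the class and function here are hypothetical, not from this patch:

#include "llvm/Support/Compiler.h"

class NonCopyable {
private:
  // With C++11 these expand to '= delete'; in C++03 they stay private and
  // undefined, so misuse still fails at compile or link time.
  NonCopyable(const NonCopyable &) LLVM_DELETED_FUNCTION;
  NonCopyable &operator=(const NonCopyable &) LLVM_DELETED_FUNCTION;
public:
  NonCopyable() {}
};

static int checkedDiv(int a, int b) {
  if (b == 0)
    LLVM_BUILTIN_TRAP; // abort abnormally instead of dividing by zero
  return a / b;
}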
+#if defined(__clang__) || (__GNUC__ > 4) \ + || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) +# define LLVM_BUILTIN_TRAP __builtin_trap() +#else +# define LLVM_BUILTIN_TRAP *(volatile int*)0x11 = 0 +#endif + #endif diff --git a/include/llvm/Support/DataTypes.h.cmake b/include/llvm/Support/DataTypes.h.cmake index a3a6489..7484abd 100644 --- a/include/llvm/Support/DataTypes.h.cmake +++ b/include/llvm/Support/DataTypes.h.cmake @@ -79,18 +79,6 @@ typedef u_int64_t uint64_t; #endif #endif -#ifdef _OpenBSD_ -#define INT8_MAX 127 -#define INT8_MIN -128 -#define UINT8_MAX 255 -#define INT16_MAX 32767 -#define INT16_MIN -32768 -#define UINT16_MAX 65535 -#define INT32_MAX 2147483647 -#define INT32_MIN -2147483648 -#define UINT32_MAX 4294967295U -#endif - #else /* _MSC_VER */ /* Visual C++ doesn't provide standard integer headers, but it does provide built-in data types. */ diff --git a/include/llvm/Support/DataTypes.h.in b/include/llvm/Support/DataTypes.h.in index b492bb1..b9fb48a 100644 --- a/include/llvm/Support/DataTypes.h.in +++ b/include/llvm/Support/DataTypes.h.in @@ -79,18 +79,6 @@ typedef u_int64_t uint64_t; #endif #endif -#ifdef _OpenBSD_ -#define INT8_MAX 127 -#define INT8_MIN -128 -#define UINT8_MAX 255 -#define INT16_MAX 32767 -#define INT16_MIN -32768 -#define UINT16_MAX 65535 -#define INT32_MAX 2147483647 -#define INT32_MIN -2147483648 -#define UINT32_MAX 4294967295U -#endif - #else /* _MSC_VER */ /* Visual C++ doesn't provide standard integer headers, but it does provide built-in data types. */ diff --git a/include/llvm/Support/Debug.h b/include/llvm/Support/Debug.h index e723272..896fe84 100644 --- a/include/llvm/Support/Debug.h +++ b/include/llvm/Support/Debug.h @@ -19,7 +19,7 @@ // foo class. // // When compiling without assertions, the -debug-* options and all code in -// DEBUG() statements disappears, so it does not effect the runtime of the code. +// DEBUG() statements disappears, so it does not affect the runtime of the code. // //===----------------------------------------------------------------------===// @@ -49,11 +49,11 @@ extern bool DebugFlag; /// bool isCurrentDebugType(const char *Type); -/// SetCurrentDebugType - Set the current debug type, as if the -debug-only=X +/// setCurrentDebugType - Set the current debug type, as if the -debug-only=X /// option were specified. Note that DebugFlag also needs to be set to true for /// debug output to be produced. /// -void SetCurrentDebugType(const char *Type); +void setCurrentDebugType(const char *Type); /// DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug /// information. If the '-debug' option is specified on the commandline, and if @@ -70,7 +70,7 @@ void SetCurrentDebugType(const char *Type); #else #define isCurrentDebugType(X) (false) -#define SetCurrentDebugType(X) +#define setCurrentDebugType(X) #define DEBUG_WITH_TYPE(TYPE, X) do { } while (0) #endif diff --git a/include/llvm/Support/FileOutputBuffer.h b/include/llvm/Support/FileOutputBuffer.h new file mode 100644 index 0000000..0f07164 --- /dev/null +++ b/include/llvm/Support/FileOutputBuffer.h @@ -0,0 +1,97 @@ +//=== FileOutputBuffer.h - File Output Buffer -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Utility for creating an in-memory buffer that will be written to a file.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_FILEOUTPUTBUFFER_H +#define LLVM_SUPPORT_FILEOUTPUTBUFFER_H + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class error_code; +template<class T> class OwningPtr; + +/// FileOutputBuffer - This interface provides a simple way to create an in-memory +/// buffer which will be written to a file. During the lifetime of these +/// objects, the content or existence of the specified file is undefined. That +/// is, creating an OutputBuffer for a file may immediately remove the file. +/// If the FileOutputBuffer is committed, the target file's content will become +/// the buffer content at the time of the commit. If the FileOutputBuffer is +/// not committed, the file will be deleted in the FileOutputBuffer destructor. +class FileOutputBuffer { +public: + + enum { + F_executable = 1 /// set the 'x' bit on the resulting file + }; + + /// Factory method to create an OutputBuffer object which manages a read/write + /// buffer of the specified size. When committed, the buffer will be written + /// to the file at the specified path. + static error_code create(StringRef FilePath, size_t Size, + OwningPtr<FileOutputBuffer> &Result, + unsigned Flags=0); + + + /// Returns a pointer to the start of the buffer. + uint8_t *getBufferStart() const { + return BufferStart; + } + + /// Returns a pointer to the end of the buffer. + uint8_t *getBufferEnd() const { + return BufferEnd; + } + + /// Returns size of the buffer. + size_t getBufferSize() const { + return BufferEnd - BufferStart; + } + + /// Returns path where file will show up if buffer is committed. + StringRef getPath() const { + return FinalPath; + } + + /// Flushes the content of the buffer to its file and deallocates the + /// buffer. If commit() is not called before this object's destructor + /// is called, the file is deleted in the destructor. The optional parameter + /// is used if it turns out you want the file size to be smaller than + /// initially requested. + error_code commit(int64_t NewSmallerSize = -1); + + /// If this object was previously committed, the destructor just deletes + /// this object. If this object was not committed, the destructor + /// deallocates the buffer and the target file is never written. + ~FileOutputBuffer(); + + +protected: + FileOutputBuffer(const FileOutputBuffer &); // DO NOT IMPLEMENT + FileOutputBuffer &operator=(const FileOutputBuffer &); // DO NOT IMPLEMENT + FileOutputBuffer(uint8_t *Start, uint8_t *End, + StringRef Path, StringRef TempPath); + + uint8_t *BufferStart; + uint8_t *BufferEnd; + SmallString<128> FinalPath; + SmallString<128> TempPath; +}; + + + +} // end namespace llvm + +#endif diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index e0353f9..f4a9aa0 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -28,6 +28,7 @@ #define LLVM_SUPPORT_FILE_SYSTEM_H #include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/DataTypes.h" @@ -576,6 +577,82 @@ error_code FindLibrary(const Twine &short_name, SmallVectorImpl<char> &result); error_code GetMainExecutable(const char *argv0, void *MainAddr, SmallVectorImpl<char> &result); +/// This class represents a memory mapped file. It is based on +/// boost::iostreams::mapped_file.
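A hedged usage sketch for the new FileOutputBuffer, relying only on the create/commit contract documented above; the path and payload are illustrative:

#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/Support/system_error.h"
#include <cstring>

static llvm::error_code writeBlob(const char *Data, size_t Size) {
  llvm::OwningPtr<llvm::FileOutputBuffer> Buffer;
  // Creating the buffer may already remove any existing file at the path.
  llvm::error_code EC =
      llvm::FileOutputBuffer::create("out.bin", Size, Buffer);
  if (EC)
    return EC;
  std::memcpy(Buffer->getBufferStart(), Data, Size);
  // Without a commit(), the destructor deletes the file instead.
  return Buffer->commit();
}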
+class mapped_file_region { + mapped_file_region() LLVM_DELETED_FUNCTION; + mapped_file_region(mapped_file_region&) LLVM_DELETED_FUNCTION; + mapped_file_region &operator =(mapped_file_region&) LLVM_DELETED_FUNCTION; + +public: + enum mapmode { + readonly, //< May only access map via const_data as read only. + readwrite, //< May access map via data and modify it. Written to path. + priv //< May modify via data, but changes are lost on destruction. + }; + +private: + /// Platform specific mapping state. + mapmode Mode; + uint64_t Size; + void *Mapping; +#if LLVM_ON_WIN32 + int FileDescriptor; + void *FileHandle; + void *FileMappingHandle; +#endif + + error_code init(int FD, uint64_t Offset); + +public: + typedef char char_type; + +#if LLVM_USE_RVALUE_REFERENCES + mapped_file_region(mapped_file_region&&); + mapped_file_region &operator =(mapped_file_region&&); +#endif + + /// Construct a mapped_file_region at \a path starting at \a offset of length + /// \a length and with access \a mode. + /// + /// \param path Path to the file to map. If it does not exist it will be + /// created. + /// \param mode How to map the memory. + /// \param length Number of bytes to map in starting at \a offset. If the file + /// is shorter than this, it will be extended. If \a length is + /// 0, the entire file will be mapped. + /// \param offset Byte offset from the beginning of the file where the map + /// should begin. Must be a multiple of + /// mapped_file_region::alignment(). + /// \param ec This is set to errc::success if the map was constructed + /// successfully. Otherwise it is set to a platform dependent error. + mapped_file_region(const Twine &path, + mapmode mode, + uint64_t length, + uint64_t offset, + error_code &ec); + + /// \param fd An open file descriptor to map. mapped_file_region takes + /// ownership. It must have been opened in the correct mode. + mapped_file_region(int fd, + mapmode mode, + uint64_t length, + uint64_t offset, + error_code &ec); + + ~mapped_file_region(); + + mapmode flags() const; + uint64_t size() const; + char *data() const; + + /// Get a const view of the data. Modifying this memory has undefined + /// behavior. + const char *const_data() const; + + /// \returns The minimum alignment offset must be. + static int alignment(); +}; /// @brief Memory maps the contents of a file /// diff --git a/include/llvm/Support/InstVisitor.h b/include/llvm/Support/InstVisitor.h index 52de8f6..109b3cf 100644 --- a/include/llvm/Support/InstVisitor.h +++ b/include/llvm/Support/InstVisitor.h @@ -13,6 +13,8 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/IntrinsicInst.h" #include "llvm/Module.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" @@ -145,14 +147,17 @@ public: // visitMul to proxy to visitBinaryOperator for instance in case the user does // not need this generality. // - // The one problem case we have to handle here though is that the PHINode - // class and opcode name are the exact same. Because of this, we cannot - // define visitPHINode (the inst version) to forward to visitPHINode (the - // generic version) without multiply defined symbols and recursion. To handle - // this, we do not autoexpand "Other" instructions, we do it manually. - // + // These functions can also implement fan-out, when a single opcode and + // instruction have multiple more specific Instruction subclasses. The Call
We implement that by redirecting that + // instruction to a special delegation helper. #define HANDLE_INST(NUM, OPCODE, CLASS) \ - RetTy visit##OPCODE(CLASS &I) { DELEGATE(CLASS); } + RetTy visit##OPCODE(CLASS &I) { \ + if (NUM == Instruction::Call) \ + return delegateCallInst(I); \ + else \ + DELEGATE(CLASS); \ + } #include "llvm/Instruction.def" // Specific Instruction type classes... note that all of the casts are @@ -195,6 +200,17 @@ public: RetTy visitInsertValueInst(InsertValueInst &I) { DELEGATE(Instruction); } RetTy visitLandingPadInst(LandingPadInst &I) { DELEGATE(Instruction); } + // Handle the special intrinsic instruction classes. + RetTy visitDbgDeclareInst(DbgDeclareInst &I) { DELEGATE(DbgInfoIntrinsic);} + RetTy visitDbgValueInst(DbgValueInst &I) { DELEGATE(DbgInfoIntrinsic);} + RetTy visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { DELEGATE(IntrinsicInst); } + RetTy visitMemSetInst(MemSetInst &I) { DELEGATE(MemIntrinsic); } + RetTy visitMemCpyInst(MemCpyInst &I) { DELEGATE(MemTransferInst); } + RetTy visitMemMoveInst(MemMoveInst &I) { DELEGATE(MemTransferInst); } + RetTy visitMemTransferInst(MemTransferInst &I) { DELEGATE(MemIntrinsic); } + RetTy visitMemIntrinsic(MemIntrinsic &I) { DELEGATE(IntrinsicInst); } + RetTy visitIntrinsicInst(IntrinsicInst &I) { DELEGATE(CallInst); } + // Call and Invoke are slightly different as they delegate first through // a generic CallSite visitor. RetTy visitCallInst(CallInst &I) { @@ -234,6 +250,29 @@ public: // Note that you MUST override this function if your return type is not void. // void visitInstruction(Instruction &I) {} // Ignore unhandled instructions + +private: + // Special helper function to delegate to CallInst subclass visitors. + RetTy delegateCallInst(CallInst &I) { + if (const Function *F = I.getCalledFunction()) { + switch ((Intrinsic::ID)F->getIntrinsicID()) { + default: DELEGATE(IntrinsicInst); + case Intrinsic::dbg_declare: DELEGATE(DbgDeclareInst); + case Intrinsic::dbg_value: DELEGATE(DbgValueInst); + case Intrinsic::memcpy: DELEGATE(MemCpyInst); + case Intrinsic::memmove: DELEGATE(MemMoveInst); + case Intrinsic::memset: DELEGATE(MemSetInst); + case Intrinsic::not_intrinsic: break; + } + } + DELEGATE(CallInst); + } + + // An overload that will never actually be called, it is used only from dead + // code in the dispatching from opcodes to instruction subclasses. + RetTy delegateCallInst(Instruction &I) { + llvm_unreachable("delegateCallInst called for non-CallInst"); + } }; #undef DELEGATE diff --git a/include/llvm/Support/LEB128.h b/include/llvm/Support/LEB128.h new file mode 100644 index 0000000..410edd4 --- /dev/null +++ b/include/llvm/Support/LEB128.h @@ -0,0 +1,95 @@ +//===- llvm/Support/LEB128.h - [SU]LEB128 utility functions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares some utility functions for encoding SLEB128 and +// ULEB128 values. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SYSTEM_LEB128_H +#define LLVM_SYSTEM_LEB128_H + +#include <llvm/Support/raw_ostream.h> + +namespace llvm { + +/// Utility function to encode a SLEB128 value to an output stream.
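The InstVisitor fan-out above lets a visitor hook one intrinsic class directly instead of switching on every call; a minimal sketch (the counting is illustrative):

#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Support/InstVisitor.h"

// delegateCallInst routes any CallInst that is really a MemCpyInst here.
struct MemCpyCounter : public llvm::InstVisitor<MemCpyCounter> {
  unsigned Count;
  MemCpyCounter() : Count(0) {}
  void visitMemCpyInst(llvm::MemCpyInst &I) { ++Count; }
};

// Usage: MemCpyCounter C; C.visit(F); // F is an llvm::Function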
+static inline void encodeSLEB128(int64_t Value, raw_ostream &OS) { + bool More; + do { + uint8_t Byte = Value & 0x7f; + // NOTE: this assumes that this signed shift is an arithmetic right shift. + Value >>= 7; + More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || + ((Value == -1) && ((Byte & 0x40) != 0)))); + if (More) + Byte |= 0x80; // Mark this byte to show that more bytes will follow. + OS << char(Byte); + } while (More); +} + +/// Utility function to encode a ULEB128 value to an output stream. +static inline void encodeULEB128(uint64_t Value, raw_ostream &OS, + unsigned Padding = 0) { + do { + uint8_t Byte = Value & 0x7f; + Value >>= 7; + if (Value != 0 || Padding != 0) + Byte |= 0x80; // Mark this byte to show that more bytes will follow. + OS << char(Byte); + } while (Value != 0); + + // Pad with 0x80 and emit a null byte at the end. + if (Padding != 0) { + for (; Padding != 1; --Padding) + OS << '\x80'; + OS << '\x00'; + } +} + +/// Utility function to encode a ULEB128 value to a buffer. Returns +/// the length in bytes of the encoded value. +static inline unsigned encodeULEB128(uint64_t Value, uint8_t *p, + unsigned Padding = 0) { + uint8_t *orig_p = p; + do { + uint8_t Byte = Value & 0x7f; + Value >>= 7; + if (Value != 0 || Padding != 0) + Byte |= 0x80; // Mark this byte to show that more bytes will follow. + *p++ = Byte; + } while (Value != 0); + + // Pad with 0x80 and emit a null byte at the end. + if (Padding != 0) { + for (; Padding != 1; --Padding) + *p++ = '\x80'; + *p++ = '\x00'; + } + return (unsigned)(p - orig_p); +} + + +/// Utility function to decode a ULEB128 value. +static inline uint64_t decodeULEB128(const uint8_t *p, unsigned *n = 0) { + const uint8_t *orig_p = p; + uint64_t Value = 0; + unsigned Shift = 0; + do { + Value += (*p & 0x7f) << Shift; + Shift += 7; + } while (*p++ >= 128); + if (n) + *n = (unsigned)(p - orig_p); + return Value; +} + +} // namespace llvm + +#endif // LLVM_SYSTEM_LEB128_H diff --git a/include/llvm/Support/NoFolder.h b/include/llvm/Support/NoFolder.h index 75c1a79..8e41a64 100644 --- a/include/llvm/Support/NoFolder.h +++ b/include/llvm/Support/NoFolder.h @@ -181,6 +181,12 @@ public: ArrayRef<Constant *> IdxList) const { return ConstantExpr::getGetElementPtr(C, IdxList); } + Constant *CreateGetElementPtr(Constant *C, Constant *Idx) const { + // This form of the function only exists to avoid ambiguous overload + // warnings about whether to convert Idx to ArrayRef<Constant *> or + // ArrayRef<Value *>. + return ConstantExpr::getGetElementPtr(C, Idx); + } Instruction *CreateGetElementPtr(Constant *C, ArrayRef<Value *> IdxList) const { return GetElementPtrInst::Create(C, IdxList); @@ -190,6 +196,12 @@ public: ArrayRef<Constant *> IdxList) const { return ConstantExpr::getInBoundsGetElementPtr(C, IdxList); } + Constant *CreateInBoundsGetElementPtr(Constant *C, Constant *Idx) const { + // This form of the function only exists to avoid ambiguous overload + // warnings about whether to convert Idx to ArrayRef<Constant *> or + // ArrayRef<Value *>.
+ return ConstantExpr::getInBoundsGetElementPtr(C, Idx); + } Instruction *CreateInBoundsGetElementPtr(Constant *C, ArrayRef<Value *> IdxList) const { return GetElementPtrInst::CreateInBounds(C, IdxList); diff --git a/include/llvm/Support/ValueHandle.h b/include/llvm/Support/ValueHandle.h index 6787633..61e21b8 100644 --- a/include/llvm/Support/ValueHandle.h +++ b/include/llvm/Support/ValueHandle.h @@ -110,11 +110,12 @@ protected: V != DenseMapInfo<Value *>::getTombstoneKey(); } -private: +public: // Callbacks made from Value. static void ValueIsDeleted(Value *V); static void ValueIsRAUWd(Value *Old, Value *New); +private: // Internal implementation details. ValueHandleBase **getPrevPtr() const { return PrevPair.getPointer(); } HandleBaseKind getKind() const { return PrevPair.getInt(); } diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index 03adcce..c406bca 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -28,6 +28,24 @@ class SubRegIndex<list<SubRegIndex> comps = []> { // ComposedOf - A list of two SubRegIndex instances, [A, B]. // This indicates that this SubRegIndex is the result of composing A and B. list<SubRegIndex> ComposedOf = comps; + + // CoveringSubRegIndices - A list of two or more sub-register indexes that + // cover this sub-register. + // + // This field should normally be left blank as TableGen can infer it. + // + // TableGen automatically detects sub-registers that straddle the registers + // in the SubRegs field of a Register definition. For example: + // + // Q0 = dsub_0 -> D0, dsub_1 -> D1 + // Q1 = dsub_0 -> D2, dsub_1 -> D3 + // D1_D2 = dsub_0 -> D1, dsub_1 -> D2 + // QQ0 = qsub_0 -> Q0, qsub_1 -> Q1 + // + // TableGen will infer that D1_D2 is a sub-register of QQ0. It will be given + // the synthetic index dsub_1_dsub_2 unless some SubRegIndex is defined with + // CoveringSubRegIndices = [dsub_1, dsub_2]. + list<SubRegIndex> CoveringSubRegIndices = []; } // RegAltNameIndex - The alternate name set to use for register operands of @@ -64,18 +82,6 @@ class Register<string n, list<string> altNames = []> { // register. list<RegAltNameIndex> RegAltNameIndices = []; - // CompositeIndices - Specify subreg indices that don't correspond directly to - // a register in SubRegs and are not inherited. The following formats are - // supported: - // - // (a) Identity - Reg:a == Reg - // (a b) Alias - Reg:a == Reg:b - // (a b,c) Composite - Reg:a == (Reg:b):c - // - // This can be used to disambiguate a sub-sub-register that exists in more - // than one subregister and other weird stuff. - list<dag> CompositeIndices = []; - // DwarfNumbers - Numbers used internally by gcc/gdb to identify the register. // These values can be determined by locating the <target>.h file in the // directory llvmgcc/gcc/config/<target>/ and looking for REGISTER_NAMES. The @@ -252,9 +258,6 @@ class RegisterTuples<list<SubRegIndex> Indices, list<dag> Regs> { // SubRegIndices - N SubRegIndex instances. This provides the names of the // sub-registers in the synthesized super-registers. list<SubRegIndex> SubRegIndices = Indices; - - // Compose sub-register indices like in a normal Register. - list<dag> CompositeIndices = []; } @@ -336,6 +339,7 @@ class Instruction { bit isCompare = 0; // Is this instruction a comparison instruction? bit isMoveImm = 0; // Is this instruction a move immediate instruction? bit isBitcast = 0; // Is this instruction a bitcast instruction? + bit isSelect = 0; // Is this instruction a select instruction? 
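Returning to the LEB128 helpers added earlier, encode and decode round-trip; a small self-check using the classic DWARF example value 624485 (which encodes as 0xE5 0x8E 0x26):

#include "llvm/Support/LEB128.h"
#include <cassert>

static void roundTripULEB128() {
  uint8_t Buf[10];
  unsigned Len = llvm::encodeULEB128(624485, Buf); // Len == 3
  unsigned Read;
  uint64_t Value = llvm::decodeULEB128(Buf, &Read);
  assert(Value == 624485 && Read == Len);
}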
bit isBarrier = 0; // Can control flow fall through this instruction? bit isCall = 0; // Is this instruction a call instruction? bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand? @@ -749,6 +753,10 @@ class AsmParser { // function of the AsmParser class to call on every matched instruction. // This can be used to perform target specific instruction post-processing. string AsmParserInstCleanup = ""; + + //ShouldEmitMatchRegisterName - Set to false if the target needs a hand + //written register name matcher + bit ShouldEmitMatchRegisterName = 1; } def DefaultAsmParser : AsmParser; diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 73efc50..da30ab8 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -14,6 +14,7 @@ #ifndef LLVM_TARGET_TARGETINSTRINFO_H #define LLVM_TARGET_TARGETINSTRINFO_H +#include "llvm/ADT/SmallSet.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineFunction.h" @@ -27,6 +28,7 @@ class MachineMemOperand; class MachineRegisterInfo; class MDNode; class MCInst; +class MCSchedModel; class SDNode; class ScheduleHazardRecognizer; class SelectionDAG; @@ -186,14 +188,6 @@ public: const MachineInstr *Orig, const TargetRegisterInfo &TRI) const = 0; - /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the - /// two-addrss instruction inserted by two-address pass. - virtual void scheduleTwoAddrSource(MachineInstr *SrcMI, - MachineInstr *UseMI, - const TargetRegisterInfo &TRI) const { - // Do nothing. - } - /// duplicate - Create a duplicate of the Orig instruction in MF. This is like /// MachineFunction::CloneMachineInstr(), but the target may update operands /// that are required to be unique. @@ -419,6 +413,51 @@ public: llvm_unreachable("Target didn't implement TargetInstrInfo::insertSelect!"); } + /// analyzeSelect - Analyze the given select instruction, returning true if + /// it cannot be understood. It is assumed that MI->isSelect() is true. + /// + /// When successful, return the controlling condition and the operands that + /// determine the true and false result values. + /// + /// Result = SELECT Cond, TrueOp, FalseOp + /// + /// Some targets can optimize select instructions, for example by predicating + /// the instruction defining one of the operands. Such targets should set + /// Optimizable. + /// + /// @param MI Select instruction to analyze. + /// @param Cond Condition controlling the select. + /// @param TrueOp Operand number of the value selected when Cond is true. + /// @param FalseOp Operand number of the value selected when Cond is false. + /// @param Optimizable Returned as true if MI is optimizable. + /// @returns False on success. + virtual bool analyzeSelect(const MachineInstr *MI, + SmallVectorImpl<MachineOperand> &Cond, + unsigned &TrueOp, unsigned &FalseOp, + bool &Optimizable) const { + assert(MI && MI->isSelect() && "MI must be a select instruction"); + return true; + } + + /// optimizeSelect - Given a select instruction that was understood by + /// analyzeSelect and returned Optimizable = true, attempt to optimize MI by + /// merging it with one of its operands. Returns NULL on failure. + /// + /// When successful, returns the new select instruction. The client is + /// responsible for deleting MI. + /// + /// If both sides of the select can be optimized, PreferFalse is used to pick + /// a side. + /// + /// @param MI Optimizable select instruction. 
+ /// @param PreferFalse Try to optimize FalseOp instead of TrueOp. + /// @returns Optimized instruction or NULL. + virtual MachineInstr *optimizeSelect(MachineInstr *MI, + bool PreferFalse = false) const { + // This function must be implemented if Optimizable is ever set. + llvm_unreachable("Target must implement TargetInstrInfo::optimizeSelect!"); + } + /// copyPhysReg - Emit instructions to copy a pair of physical registers. virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, @@ -693,6 +732,20 @@ public: return false; } + /// optimizeLoadInstr - Try to remove the load by folding it to a register + /// operand at the use. We fold the load instructions if and only if the + /// def and use are in the same BB. We only look at one load and see + /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register + /// defined by the load we are trying to fold. DefMI returns the machine + /// instruction that defines FoldAsLoadDefReg, and the function returns + /// the machine instruction generated due to folding. + virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI, + const MachineRegisterInfo *MRI, + unsigned &FoldAsLoadDefReg, + MachineInstr *&DefMI) const { + return 0; + } + /// FoldImmediate - 'Reg' is known to be defined by a move immediate /// instruction, try to fold the immediate into the use instruction. virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, @@ -775,7 +828,7 @@ public: SDNode *Node) const = 0; /// Return the default expected latency for a def based on its opcode. - unsigned defaultDefLatency(const InstrItineraryData *ItinData, + unsigned defaultDefLatency(const MCSchedModel *SchedModel, const MachineInstr *DefMI) const; /// isHighLatencyDef - Return true if this opcode has high latency to its diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h index c8cacf2..ea2874f 100644 --- a/include/llvm/Target/TargetLibraryInfo.h +++ b/include/llvm/Target/TargetLibraryInfo.h @@ -18,36 +18,47 @@ namespace llvm { namespace LibFunc { enum Func { + /// int __cxa_atexit(void (*f)(void *), void *p, void *d); + cxa_atexit, + /// void __cxa_guard_abort(guard_t *guard); + /// guard_t is int64_t in Itanium ABI or int32_t on ARM eabi.
+ cxa_guard_abort, + /// int __cxa_guard_acquire(guard_t *guard); + cxa_guard_acquire, + /// void __cxa_guard_release(guard_t *guard); + cxa_guard_release, + /// void *__memcpy_chk(void *s1, const void *s2, size_t n, size_t s1size); + memcpy_chk, /// double acos(double x); acos, - /// long double acosl(long double x); - acosl, /// float acosf(float x); acosf, + /// long double acosl(long double x); + acosl, /// double asin(double x); asin, - /// long double asinl(long double x); - asinl, /// float asinf(float x); asinf, + /// long double asinl(long double x); + asinl, /// double atan(double x); atan, - /// long double atanl(long double x); - atanl, - /// float atanf(float x); - atanf, /// double atan2(double y, double x); atan2, - /// long double atan2l(long double y, long double x); - atan2l, /// float atan2f(float y, float x); atan2f, + /// long double atan2l(long double y, long double x); + atan2l, + /// float atanf(float x); + atanf, + /// long double atanl(long double x); + atanl, /// double ceil(double x); ceil, - /// long double ceill(long double x); - ceill, /// float ceilf(float x); ceilf, + /// long double ceill(long double x); + ceill, /// double copysign(double x, double y); copysign, /// float copysignf(float x, float y); @@ -56,54 +67,56 @@ namespace llvm { copysignl, /// double cos(double x); cos, - /// long double cosl(long double x); - cosl, /// float cosf(float x); cosf, /// double cosh(double x); cosh, - /// long double coshl(long double x); - coshl, /// float coshf(float x); coshf, + /// long double coshl(long double x); + coshl, + /// long double cosl(long double x); + cosl, /// double exp(double x); exp, - /// long double expl(long double x); - expl, - /// float expf(float x); - expf, /// double exp2(double x); exp2, - /// long double exp2l(long double x); - exp2l, /// float exp2f(float x); exp2f, + /// long double exp2l(long double x); + exp2l, + /// float expf(float x); + expf, + /// long double expl(long double x); + expl, /// double expm1(double x); expm1, - /// long double expm1l(long double x); - expm1l, /// float expm1f(float x); expm1f, + /// long double expm1l(long double x); + expm1l, /// double fabs(double x); fabs, - /// long double fabsl(long double x); - fabsl, /// float fabsf(float x); fabsf, + /// long double fabsl(long double x); + fabsl, + /// int fiprintf(FILE *stream, const char *format, ...); + fiprintf, /// double floor(double x); floor, - /// long double floorl(long double x); - floorl, /// float floorf(float x); floorf, - /// int fiprintf(FILE *stream, const char *format, ...); - fiprintf, + /// long double floorl(long double x); + floorl, /// double fmod(double x, double y); fmod, - /// long double fmodl(long double x, long double y); - fmodl, /// float fmodf(float x, float y); fmodf, + /// long double fmodl(long double x, long double y); + fmodl, + /// int fputc(int c, FILE *stream); + fputc, /// int fputs(const char *s, FILE *stream); fputs, /// size_t fwrite(const void *ptr, size_t size, size_t nitems, @@ -113,28 +126,32 @@ namespace llvm { iprintf, /// double log(double x); log, - /// long double logl(long double x); - logl, - /// float logf(float x); - logf, - /// double log2(double x); - log2, - /// double long double log2l(long double x); - log2l, - /// float log2f(float x); - log2f, /// double log10(double x); log10, - /// long double log10l(long double x); - log10l, /// float log10f(float x); log10f, + /// long double log10l(long double x); + log10l, /// double log1p(double x); log1p, - /// long double log1pl(long double x); - log1pl, 
/// float log1pf(float x); log1pf, + /// long double log1pl(long double x); + log1pl, + /// double log2(double x); + log2, + /// float log2f(float x); + log2f, + /// long double log2l(long double x); + log2l, + /// float logf(float x); + logf, + /// long double logl(long double x); + logl, + /// void *memchr(const void *s, int c, size_t n); + memchr, + /// int memcmp(const void *s1, const void *s2, size_t n); + memcmp, /// void *memcpy(void *s1, const void *s2, size_t n); memcpy, /// void *memmove(void *s1, const void *s2, size_t n); @@ -155,6 +172,10 @@ namespace llvm { powf, /// long double powl(long double x, long double y); powl, + /// int putchar(int c); + putchar, + /// int puts(const char *s); + puts, /// double rint(double x); rint, /// float rintf(float x); @@ -169,51 +190,58 @@ namespace llvm { roundl, /// double sin(double x); sin, - /// long double sinl(long double x); - sinl, /// float sinf(float x); sinf, /// double sinh(double x); sinh, - /// long double sinhl(long double x); - sinhl, /// float sinhf(float x); sinhf, + /// long double sinhl(long double x); + sinhl, + /// long double sinl(long double x); + sinl, /// int siprintf(char *str, const char *format, ...); siprintf, /// double sqrt(double x); sqrt, - /// long double sqrtl(long double x); - sqrtl, /// float sqrtf(float x); sqrtf, + /// long double sqrtl(long double x); + sqrtl, + /// char *strcat(char *s1, const char *s2); + strcat, + /// char *strchr(const char *s, int c); + strchr, + /// char *strcpy(char *s1, const char *s2); + strcpy, + /// size_t strlen(const char *s); + strlen, + /// char *strncat(char *s1, const char *s2, size_t n); + strncat, + /// int strncmp(const char *s1, const char *s2, size_t n); + strncmp, + /// char *strncpy(char *s1, const char *s2, size_t n); + strncpy, + /// size_t strnlen(const char *s, size_t maxlen); + strnlen, /// double tan(double x); tan, - /// long double tanl(long double x); - tanl, /// float tanf(float x); tanf, /// double tanh(double x); tanh, - /// long double tanhl(long double x); - tanhl, /// float tanhf(float x); tanhf, + /// long double tanhl(long double x); + tanhl, + /// long double tanl(long double x); + tanl, /// double trunc(double x); trunc, /// float truncf(float x); truncf, /// long double truncl(long double x); truncl, - /// int __cxa_atexit(void (*f)(void *), void *p, void *d); - cxa_atexit, - /// void __cxa_guard_abort(guard_t *guard); - /// guard_t is int64_t in Itanium ABI or int32_t on ARM eabi. - cxa_guard_abort, - /// int __cxa_guard_acquire(guard_t *guard); - cxa_guard_acquire, - /// void __cxa_guard_release(guard_t *guard); - cxa_guard_release, NumLibFuncs }; @@ -247,12 +275,41 @@ public: TargetLibraryInfo(const Triple &T); explicit TargetLibraryInfo(const TargetLibraryInfo &TLI); + /// getLibFunc - Search for a particular function name. If it is one of the + /// known library functions, return true and set F to the corresponding value. + bool getLibFunc(StringRef funcName, LibFunc::Func &F) const; + /// has - This function is used by optimizations that want to match on or form /// a given library function. bool has(LibFunc::Func F) const { return getState(F) != Unavailable; } + /// hasOptimizedCodeGen - Return true if the function is both available as + /// a builtin and a candidate for optimized code generation.
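A sketch of how a transform might consult the two new queries together; the TLI reference is assumed to come from the pass's analysis state:

#include "llvm/ADT/StringRef.h"
#include "llvm/Target/TargetLibraryInfo.h"

static bool canFormLibCall(const llvm::TargetLibraryInfo &TLI,
                           llvm::StringRef Name) {
  llvm::LibFunc::Func F;
  if (!TLI.getLibFunc(Name, F)) // name -> LibFunc enum, new in this patch
    return false;
  // Available on this target and worth lowering to optimized codegen.
  return TLI.has(F) && TLI.hasOptimizedCodeGen(F);
}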
+ bool hasOptimizedCodeGen(LibFunc::Func F) const { + if (getState(F) == Unavailable) + return false; + switch (F) { + default: break; + case LibFunc::copysign: case LibFunc::copysignf: case LibFunc::copysignl: + case LibFunc::fabs: case LibFunc::fabsf: case LibFunc::fabsl: + case LibFunc::sin: case LibFunc::sinf: case LibFunc::sinl: + case LibFunc::cos: case LibFunc::cosf: case LibFunc::cosl: + case LibFunc::sqrt: case LibFunc::sqrtf: case LibFunc::sqrtl: + case LibFunc::floor: case LibFunc::floorf: case LibFunc::floorl: + case LibFunc::nearbyint: case LibFunc::nearbyintf: case LibFunc::nearbyintl: + case LibFunc::ceil: case LibFunc::ceilf: case LibFunc::ceill: + case LibFunc::rint: case LibFunc::rintf: case LibFunc::rintl: + case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: + case LibFunc::log2: case LibFunc::log2f: case LibFunc::log2l: + case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l: + case LibFunc::memcmp: + return true; + } + return false; + } + StringRef getName(LibFunc::Func F) const { AvailabilityState State = getState(F); if (State == Unavailable) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 7ad90ea..acf0419 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -51,6 +51,7 @@ namespace llvm { template<typename T> class SmallVectorImpl; class TargetData; class TargetRegisterClass; + class TargetLibraryInfo; class TargetLoweringObjectFile; class Value; @@ -365,7 +366,9 @@ public: /// for it. LegalizeAction getOperationAction(unsigned Op, EVT VT) const { if (VT.isExtended()) return Expand; - assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); + // If a target-specific SDNode requires legalization, require the target + // to provide custom legalization for it. + if (Op > array_lengthof(OpActions[0])) return Custom; unsigned I = (unsigned) VT.getSimpleVT().SimpleTy; return (LegalizeAction)OpActions[I][Op]; } @@ -1413,7 +1416,8 @@ public: /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. - virtual FastISel *createFastISel(FunctionLoweringInfo &) const { + virtual FastISel *createFastISel(FunctionLoweringInfo &, + const TargetLibraryInfo *) const { return 0; } diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index d1a07d1..68ca567 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -155,6 +155,10 @@ namespace llvm { /// automatically realigned, if needed. unsigned RealignStack : 1; + /// SSPBufferSize - The minimum size of buffers that will receive stack + /// smashing protection when -fstack-protection is used. + unsigned SSPBufferSize; + /// EnableFastISel - This flag enables fast-path instruction selection /// which trades away generated code quality in favor of reducing /// compile time. diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index a5bd7c9..df4d900 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -152,7 +152,7 @@ public: } /// getSuperRegIndices - Returns a 0-terminated list of sub-register indices - /// that projec some super-register class into this register class. The list + /// that project some super-register class into this register class. 
The list /// has an entry for each Idx such that: /// /// There exists SuperRC where: @@ -349,6 +349,14 @@ public: return false; } + /// hasRegUnit - Returns true if Reg contains RegUnit. + bool hasRegUnit(unsigned Reg, unsigned RegUnit) const { + for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) + if (*Units == RegUnit) + return true; + return false; + } + /// isSubRegister - Returns true if regB is a sub-register of regA. /// bool isSubRegister(unsigned regA, unsigned regB) const { diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index 5bbed58..4dc488d 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -27,6 +27,7 @@ class SchedMachineModel { // (-1) inorder (0) ooo, (1): inorder +var latencies. int LoadLatency = -1; // Cycles for loads to access the cache. int HighLatency = -1; // Approximation of cycles for "high latency" ops. + int MispredictPenalty = -1; // Extra cycles for a mispredicted branch. ProcessorItineraries Itineraries = NoItineraries; diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index ff006b6..3f81c06 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -411,6 +411,9 @@ def prefetch : SDNode<"ISD::PREFETCH" , SDTPrefetch, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; +def readcyclecounter : SDNode<"ISD::READCYCLECOUNTER", SDTIntLeaf, + [SDNPHasChain, SDNPSideEffect]>; + def membarrier : SDNode<"ISD::MEMBARRIER" , SDTMemBarrier, [SDNPHasChain, SDNPSideEffect]>; diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h index 6229cbc..a6e41f0 100644 --- a/include/llvm/Transforms/Utils/BuildLibCalls.h +++ b/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -28,41 +28,52 @@ namespace llvm { /// EmitStrLen - Emit a call to the strlen function to the builder, for the /// specified pointer. Ptr is required to be some pointer type, and the /// return value has 'intptr_t' type. - Value *EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD); + Value *EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI); + + /// EmitStrNLen - Emit a call to the strnlen function to the builder, for the + /// specified pointer. Ptr is required to be some pointer type, MaxLen must + /// be of size_t type, and the return value has 'intptr_t' type. + Value *EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, + const TargetData *TD, const TargetLibraryInfo *TLI); /// EmitStrChr - Emit a call to the strchr function to the builder, for the /// specified pointer and character. Ptr is required to be some pointer type, /// and the return value has 'i8*' type. - Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, const TargetData *TD); + Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI); /// EmitStrNCmp - Emit a call to the strncmp function to the builder. Value *EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, - const TargetData *TD); + const TargetData *TD, const TargetLibraryInfo *TLI); /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the /// specified pointer arguments. 
Value *EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, - const TargetData *TD, StringRef Name = "strcpy"); + const TargetData *TD, const TargetLibraryInfo *TLI, + StringRef Name = "strcpy"); /// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the /// specified pointer arguments and length. Value *EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B, - const TargetData *TD, StringRef Name = "strncpy"); + const TargetData *TD, const TargetLibraryInfo *TLI, + StringRef Name = "strncpy"); /// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder. /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src /// are pointers. Value *EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, - IRBuilder<> &B, const TargetData *TD); + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI); /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B, - const TargetData *TD); + const TargetData *TD, const TargetLibraryInfo *TLI); /// EmitMemCmp - Emit a call to the memcmp function. Value *EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, - const TargetData *TD); + const TargetData *TD, const TargetLibraryInfo *TLI); /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' /// (e.g. 'floor'). This function is known to take a single of type matching @@ -74,26 +85,28 @@ namespace llvm { /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. - Value *EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD); + Value *EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI); /// EmitPutS - Emit a call to the puts function. This assumes that Str is /// some pointer. - void EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD); + Value *EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI); /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is /// an i32, and File is a pointer to FILE. - void EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, - const TargetData *TD); + Value *EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, + const TargetData *TD, const TargetLibraryInfo *TLI); /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. - void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, const TargetData *TD, - const TargetLibraryInfo *TLI); + Value *EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI); /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. - void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, - const TargetData *TD, const TargetLibraryInfo *TLI); + Value *EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, + const TargetData *TD, const TargetLibraryInfo *TLI); /// SimplifyFortifiedLibCalls - Helper class for folding checked library /// calls (e.g. __strcpy_chk) into their unchecked counterparts. 
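With TargetLibraryInfo threaded through every emitter above, callers should be prepared for the emitters to decline; a hedged sketch (whether a null Value* comes back for a missing function is an assumption about the implementation, not shown in this header):

#include "llvm/Transforms/Utils/BuildLibCalls.h"

// Emits strlen(Ptr); presumably fails when the target lacks strlen.
static llvm::Value *emitLen(llvm::Value *Ptr, llvm::IRBuilder<> &B,
                            const llvm::TargetData *TD,
                            const llvm::TargetLibraryInfo *TLI) {
  return llvm::EmitStrLen(Ptr, B, TD, TLI);
}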
@@ -105,7 +118,7 @@ namespace llvm { bool isString) const = 0; public: virtual ~SimplifyFortifiedLibCalls(); - bool fold(CallInst *CI, const TargetData *TD); + bool fold(CallInst *CI, const TargetData *TD, const TargetLibraryInfo *TLI); }; } diff --git a/include/llvm/TypeFinder.h b/include/llvm/TypeFinder.h new file mode 100644 index 0000000..5d80705 --- /dev/null +++ b/include/llvm/TypeFinder.h @@ -0,0 +1,78 @@ +//===-- llvm/TypeFinder.h - Class for finding used struct types -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the TypeFinder class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TYPEFINDER_H +#define LLVM_TYPEFINDER_H + +#include "llvm/ADT/DenseSet.h" +#include <vector> + +namespace llvm { + +class MDNode; +class Module; +class StructType; +class Type; +class Value; + +/// TypeFinder - Walk over a module, identifying all of the types that are +/// used by the module. +class TypeFinder { + // To avoid walking constant expressions multiple times and other IR + // objects, we keep several helper maps. + DenseSet<const Value*> VisitedConstants; + DenseSet<Type*> VisitedTypes; + + std::vector<StructType*> StructTypes; + bool OnlyNamed; + +public: + TypeFinder() : OnlyNamed(false) {} + + void run(const Module &M, bool onlyNamed); + void clear(); + + typedef std::vector<StructType*>::iterator iterator; + typedef std::vector<StructType*>::const_iterator const_iterator; + + iterator begin() { return StructTypes.begin(); } + iterator end() { return StructTypes.end(); } + + const_iterator begin() const { return StructTypes.begin(); } + const_iterator end() const { return StructTypes.end(); } + + bool empty() const { return StructTypes.empty(); } + size_t size() const { return StructTypes.size(); } + iterator erase(iterator I, iterator E) { return StructTypes.erase(I, E); } + + StructType *&operator[](unsigned Idx) { return StructTypes[Idx]; } + +private: + /// incorporateType - This method adds the type to the list of used + /// structures if it's not in there already. + void incorporateType(Type *Ty); + + /// incorporateValue - This method is used to walk operand lists finding types + /// hiding in constant expressions and other operands that won't be walked in + /// other ways. GlobalValues, basic blocks, instructions, and inst operands + /// are all explicitly enumerated. + void incorporateValue(const Value *V); + + /// incorporateMDNode - This method is used to walk the operands of an MDNode + /// to find types hiding within. 
+ void incorporateMDNode(const MDNode *V); +}; + +} // end llvm namespace + +#endif diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 2730ce6..b255ce6 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -1,4 +1,4 @@ -//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -*- C++ -*-===// +//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -----------===// // // The LLVM Compiler Infrastructure // @@ -78,6 +78,19 @@ static const uint32_t ZH_NONTAKEN_WEIGHT = 12; static const uint32_t FPH_TAKEN_WEIGHT = 20; static const uint32_t FPH_NONTAKEN_WEIGHT = 12; +/// \brief Invoke-terminating normal branch taken weight +/// +/// This is the weight for branching to the normal destination of an invoke +/// instruction. We expect this to happen most of the time. Set the weight to an +/// absurdly high value so that nested loops subsume it. +static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; + +/// \brief Invoke-terminating normal branch not-taken weight. +/// +/// This is the weight for branching to the unwind destination of an invoke +/// instruction. This is essentially never taken. +static const uint32_t IH_NONTAKEN_WEIGHT = 1; + // Standard weight value. Used when none of the heuristics set weight for // the edge. static const uint32_t NORMAL_WEIGHT = 16; @@ -371,6 +384,19 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) { return true; } +bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) { + InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()); + if (!II) + return false; + + BasicBlock *Normal = II->getNormalDest(); + BasicBlock *Unwind = II->getUnwindDest(); + + setEdgeWeight(BB, Normal, IH_TAKEN_WEIGHT); + setEdgeWeight(BB, Unwind, IH_NONTAKEN_WEIGHT); + return true; +} + void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<LoopInfo>(); AU.setPreservesAll(); @@ -397,7 +423,9 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { continue; if (calcZeroHeuristics(*I)) continue; - calcFloatingPointHeuristics(*I); + if (calcFloatingPointHeuristics(*I)) + continue; + calcInvokeHeuristics(*I); } PostDominatedByUnreachable.clear(); diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 7ced848..f5e619c 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -358,17 +358,20 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, NumElts = AT->getNumElements(); else NumElts = cast<VectorType>(C->getType())->getNumElements(); - + for (; Index != NumElts; ++Index) { if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr, BytesLeft, TD)) return false; - if (EltSize >= BytesLeft) + + uint64_t BytesWritten = EltSize - Offset; + assert(BytesWritten <= EltSize && "Not indexing into this element?"); + if (BytesWritten >= BytesLeft) return true; - + Offset = 0; - BytesLeft -= EltSize; - CurPtr += EltSize; + BytesLeft -= BytesWritten; + CurPtr += BytesWritten; } return true; } @@ -600,6 +603,22 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, return C; } +/// Strip the pointer casts, but preserve the address space information. 
+static Constant* StripPtrCastKeepAS(Constant* Ptr) { + assert(Ptr->getType()->isPointerTy() && "Not a pointer type"); + PointerType *OldPtrTy = cast<PointerType>(Ptr->getType()); + Ptr = cast<Constant>(Ptr->stripPointerCasts()); + PointerType *NewPtrTy = cast<PointerType>(Ptr->getType()); + + // Preserve the address space number of the pointer. + if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) { + NewPtrTy = NewPtrTy->getElementType()->getPointerTo( + OldPtrTy->getAddressSpace()); + Ptr = ConstantExpr::getBitCast(Ptr, NewPtrTy); + } + return Ptr; +} + /// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP /// constant expression, do so. static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, @@ -636,13 +655,13 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, } return 0; } - + unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy); APInt Offset = APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(), makeArrayRef((Value **)Ops.data() + 1, Ops.size() - 1))); - Ptr = cast<Constant>(Ptr->stripPointerCasts()); + Ptr = StripPtrCastKeepAS(Ptr); // If this is a GEP of a GEP, fold it all into a single GEP. while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { @@ -661,7 +680,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, Ptr = cast<Constant>(GEP->getOperand(0)); Offset += APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(), NestedOps)); - Ptr = cast<Constant>(Ptr->stripPointerCasts()); + Ptr = StripPtrCastKeepAS(Ptr); } // If the base value for this address is a literal integer value, fold the diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index a6bf4a8..bc1ecd2 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -797,9 +797,33 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { FiftyPercentVectorBonus = Threshold; TenPercentVectorBonus = Threshold / 2; - // Subtract off one instruction per call argument as those will be free after - // inlining. - Cost -= CS.arg_size() * InlineConstants::InstrCost; + // Give out bonuses per argument, as the instructions setting them up will + // be gone after inlining. + for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { + if (TD && CS.isByValArgument(I)) { + // We approximate the number of loads and stores needed by dividing the + // size of the byval type by the target's pointer size. + PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); + unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); + unsigned PointerSize = TD->getPointerSizeInBits(); + // Ceiling division. + unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; + + // If it generates more than 8 stores it is likely to be expanded as an + // inline memcpy so we take that as an upper bound. Otherwise we assume + // one load and one store per word copied. + // FIXME: The maxStoresPerMemcpy setting from the target should be used + // here instead of a magic number of 8, but it's not available via + // TargetData. + NumStores = std::min(NumStores, 8U); + + Cost -= 2 * NumStores * InlineConstants::InstrCost; + } else { + // For non-byval arguments subtract off one instruction per call + // argument. + Cost -= InlineConstants::InstrCost; + } + } // If there is only one call of the function, and it has internal linkage, // the cost of inlining it drops dramatically. 
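The byval bonus computed in the InlineCost change above is easiest to see with concrete numbers. Below is a minimal standalone sketch of the same arithmetic; the InstrCost constant of 5 is assumed to mirror InlineConstants::InstrCost at the time of this patch, and the function and driver names are invented for illustration:

#include <algorithm>
#include <cstdio>

// A byval argument of TypeSizeBits is copied word by word, so we estimate
// (TypeSizeBits / PointerSizeBits) stores, rounded up, capped at 8 because a
// larger copy is assumed to be expanded as an inline memcpy.
static unsigned byvalBonus(unsigned TypeSizeBits, unsigned PointerSizeBits) {
  const unsigned InstrCost = 5; // assumed to mirror InlineConstants::InstrCost
  unsigned NumStores = (TypeSizeBits + PointerSizeBits - 1) / PointerSizeBits;
  NumStores = std::min(NumStores, 8u); // memcpy expansion upper bound
  return 2 * NumStores * InstrCost;    // one load and one store per word
}

int main() {
  // A 12-byte struct passed byval on a 32-bit target: 3 words -> bonus 30.
  std::printf("bonus = %u\n", byvalBonus(96, 32));
  // A 4 KiB buffer: capped at 8 stores -> bonus 80, not 2048.
  std::printf("bonus = %u\n", byvalBonus(32768, 32));
  return 0;
}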
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 16a9a04..379a35a 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -1719,10 +1719,13 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return ConstantInt::get(ITy, false); // A local identified object (alloca or noalias call) can't equal any - // incoming argument, unless they're both null. - if (isa<Instruction>(LHSPtr) && isa<Argument>(RHSPtr) && - Pred == CmpInst::ICMP_EQ) - return ConstantInt::get(ITy, false); + // incoming argument, unless they're both null or they belong to + // different functions. The latter happens during inlining. + if (Instruction *LHSInst = dyn_cast<Instruction>(LHSPtr)) + if (Argument *RHSArg = dyn_cast<Argument>(RHSPtr)) + if (LHSInst->getParent()->getParent() == RHSArg->getParent() && + Pred == CmpInst::ICMP_EQ) + return ConstantInt::get(ITy, false); } // Assume that the constant null is on the right. @@ -1732,14 +1735,17 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, else if (Pred == CmpInst::ICMP_NE) return ConstantInt::get(ITy, true); } - } else if (isa<Argument>(LHSPtr)) { + } else if (Argument *LHSArg = dyn_cast<Argument>(LHSPtr)) { RHSPtr = RHSPtr->stripInBoundsOffsets(); - // An alloca can't be equal to an argument. - if (isa<AllocaInst>(RHSPtr)) { - if (Pred == CmpInst::ICMP_EQ) - return ConstantInt::get(ITy, false); - else if (Pred == CmpInst::ICMP_NE) - return ConstantInt::get(ITy, true); + // An alloca can't be equal to an argument unless they come from separate + // functions via inlining. + if (AllocaInst *RHSInst = dyn_cast<AllocaInst>(RHSPtr)) { + if (LHSArg->getParent() == RHSInst->getParent()->getParent()) { + if (Pred == CmpInst::ICMP_EQ) + return ConstantInt::get(ITy, false); + else if (Pred == CmpInst::ICMP_NE) + return ConstantInt::get(ITy, true); + } } } diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 8d99ec3..b986b32 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -64,7 +64,7 @@ static const AllocFnsTy AllocationFnData[] = { {"realloc", ReallocLike, 2, 1, -1}, {"reallocf", ReallocLike, 2, 1, -1}, {"strdup", StrDupLike, 1, -1, -1}, - {"strndup", StrDupLike, 2, -1, -1} + {"strndup", StrDupLike, 2, 1, -1} }; @@ -358,11 +358,16 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const TargetData *TD, SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { V = V->stripPointerCasts(); + if (Instruction *I = dyn_cast<Instruction>(V)) { + // If we have already seen this instruction, bail out. Cycles can happen in + // unreachable code after constant propagation. 
+ if (!SeenInsts.insert(I)) + return unknown(); - if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) - return visitGEPOperator(*GEP); - if (Instruction *I = dyn_cast<Instruction>(V)) + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) + return visitGEPOperator(*GEP); return visit(*I); + } if (Argument *A = dyn_cast<Argument>(V)) return visitArgument(*A); if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V)) @@ -371,9 +376,12 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { return visitGlobalVariable(*GV); if (UndefValue *UV = dyn_cast<UndefValue>(V)) return visitUndefValue(*UV); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { if (CE->getOpcode() == Instruction::IntToPtr) return unknown(); // clueless + if (CE->getOpcode() == Instruction::GetElementPtr) + return visitGEPOperator(cast<GEPOperator>(*CE)); + } DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V << '\n'); @@ -414,8 +422,21 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { // handle strdup-like functions separately if (FnData->AllocTy == StrDupLike) { - // TODO - return unknown(); + APInt Size(IntTyBits, GetStringLength(CS.getArgument(0))); + if (!Size) + return unknown(); + + // strndup limits strlen + if (FnData->FstParam > 0) { + ConstantInt *Arg= dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam)); + if (!Arg) + return unknown(); + + APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits); + if (Size.ugt(MaxSize)) + Size = MaxSize + 1; + } + return std::make_pair(Size, Zero); } ConstantInt *Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam)); @@ -512,8 +533,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) { ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const TargetData *TD, LLVMContext &Context) -: TD(TD), Context(Context), Builder(Context, TargetFolder(TD)), -Visitor(TD, Context) { +: TD(TD), Context(Context), Builder(Context, TargetFolder(TD)) { IntTy = TD->getIntPtrType(Context); Zero = ConstantInt::get(IntTy, 0); } @@ -538,6 +558,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { } SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { + ObjectSizeOffsetVisitor Visitor(TD, Context); SizeOffsetType Const = Visitor.compute(V); if (Visitor.bothKnown(Const)) return std::make_pair(ConstantInt::get(Context, Const.first), diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 7fb154d..059e574 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -227,13 +227,18 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, // Otherwise if the two calls don't interact (e.g. InstCS is readnone) // keep scanning. - break; + continue; default: return MemDepResult::getClobber(Inst); } } + + // If we could not obtain a pointer for the instruction and the instruction + // touches memory then assume that this is a dependency. + if (MR != AliasAnalysis::NoModRef) + return MemDepResult::getClobber(Inst); } - + // No dependence found. If this is the entry block of the function, it is // unknown, otherwise it is non-local. 
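Returning to the MemoryBuiltins change above: the StrDupLike case clamps a statically known string length (which, like GetStringLength's result, is assumed to include the nul terminator) against strndup's bound. A standalone sketch of just that arithmetic, with invented names rather than the ObjectSizeOffsetVisitor API:

#include <cstdint>
#include <cstdio>

// Allocation size for strdup/strndup when the source string's length,
// including the nul terminator, is statically known. Size == 0 means the
// length is unknown, mirroring GetStringLength's failure value. strndup(s, n)
// copies at most n characters and appends a nul, so the result is capped at
// n + 1 bytes.
static uint64_t strdupLikeSize(uint64_t Size, bool HasBound, uint64_t N) {
  if (Size == 0)
    return 0; // unknown
  if (HasBound && Size > N)
    Size = N + 1;
  return Size;
}

int main() {
  // strdup("hello"): 5 chars + nul = 6 bytes.
  std::printf("%u\n", (unsigned)strdupLikeSize(6, false, 0));
  // strndup("hello", 3): capped at 3 + 1 = 4 bytes.
  std::printf("%u\n", (unsigned)strdupLikeSize(6, true, 3));
  return 0;
}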
if (BB != &BB->getParent()->getEntryBlock()) diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index 5f4458b..868f483 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -262,22 +262,6 @@ Region::const_block_node_iterator Region::block_node_end() const { return GraphTraits<FlatIt<const Region*> >::nodes_end(this); } -Region::block_iterator Region::block_begin() { - return block_node_begin(); -} - -Region::block_iterator Region::block_end() { - return block_node_end(); -} - -Region::const_block_iterator Region::block_begin() const { - return block_node_begin(); -} - -Region::const_block_iterator Region::block_end() const { - return block_node_end(); -} - Region::element_iterator Region::element_begin() { return GraphTraits<Region*>::nodes_begin(this); } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index f0f3b1c..a654648 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -5370,6 +5370,12 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { SqrtTerm *= B; SqrtTerm -= Four * (A * C); + if (SqrtTerm.isNegative()) { + // The loop is provably infinite. + const SCEV *CNC = SE.getCouldNotCompute(); + return std::make_pair(CNC, CNC); + } + // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest // integer value or else APInt::sqrt() will assert. APInt SqrtVal(SqrtTerm.sqrt()); diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp index 68873e2..5cfc810 100644 --- a/lib/Archive/ArchiveReader.cpp +++ b/lib/Archive/ArchiveReader.cpp @@ -82,14 +82,9 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At; At += sizeof(ArchiveMemberHeader); - // Extract the size and determine if the file is - // compressed or not (negative length). int flags = 0; int MemberSize = atoi(Hdr->size); - if (MemberSize < 0) { - flags |= ArchiveMember::CompressedFlag; - MemberSize = -MemberSize; - } + assert(MemberSize >= 0); // Check the size of the member for sanity if (At + MemberSize > End) { diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index 9ef2943..ec6b4b8 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -204,7 +204,6 @@ Archive::writeMember( std::ofstream& ARFile, bool CreateSymbolTable, bool TruncateNames, - bool ShouldCompress, std::string* ErrMsg ) { @@ -349,7 +348,7 @@ Archive::writeSymbolTable(std::ofstream& ARFile) { // table, flattening the file names (no directories, 15 chars max) and // compressing each archive member. bool -Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, +Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, std::string* ErrMsg) { // Make sure they haven't opened up the file, not loaded it, @@ -394,7 +393,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, // builds the symbol table, symTab. for (MembersList::iterator I = begin(), E = end(); I != E; ++I) { if (writeMember(*I, ArchiveFile, CreateSymbolTable, - TruncateNames, Compress, ErrMsg)) { + TruncateNames, ErrMsg)) { TmpArchive.eraseFromDisk(); ArchiveFile.close(); return true; @@ -446,7 +445,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, // compatibility with other ar(1) implementations as well as allowing the // archive to store both native .o and LLVM .bc files, both indexed. 
if (foreignST) { - if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) { + if (writeMember(*foreignST, FinalFile, false, false, ErrMsg)) { FinalFile.close(); TmpArchive.eraseFromDisk(); return true; diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 670c1bb..e045804 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -456,11 +456,12 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(private); KEYWORD(linker_private); KEYWORD(linker_private_weak); - KEYWORD(linker_private_weak_def_auto); + KEYWORD(linker_private_weak_def_auto); // FIXME: For backwards compatibility. KEYWORD(internal); KEYWORD(available_externally); KEYWORD(linkonce); KEYWORD(linkonce_odr); + KEYWORD(linkonce_odr_auto_hide); KEYWORD(weak); KEYWORD(weak_odr); KEYWORD(appending); @@ -553,6 +554,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(naked); KEYWORD(nonlazybind); KEYWORD(address_safety); + KEYWORD(ia_nsdialect); KEYWORD(type); KEYWORD(opaque); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 095b7c5..a9c7e98 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -184,12 +184,13 @@ bool LLParser::ParseTopLevelEntities() { case lltok::kw_private: // OptionalLinkage case lltok::kw_linker_private: // OptionalLinkage case lltok::kw_linker_private_weak: // OptionalLinkage - case lltok::kw_linker_private_weak_def_auto: // OptionalLinkage + case lltok::kw_linker_private_weak_def_auto: // FIXME: backwards compat. case lltok::kw_internal: // OptionalLinkage case lltok::kw_weak: // OptionalLinkage case lltok::kw_weak_odr: // OptionalLinkage case lltok::kw_linkonce: // OptionalLinkage case lltok::kw_linkonce_odr: // OptionalLinkage + case lltok::kw_linkonce_odr_auto_hide: // OptionalLinkage case lltok::kw_appending: // OptionalLinkage case lltok::kw_dllexport: // OptionalLinkage case lltok::kw_common: // OptionalLinkage @@ -576,8 +577,7 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, Linkage != GlobalValue::InternalLinkage && Linkage != GlobalValue::PrivateLinkage && Linkage != GlobalValue::LinkerPrivateLinkage && - Linkage != GlobalValue::LinkerPrivateWeakLinkage && - Linkage != GlobalValue::LinkerPrivateWeakDefAutoLinkage) + Linkage != GlobalValue::LinkerPrivateWeakLinkage) return Error(LinkageLoc, "invalid linkage type for alias"); Constant *Aliasee; @@ -962,6 +962,7 @@ bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) { case lltok::kw_naked: Attrs |= Attribute::Naked; break; case lltok::kw_nonlazybind: Attrs |= Attribute::NonLazyBind; break; case lltok::kw_address_safety: Attrs |= Attribute::AddressSafety; break; + case lltok::kw_ia_nsdialect: Attrs |= Attribute::IANSDialect; break; case lltok::kw_alignstack: { unsigned Alignment; @@ -989,12 +990,12 @@ bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) { /// ::= 'private' /// ::= 'linker_private' /// ::= 'linker_private_weak' -/// ::= 'linker_private_weak_def_auto' /// ::= 'internal' /// ::= 'weak' /// ::= 'weak_odr' /// ::= 'linkonce' /// ::= 'linkonce_odr' +/// ::= 'linkonce_odr_auto_hide' /// ::= 'available_externally' /// ::= 'appending' /// ::= 'dllexport' @@ -1011,14 +1012,15 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) { case lltok::kw_linker_private_weak: Res = GlobalValue::LinkerPrivateWeakLinkage; break; - case lltok::kw_linker_private_weak_def_auto: - Res = GlobalValue::LinkerPrivateWeakDefAutoLinkage; - break; case lltok::kw_internal: Res = 
GlobalValue::InternalLinkage; break; case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break; case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break; case lltok::kw_linkonce: Res = GlobalValue::LinkOnceAnyLinkage; break; case lltok::kw_linkonce_odr: Res = GlobalValue::LinkOnceODRLinkage; break; + case lltok::kw_linkonce_odr_auto_hide: + case lltok::kw_linker_private_weak_def_auto: // FIXME: For backwards compat. + Res = GlobalValue::LinkOnceODRAutoHideLinkage; + break; case lltok::kw_available_externally: Res = GlobalValue::AvailableExternallyLinkage; break; @@ -2652,11 +2654,11 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { case GlobalValue::PrivateLinkage: case GlobalValue::LinkerPrivateLinkage: case GlobalValue::LinkerPrivateWeakLinkage: - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: case GlobalValue::InternalLinkage: case GlobalValue::AvailableExternallyLinkage: case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::LinkOnceODRAutoHideLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::DLLExportLinkage: diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 0461e7b..9fd63f2 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -37,8 +37,10 @@ namespace lltok { kw_global, kw_constant, kw_private, kw_linker_private, kw_linker_private_weak, - kw_linker_private_weak_def_auto, kw_internal, - kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending, + kw_linker_private_weak_def_auto, // FIXME: For backwards compatibility. + kw_internal, + kw_linkonce, kw_linkonce_odr, kw_linkonce_odr_auto_hide, + kw_weak, kw_weak_odr, kw_appending, kw_dllimport, kw_dllexport, kw_common, kw_available_externally, kw_default, kw_hidden, kw_protected, kw_unnamed_addr, @@ -105,6 +107,7 @@ namespace lltok { kw_naked, kw_nonlazybind, kw_address_safety, + kw_ia_nsdialect, kw_type, kw_opaque, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 4ffee38..65fd52e 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -89,7 +89,7 @@ static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) { case 12: return GlobalValue::AvailableExternallyLinkage; case 13: return GlobalValue::LinkerPrivateLinkage; case 14: return GlobalValue::LinkerPrivateWeakLinkage; - case 15: return GlobalValue::LinkerPrivateWeakDefAutoLinkage; + case 15: return GlobalValue::LinkOnceODRAutoHideLinkage; } } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 5b1725f..1d2dfc3 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -365,7 +365,7 @@ static unsigned getEncodedLinkage(const GlobalValue *GV) { case GlobalValue::AvailableExternallyLinkage: return 12; case GlobalValue::LinkerPrivateLinkage: return 13; case GlobalValue::LinkerPrivateWeakLinkage: return 14; - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: return 15; + case GlobalValue::LinkOnceODRAutoHideLinkage: return 15; } llvm_unreachable("Invalid linkage"); } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index b48b5af..7364f42 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -220,16 +220,16 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { case GlobalValue::CommonLinkage: case GlobalValue::LinkOnceAnyLinkage: case 
GlobalValue::LinkOnceODRLinkage: + case GlobalValue::LinkOnceODRAutoHideLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::LinkerPrivateWeakLinkage: - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: if (MAI->getWeakDefDirective() != 0) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); if ((GlobalValue::LinkageTypes)Linkage != - GlobalValue::LinkerPrivateWeakDefAutoLinkage) + GlobalValue::LinkOnceODRAutoHideLinkage) // .weak_definition _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); else diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index d231665..d30e5bb 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains support for writing dwarf compile unit. +// This file contains support for constructing a dwarf compile unit. // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index d240389..2e189ad 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -61,6 +61,7 @@ add_llvm_library(LLVMCodeGen MachineSSAUpdater.cpp MachineScheduler.cpp MachineSink.cpp + MachineTraceMetrics.cpp MachineVerifier.cpp OcamlGC.cpp OptimizePHIs.cpp diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 9840a40..f9347ef 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -17,12 +17,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "early-ifcvt" +#include "MachineTraceMetrics.h" #include "llvm/Function.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -30,6 +32,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" @@ -48,7 +51,10 @@ BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden, static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden, cl::desc("Turn all knobs to 11")); -typedef SmallSetVector<MachineBasicBlock*, 8> BlockSetVector; +STATISTIC(NumDiamondsSeen, "Number of diamonds"); +STATISTIC(NumDiamondsConv, "Number of diamonds converted"); +STATISTIC(NumTrianglesSeen, "Number of triangles"); +STATISTIC(NumTrianglesConv, "Number of triangles converted"); //===----------------------------------------------------------------------===// // SSAIfConv @@ -94,6 +100,12 @@ public: /// equal to Tail. bool isTriangle() const { return TBB == Tail || FBB == Tail; } + /// Returns the Tail predecessor for the True side. + MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; } + + /// Returns the Tail predecessor for the False side. + MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; } + /// Information about each phi in the Tail block. 
struct PHIInfo { MachineInstr *PHI; @@ -132,6 +144,12 @@ private: /// Find a valid insertion point in Head. bool findInsertionPoint(); + /// Replace PHI instructions in Tail with selects. + void replacePHIInstrs(); + + /// Insert selects and rewrite PHI operands to use them. + void rewritePHIOperands(); + public: /// runOnMachineFunction - Initialize per-function data structures. void runOnMachineFunction(MachineFunction &MF) { @@ -335,11 +353,7 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1) return false; - // We could support additional Tail predecessors by updating phis instead of - // eliminating them. Let's see an example where it matters first. Tail = Succ0->succ_begin()[0]; - if (Tail->pred_size() != 2) - return false; // This is not a triangle. if (Tail != Succ1) { @@ -389,8 +403,8 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { // Any phis in the tail block must be convertible to selects. PHIs.clear(); - MachineBasicBlock *TPred = TBB == Tail ? Head : TBB; - MachineBasicBlock *FPred = FBB == Tail ? Head : FBB; + MachineBasicBlock *TPred = getTPred(); + MachineBasicBlock *FPred = getFPred(); for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); I != E && I->isPHI(); ++I) { PHIs.push_back(&*I); @@ -426,24 +440,18 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { if (!findInsertionPoint()) return false; + if (isTriangle()) + ++NumTrianglesSeen; + else + ++NumDiamondsSeen; return true; } - -/// convertIf - Execute the if conversion after canConvertIf has determined the -/// feasibility. -/// -/// Any basic blocks erased will be added to RemovedBlocks. -/// -void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { - assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); - - // Move all instructions into Head, except for the terminators. - if (TBB != Tail) - Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); - if (FBB != Tail) - Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); - +/// replacePHIInstrs - Completely replace PHI instructions with selects. +/// This is possible when the only Tail predecessors are the if-converted +/// blocks. +void SSAIfConv::replacePHIInstrs() { + assert(Tail->pred_size() == 2 && "Cannot replace PHIs"); MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); assert(FirstTerm != Head->end() && "No terminators"); DebugLoc HeadDL = FirstTerm->getDebugLoc(); @@ -459,6 +467,66 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { PI.PHI->eraseFromParent(); PI.PHI = 0; } +} + +/// rewritePHIOperands - When there are additional Tail predecessors, insert +/// select instructions in Head and rewrite PHI operands to use the selects. +/// Keep the PHI instructions in Tail to handle the other predecessors. +void SSAIfConv::rewritePHIOperands() { + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + assert(FirstTerm != Head->end() && "No terminators"); + DebugLoc HeadDL = FirstTerm->getDebugLoc(); + + // Convert all PHIs to select instructions inserted before FirstTerm. 
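In data-structure terms, the loop below replaces the two if-converted incoming slots of each PHI with a single slot reading a freshly inserted select in Head, while slots for any extra Tail predecessors survive. A toy model of that rewrite, using plain structs and arbitrary value/block ids rather than LLVM API:

#include <cstdio>
#include <vector>

// A PHI is modeled as (value, predecessor) pairs. The TPred entry is
// redirected to a select placed in Head, the FPred entry is dropped (its
// value feeds the select), and all other entries are preserved.
struct Incoming { int Val; int Pred; };

static void rewritePhi(std::vector<Incoming> &Phi, int TPred, int FPred,
                       int Head, int SelVal) {
  std::vector<Incoming> Out;
  for (size_t I = 0; I != Phi.size(); ++I) {
    if (Phi[I].Pred == TPred)
      Out.push_back({SelVal, Head}); // rewritten to read the new select
    else if (Phi[I].Pred != FPred)
      Out.push_back(Phi[I]);         // extra Tail predecessor, kept as-is
  }                                  // FPred entries are simply dropped
  Phi.swap(Out);
}

int main() {
  // phi [v1, bb2], [v2, bb3], [v9, bb7] with TPred = bb2, FPred = bb3.
  std::vector<Incoming> Phi = {{1, 2}, {2, 3}, {9, 7}};
  rewritePhi(Phi, 2, 3, /*Head=*/1, /*SelVal=*/100);
  std::printf("%zu entries, first reads v%d\n", Phi.size(), Phi[0].Val);
  return 0; // prints: 2 entries, first reads v100
}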
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + PHIInfo &PI = PHIs[i]; + DEBUG(dbgs() << "If-converting " << *PI.PHI); + unsigned PHIDst = PI.PHI->getOperand(0).getReg(); + unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); + DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); + + // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. + for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) { + MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB(); + if (MBB == getTPred()) { + PI.PHI->getOperand(i-1).setMBB(Head); + PI.PHI->getOperand(i-2).setReg(DstReg); + } else if (MBB == getFPred()) { + PI.PHI->RemoveOperand(i-1); + PI.PHI->RemoveOperand(i-2); + } + } + DEBUG(dbgs() << " --> " << *PI.PHI); + } +} + +/// convertIf - Execute the if conversion after canConvertIf has determined the +/// feasibility. +/// +/// Any basic blocks erased will be added to RemovedBlocks. +/// +void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { + assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); + + // Update statistics. + if (isTriangle()) + ++NumTrianglesConv; + else + ++NumDiamondsConv; + + // Move all instructions into Head, except for the terminators. + if (TBB != Tail) + Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); + if (FBB != Tail) + Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); + + // Are there extra Tail predecessors? + bool ExtraPreds = Tail->pred_size() != 2; + if (ExtraPreds) + rewritePHIOperands(); + else + replacePHIInstrs(); // Fix up the CFG, temporarily leave Head without any successors. Head->removeSuccessor(TBB); @@ -470,6 +538,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { // Fix up Head's terminators. // It should become a single branch or a fallthrough. + DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc(); TII->RemoveBranch(*Head); // Erase the now empty conditional blocks. It is likely that Head can fall @@ -484,7 +553,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { } assert(Head->succ_empty() && "Additional head successors?"); - if (Head->isLayoutSuccessor(Tail)) { + if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) { // Splice Tail onto the end of Head. 
DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber() << " into head BB#" << Head->getNumber() << '\n'); @@ -512,9 +581,12 @@ namespace { class EarlyIfConverter : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const MCSchedModel *SchedModel; MachineRegisterInfo *MRI; MachineDominatorTree *DomTree; MachineLoopInfo *Loops; + MachineTraceMetrics *Traces; + MachineTraceMetrics::Ensemble *MinInstr; SSAIfConv IfConv; public: @@ -527,6 +599,8 @@ private: bool tryConvertIf(MachineBasicBlock*); void updateDomTree(ArrayRef<MachineBasicBlock*> Removed); void updateLoops(ArrayRef<MachineBasicBlock*> Removed); + void invalidateTraces(); + bool shouldConvertIf(); }; } // end anonymous namespace @@ -537,6 +611,7 @@ INITIALIZE_PASS_BEGIN(EarlyIfConverter, "early-ifcvt", "Early If Converter", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) INITIALIZE_PASS_END(EarlyIfConverter, "early-ifcvt", "Early If Converter", false, false) @@ -546,6 +621,8 @@ void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineTraceMetrics>(); + AU.addPreserved<MachineTraceMetrics>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -576,12 +653,117 @@ void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) { Loops->removeBlock(Removed[i]); } +/// Invalidate MachineTraceMetrics before if-conversion. +void EarlyIfConverter::invalidateTraces() { + Traces->verifyAnalysis(); + Traces->invalidate(IfConv.Head); + Traces->invalidate(IfConv.Tail); + Traces->invalidate(IfConv.TBB); + Traces->invalidate(IfConv.FBB); + Traces->verifyAnalysis(); +} + +// Adjust cycles with downward saturation. +static unsigned adjCycles(unsigned Cyc, int Delta) { + if (Delta < 0 && Cyc + Delta > Cyc) + return 0; + return Cyc + Delta; +} + +/// Apply cost model and heuristics to the if-conversion in IfConv. +/// Return true if the conversion is a good idea. +/// +bool EarlyIfConverter::shouldConvertIf() { + // Stress testing mode disables all cost considerations. + if (Stress) + return true; + + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + + MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred()); + MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred()); + DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace); + unsigned MinCrit = std::min(TBBTrace.getCriticalPath(), + FBBTrace.getCriticalPath()); + + // Set a somewhat arbitrary limit on the critical path extension we accept. + unsigned CritLimit = SchedModel->MispredictPenalty/2; + + // If-conversion only makes sense when there is unexploited ILP. Compute the + // maximum-ILP resource length of the trace after if-conversion. Compare it + // to the shortest critical path. 
+ SmallVector<const MachineBasicBlock*, 1> ExtraBlocks; + if (IfConv.TBB != IfConv.Tail) + ExtraBlocks.push_back(IfConv.TBB); + unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks); + DEBUG(dbgs() << "Resource length " << ResLength + << ", minimal critical path " << MinCrit << '\n'); + if (ResLength > MinCrit + CritLimit) { + DEBUG(dbgs() << "Not enough available ILP.\n"); + return false; + } + + // Assume that the depth of the first head terminator will also be the depth + // of the select instruction inserted, as determined by the flag dependency. + // TBB / FBB data dependencies may delay the select even more. + MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head); + unsigned BranchDepth = + HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth; + DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n'); + + // Look at all the tail phis, and compute the critical path extension caused + // by inserting select instructions. + MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail); + for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) { + SSAIfConv::PHIInfo &PI = IfConv.PHIs[i]; + unsigned Slack = TailTrace.getInstrSlack(PI.PHI); + unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth; + DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI); + + // The condition is pulled into the critical path. + unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles); + if (CondDepth > MaxDepth) { + unsigned Extra = CondDepth - MaxDepth; + DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + + // The TBB value is pulled into the critical path. + unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles); + if (TDepth > MaxDepth) { + unsigned Extra = TDepth - MaxDepth; + DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + + // The FBB value is pulled into the critical path. + unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles); + if (FDepth > MaxDepth) { + unsigned Extra = FDepth - MaxDepth; + DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + } + return true; +} + /// Attempt repeated if-conversion on MBB, return true if successful. /// bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { bool Changed = false; - while (IfConv.canConvertIf(MBB)) { + while (IfConv.canConvertIf(MBB) && shouldConvertIf()) { // If-convert MBB and update analyses. 
+ invalidateTraces(); SmallVector<MachineBasicBlock*, 4> RemovedBlocks; IfConv.convertIf(RemovedBlocks); Changed = true; @@ -597,9 +779,12 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { << ((Value*)MF.getFunction())->getName() << '\n'); TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); + SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel; MRI = &MF.getRegInfo(); DomTree = &getAnalysis<MachineDominatorTree>(); Loops = getAnalysisIfAvailable<MachineLoopInfo>(); + Traces = &getAnalysis<MachineTraceMetrics>(); + MinInstr = 0; bool Changed = false; IfConv.runOnMachineFunction(MF); diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index b14afc2..7a17331 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -131,13 +131,16 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { } else { TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg, MI->getOperand(2).isKill()); + + // Implicitly define DstReg for subsequent uses. + MachineBasicBlock::iterator CopyMI = MI; + --CopyMI; + CopyMI->addRegisterDefined(DstReg); + // Transfer the kill/dead flags, if needed. if (MI->getOperand(0).isDead()) TransferDeadFlag(MI, DstSubReg, TRI); - DEBUG({ - MachineBasicBlock::iterator dMI = MI; - dbgs() << "subreg: " << *(--dMI); - }); + DEBUG(dbgs() << "subreg: " << *CopyMI); } DEBUG(dbgs() << '\n'); diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 01077db..0a795e6 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -160,7 +160,7 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { valnos.pop_back(); } while (!valnos.empty() && valnos.back()->isUnused()); } else { - ValNo->setIsUnused(true); + ValNo->markUnused(); } } @@ -667,9 +667,6 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { } } - // Merge the relevant flags. - V2->mergeFlags(V1); - // Now that V1 is dead, remove it. markValNoForDeletion(V1); @@ -737,9 +734,7 @@ void LiveInterval::print(raw_ostream &OS) const { } else { OS << vni->def; if (vni->isPHIDef()) - OS << "-phidef"; - if (vni->hasPHIKill()) - OS << "-phikill"; + OS << "-phi"; } } } @@ -827,14 +822,11 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], MachineOperand &MO = RI.getOperand(); MachineInstr *MI = MO.getParent(); ++RI; - if (MO.isUse() && MO.isUndef()) - continue; // DBG_VALUE instructions should have been eliminated earlier. - SlotIndex Idx = LIS.getInstructionIndex(MI); - Idx = Idx.getRegSlot(MO.isUse()); - const VNInfo *VNI = LI.getVNInfoAt(Idx); - // FIXME: We should be able to assert(VNI) here, but the coalescer leaves - // dangling defs around. + LiveRangeQuery LRQ(LI, LIS.getInstructionIndex(MI)); + const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined(); + // In the case of an <undef> use that isn't tied to any def, VNI will be + // NULL. If the use is tied to a def, VNI will be the defined value. 
if (!VNI) continue; MO.setReg(LIV[getEqClass(VNI)]->reg); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 819707f..d0f8ae1 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -27,6 +27,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -38,7 +39,13 @@ #include <cmath> using namespace llvm; +// Switch to the new experimental algorithm for computing live intervals. +static cl::opt<bool> +NewLiveIntervals("new-live-intervals", cl::Hidden, + cl::desc("Use new algorithm for computing live intervals")); + char LiveIntervals::ID = 0; +char &llvm::LiveIntervalsID = LiveIntervals::ID; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) @@ -105,7 +112,19 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { AllocatableRegs = TRI->getAllocatableSet(fn); ReservedRegs = TRI->getReservedRegs(fn); - computeIntervals(); + // Allocate space for all virtual registers. + VirtRegIntervals.resize(MRI->getNumVirtRegs()); + + if (NewLiveIntervals) { + // This is the new way of computing live intervals. + // It is independent of LiveVariables, and it can run at any time. + computeVirtRegs(); + computeRegMasks(); + } else { + // This is the old way of computing live intervals. + // It depends on LiveVariables. + computeIntervals(); + } computeLiveInRegUnits(); DEBUG(dump()); @@ -238,7 +257,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // new valno in the killing blocks. assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks"); DEBUG(dbgs() << " phi-join"); - ValNo->setHasPHIKill(true); } else { // Iterate over all of the blocks that the variable is completely // live in, adding [instrIndex(begin), instrIndex(end)+4) to the @@ -266,7 +284,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, assert(getInstructionFromIndex(Start) == 0 && "PHI def index points at actual instruction."); ValNo = interval.getNextValue(Start, VNInfoAllocator); - ValNo->setIsPHIDef(true); } LiveRange LR(Start, killIdx, ValNo); interval.addRange(LR); @@ -340,7 +357,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, SlotIndex killIndex = getMBBEndIdx(mbb); LiveRange LR(defIndex, killIndex, ValNo); interval.addRange(LR); - ValNo->setHasPHIKill(true); DEBUG(dbgs() << " phi-join +" << LR); } else { llvm_unreachable("Multiply defined register"); @@ -442,6 +458,49 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) { } +/// computeVirtRegInterval - Compute the live interval of a virtual register, +/// based on defs and uses.
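The computation implemented next is a two-phase sweep: createDeadDefs seeds a minimal range at every definition, then extendToUses grows ranges to cover every use. A toy single-block model of that ordering over integer slot indexes; none of these names are LLVM API, and the real LiveRangeCalc also walks the CFG and materializes PHI values:

#include <algorithm>
#include <cstdio>
#include <vector>

// A live range is [Start, End) over slot indexes within one basic block.
struct Range { size_t Start, End; };

static std::vector<Range> computeInterval(const std::vector<size_t> &Defs,
                                          const std::vector<size_t> &Uses) {
  std::vector<Range> LI;
  // Phase 1 (createDeadDefs): every def D starts as a minimal range [D, D+1).
  for (size_t I = 0; I != Defs.size(); ++I)
    LI.push_back({Defs[I], Defs[I] + 1});
  // Phase 2 (extendToUses): each use U extends the nearest preceding range
  // (Defs are assumed sorted) so the value stays live up to the use.
  for (size_t I = 0; I != Uses.size(); ++I)
    for (size_t J = LI.size(); J-- > 0;)
      if (LI[J].Start <= Uses[I]) {
        LI[J].End = std::max(LI[J].End, Uses[I] + 1);
        break;
      }
  return LI;
}

int main() {
  // One def at slot 4, uses at slots 7 and 10: the interval becomes [4, 11).
  std::vector<Range> LI = computeInterval({4}, {10, 7});
  std::printf("[%zu, %zu)\n", LI[0].Start, LI[0].End);
  return 0;
}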
+void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) { + assert(LRCalc && "LRCalc not initialized."); + assert(LI->empty() && "Should only compute empty intervals."); + LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + LRCalc->createDeadDefs(LI); + LRCalc->extendToUses(LI); +} + +void LiveIntervals::computeVirtRegs() { + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) + continue; + LiveInterval *LI = createInterval(Reg); + VirtRegIntervals[Reg] = LI; + computeVirtRegInterval(LI); + } +} + +void LiveIntervals::computeRegMasks() { + RegMaskBlocks.resize(MF->getNumBlockIDs()); + + // Find all instructions with regmask operands. + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()]; + RMB.first = RegMaskSlots.size(); + for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end(); + MI != ME; ++MI) + for (MIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isRegMask()) + continue; + RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot()); + RegMaskBits.push_back(MO->getRegMask()); + } + // Compute the number of register mask instructions in this block. + RMB.second = RegMaskSlots.size() - RMB.first; + } +} + //===----------------------------------------------------------------------===// // Register Unit Liveness //===----------------------------------------------------------------------===// @@ -648,7 +707,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. - VNI->setIsUnused(true); + VNI->markUnused(); NewLI.removeRange(*LII); DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); CanSeparate = true; @@ -720,6 +779,25 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const { return MBB1 == MBB2 ? MBB1 : NULL; } +bool +LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I != E; ++I) { + const VNInfo *PHI = *I; + if (PHI->isUnused() || !PHI->isPHIDef()) + continue; + const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def); + // Conservatively return true instead of scanning huge predecessor lists. + if (PHIMBB->pred_size() > 100) + return true; + for (MachineBasicBlock::const_pred_iterator + PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI) + if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI))) + return true; + } + return false; +} + float LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { // Limit the loop depth ridiculousness. @@ -744,7 +822,6 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, VNInfo* VN = Interval.getNextValue( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getVNInfoAllocator()); - VN->setHasPHIKill(true); LiveRange LR( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getMBBEndIdx(startInst->getParent()), VN); diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index 9384075..d828f25 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -54,8 +54,7 @@ void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) { .getRegSlot(I.getOperand().isEarlyClobber()); // Create the def in LI. This may find an existing def.
- VNInfo *VNI = LI->createDeadDef(Idx, *Alloc); - VNI->setIsPHIDef(MI->isPHI()); + LI->createDeadDef(Idx, *Alloc); } } @@ -320,7 +319,6 @@ void LiveRangeCalc::updateSSA() { SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(MBB); VNInfo *VNI = I->LI->getNextValue(Start, *Alloc); - VNI->setIsPHIDef(true); I->Value = VNI; // This block is done, we know the final value. I->DomNode = 0; diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 896fdbf..b4ce9aa 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -239,6 +239,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, // Collect virtual registers to be erased after MI is gone. SmallVector<unsigned, 8> RegsToErase; + bool ReadsPhysRegs = false; // Check for live intervals that may shrink for (MachineInstr::mop_iterator MOI = MI->operands_begin(), @@ -246,8 +247,12 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, if (!MOI->isReg()) continue; unsigned Reg = MOI->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // Check if MI reads any unreserved physregs. + if (Reg && MOI->readsReg() && !LIS.isReserved(Reg)) + ReadsPhysRegs = true; continue; + } LiveInterval &LI = LIS.getInterval(Reg); // Shrink read registers, unless it is likely to be expensive and @@ -271,11 +276,30 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, } } - if (TheDelegate) - TheDelegate->LRE_WillEraseInstruction(MI); - LIS.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - ++NumDCEDeleted; + // Currently, we don't support DCE of physreg live ranges. If MI reads + // any unreserved physregs, don't erase the instruction, but turn it into + // a KILL instead. This way, the physreg live ranges don't end up + // dangling. + // FIXME: It would be better to have something like shrinkToUses() for + // physregs. That could potentially enable more DCE and it would free up + // the physreg. It would not happen often, though. + if (ReadsPhysRegs) { + MI->setDesc(TII.get(TargetOpcode::KILL)); + // Remove all operands that aren't physregs. + for (unsigned i = MI->getNumOperands(); i; --i) { + const MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + continue; + MI->RemoveOperand(i-1); + } + DEBUG(dbgs() << "Converted physregs to:\t" << *MI); + } else { + if (TheDelegate) + TheDelegate->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + ++NumDCEDeleted; + } // Erase any virtregs that are now empty and unused. There may be <undef> // uses around. Keep the empty live range in that case. diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index ecc1e95..cf13dbd 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -109,7 +109,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { assert(N->getParent() != 0 && "machine instruction not in a basic block"); // Remove from the use/def lists. 
- N->RemoveRegOperandsFromUseLists(); + if (MachineFunction *MF = N->getParent()->getParent()) + N->RemoveRegOperandsFromUseLists(MF->getRegInfo()); N->setParent(0); @@ -310,8 +311,11 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { if (!succ_empty()) { if (Indexes) OS << '\t'; OS << " Successors according to CFG:"; - for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) + for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) { OS << " BB#" << (*SI)->getNumber(); + if (!Weights.empty()) + OS << '(' << *getWeightIterator(SI) << ')'; + } OS << '\n'; } } @@ -477,18 +481,42 @@ MachineBasicBlock::removeSuccessor(succ_iterator I) { void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New) { - uint32_t weight = 0; - succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old); + if (Old == New) + return; - // If Weight list is empty it means we don't use it (disabled optimization). - if (!Weights.empty()) { - weight_iterator WI = getWeightIterator(SI); - weight = *WI; + succ_iterator E = succ_end(); + succ_iterator NewI = E; + succ_iterator OldI = E; + for (succ_iterator I = succ_begin(); I != E; ++I) { + if (*I == Old) { + OldI = I; + if (NewI != E) + break; + } + if (*I == New) { + NewI = I; + if (OldI != E) + break; + } } + assert(OldI != E && "Old is not a successor of this block"); + Old->removePredecessor(this); - // Update the successor information. - removeSuccessor(SI); - addSuccessor(New, weight); + // If New isn't already a successor, let it take Old's place. + if (NewI == E) { + New->addPredecessor(this); + *OldI = New; + return; + } + + // New is already a successor. + // Update its weight instead of adding a duplicate edge. + if (!Weights.empty()) { + weight_iterator OldWI = getWeightIterator(OldI); + *getWeightIterator(NewI) += *OldWI; + Weights.erase(OldWI); + } + Successors.erase(OldI); } void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { @@ -507,14 +535,13 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); - uint32_t weight = 0; - + uint32_t Weight = 0; // If Weight list is empty it means we don't use it (disabled optimization). if (!fromMBB->Weights.empty()) - weight = *fromMBB->Weights.begin(); + Weight = *fromMBB->Weights.begin(); - addSuccessor(Succ, weight); + addSuccessor(Succ, Weight); fromMBB->removeSuccessor(Succ); } } @@ -526,7 +553,10 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); - addSuccessor(Succ); + uint32_t Weight = 0; + if (!fromMBB->Weights.empty()) + Weight = *fromMBB->Weights.begin(); + addSuccessor(Succ, Weight); fromMBB->removeSuccessor(Succ); // Fix up any PHI nodes in the successor. 
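The replaceSuccessor rewrite above has one subtle case: when New is already a successor, the edge is deduplicated and Old's weight is folded into New's instead of creating a parallel edge. A container-level sketch of that merge, with plain vectors standing in for the successor and weight lists and invented names throughout:

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Succs[i] pairs with Weights[i]; an empty Weights means weights are unused.
static void replaceSucc(std::vector<int> &Succs,
                        std::vector<uint32_t> &Weights, int Old, int New) {
  if (Old == New)
    return;
  size_t OldI = Succs.size(), NewI = Succs.size();
  for (size_t I = 0; I != Succs.size(); ++I) {
    if (Succs[I] == Old) OldI = I;
    if (Succs[I] == New) NewI = I;
  }
  assert(OldI != Succs.size() && "Old is not a successor");
  if (NewI == Succs.size()) {
    // New simply takes Old's slot; the weight stays attached to that slot.
    Succs[OldI] = New;
    return;
  }
  // New is already a successor: merge weights, then drop the duplicate edge.
  if (!Weights.empty()) {
    Weights[NewI] += Weights[OldI];
    Weights.erase(Weights.begin() + OldI);
  }
  Succs.erase(Succs.begin() + OldI);
}

int main() {
  std::vector<int> Succs;
  Succs.push_back(1);
  Succs.push_back(2);
  std::vector<uint32_t> Weights;
  Weights.push_back(30);
  Weights.push_back(70);
  replaceSucc(Succs, Weights, /*Old=*/2, /*New=*/1);
  std::printf("succ=%d weight=%u\n", Succs[0], (unsigned)Weights[0]);
  return 0; // prints: succ=1 weight=100
}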
@@ -540,9 +570,12 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { } } +bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const { + return std::find(pred_begin(), pred_end(), MBB) != pred_end(); +} + bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { - const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB); - return I != Successors.end(); + return std::find(succ_begin(), succ_end(), MBB) != succ_end(); } bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { @@ -909,12 +942,11 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { /// getSuccWeight - Return weight of the edge from this block to MBB. /// -uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const { +uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const { if (Weights.empty()) return 0; - const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); - return *getWeightIterator(I); + return *getWeightIterator(Succ); } /// getWeightIterator - Return weight iterator corresponding to the I successor diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 5a15f92..c4dca2c 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -985,8 +985,22 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // boiler plate. Cond.clear(); MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. - if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) + if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // If PrevBB has a two-way branch, try to re-order the branches + // such that we branch to the successor with higher weight first. + if (TBB && !Cond.empty() && FBB && + MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) && + !TII->ReverseBranchCondition(Cond)) { + DEBUG(dbgs() << "Reverse order of the two branches: " + << getBlockName(PrevBB) << "\n"); + DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB) + << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n"); + DebugLoc dl; // FIXME: this is nowhere + TII->RemoveBranch(*PrevBB); + TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); + } PrevBB->updateTerminator(); + } } // Fixup the last block. @@ -997,29 +1011,63 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Walk through the backedges of the function now that we have fully laid out // the basic blocks and align the destination of each backedge. We don't rely - // on the loop info here so that we can align backedges in unnatural CFGs and - // backedges that were introduced purely because of the loop rotations done - // during this layout pass. - // FIXME: This isn't quite right, we shouldn't align backedges that result - // from blocks being sunken below the exit block for the function. + // exclusively on the loop info here so that we can align backedges in + // unnatural CFGs and backedges that were introduced purely because of the + // loop rotations done during this layout pass. if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) return; unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) return; // Don't care about loop alignment. + if (FunctionChain.begin() == FunctionChain.end()) + return; // Empty chain.
- SmallPtrSet<MachineBasicBlock *, 16> PreviousBlocks; - for (BlockChain::iterator BI = FunctionChain.begin(), + const BranchProbability ColdProb(1, 5); // 20% + BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); + BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; + for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()), BE = FunctionChain.end(); BI != BE; ++BI) { - PreviousBlocks.insert(*BI); - // Set alignment on the destination of all the back edges in the new - // ordering. - for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(), - SE = (*BI)->succ_end(); - SI != SE; ++SI) - if (PreviousBlocks.count(*SI)) - (*SI)->setAlignment(Align); + // Don't align non-looping basic blocks. These are unlikely to execute + // enough times to matter in practice. Note that we'll still handle + // unnatural CFGs inside of a natural outer loop (the common case) and + // rotated loops. + MachineLoop *L = MLI->getLoopFor(*BI); + if (!L) + continue; + + // If the block is cold relative to the function entry don't waste space + // aligning it. + BlockFrequency Freq = MBFI->getBlockFreq(*BI); + if (Freq < WeightedEntryFreq) + continue; + + // If the block is cold relative to its loop header, don't align it + // regardless of what edges into the block exist. + MachineBasicBlock *LoopHeader = L->getHeader(); + BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader); + if (Freq < (LoopHeaderFreq * ColdProb)) + continue; + + // Check for the existence of a non-layout predecessor which would benefit + // from aligning this block. + MachineBasicBlock *LayoutPred = *llvm::prior(BI); + + // Force alignment if all the predecessors are jumps. We already checked + // that the block isn't cold above. + if (!LayoutPred->isSuccessor(*BI)) { + (*BI)->setAlignment(Align); + continue; + } + + // Align this block if the layout predecessor's edge into this block is + // cold relative to the block. When this is true, other predecessors make up + // all of the hot entries into the block and thus alignment is likely to be + // important.
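Condensed, the alignment decision in this hunk is a cascade of coldness tests against a 20% ratio, ending with the layout-edge check that follows. A standalone restatement using plain integers for frequencies; the real code uses BlockFrequency and BranchProbability, and the names here are invented:

#include <cstdio>

// Returns true if the block is worth aligning. All frequencies are scaled
// integers; ColdNum/ColdDen encode the 20% threshold (1/5).
static bool shouldAlign(bool InLoop, unsigned Freq, unsigned EntryFreq,
                        unsigned LoopHeaderFreq, bool HasLayoutPredEdge,
                        unsigned LayoutEdgeFreq) {
  const unsigned ColdNum = 1, ColdDen = 5;
  if (!InLoop)
    return false; // non-looping blocks rarely execute enough to matter
  if (Freq * ColdDen < EntryFreq * ColdNum)
    return false; // cold relative to the function entry
  if (Freq * ColdDen < LoopHeaderFreq * ColdNum)
    return false; // cold relative to its loop header
  if (!HasLayoutPredEdge)
    return true;  // every entry is a jump, so alignment always helps
  // Align only if the fallthrough edge is cold relative to the block, i.e.
  // the jumping predecessors account for most of the entries.
  return LayoutEdgeFreq * ColdDen <= Freq * ColdNum;
}

int main() {
  // A hot loop block entered mostly via back edges: aligned.
  std::printf("%d\n", shouldAlign(true, 1000, 100, 1200, true, 50)); // 1
  return 0;
}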
+ BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); + BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; + if (LayoutEdgeFreq <= (Freq * ColdProb)) + (*BI)->setAlignment(Align); } } diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 0cc1af0..4479211 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -38,7 +38,7 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { Scale = 1; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); Sum += Weight; } @@ -53,22 +53,30 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { Sum = 0; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); Sum += Weight / Scale; } assert(Sum <= UINT32_MAX); return Sum; } -uint32_t -MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const { +uint32_t MachineBranchProbabilityInfo:: +getEdgeWeight(const MachineBasicBlock *Src, + MachineBasicBlock::const_succ_iterator Dst) const { uint32_t Weight = Src->getSuccWeight(Dst); if (!Weight) return DEFAULT_WEIGHT; return Weight; } +uint32_t MachineBranchProbabilityInfo:: +getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { + // This is a linear search. Try to use the const_succ_iterator version when + // possible. + return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst)); +} + bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, MachineBasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% @@ -82,7 +90,7 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { MachineBasicBlock *MaxSucc = 0; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); if (Weight > MaxWeight) { MaxWeight = Weight; MaxSucc = *I; diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 9cfe9ab..896461f 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -215,8 +215,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, if (MO.isDef() && (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) continue; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - PhysRefs.insert(*AI); + // Reading constant physregs is ok. + if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); if (MO.isDef()) PhysDefs.push_back(Reg); } @@ -324,6 +326,29 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI) { // FIXME: Heuristics that works around the lack the live range splitting. + // If CSReg is used at all uses of Reg, CSE should not increase register + // pressure of CSReg. 
+ bool MayIncreasePressure = true; + if (TargetRegisterInfo::isVirtualRegister(CSReg) && + TargetRegisterInfo::isVirtualRegister(Reg)) { + MayIncreasePressure = false; + SmallPtrSet<MachineInstr*, 8> CSUses; + for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *Use = &*I; + CSUses.insert(Use); + } + for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *Use = &*I; + if (!CSUses.count(Use)) { + MayIncreasePressure = true; + break; + } + } + } + if (!MayIncreasePressure) return true; + // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. @@ -394,6 +419,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool Changed = false; SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; + SmallVector<unsigned, 2> ImplicitDefsToUpdate; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; ++I; @@ -463,15 +489,24 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Check if it's profitable to perform this CSE. bool DoCSE = true; - unsigned NumDefs = MI->getDesc().getNumDefs(); + unsigned NumDefs = MI->getDesc().getNumDefs() + + MI->getDesc().getNumImplicitDefs(); + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned OldReg = MO.getReg(); unsigned NewReg = CSMI->getOperand(i).getReg(); - if (OldReg == NewReg) + + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead()) + ImplicitDefsToUpdate.push_back(i); + if (OldReg == NewReg) { + --NumDefs; continue; + } assert(TargetRegisterInfo::isVirtualRegister(OldReg) && TargetRegisterInfo::isVirtualRegister(NewReg) && @@ -503,6 +538,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { MRI->clearKillFlags(CSEPairs[i].second); } + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i) + CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false); + if (CrossMBBPhysDef) { // Add physical register defs now coming in from a predecessor to MBB // livein list. @@ -526,6 +566,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { Exps.push_back(MI); } CSEPairs.clear(); + ImplicitDefsToUpdate.clear(); } return Changed; diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 8dada05..b166849 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -47,55 +47,6 @@ using namespace llvm; // MachineOperand Implementation //===----------------------------------------------------------------------===// -/// AddRegOperandToRegInfo - Add this register operand to the specified -/// MachineRegisterInfo. If it is null, then the next/prev fields should be -/// explicitly nulled out. -void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) { - assert(isReg() && "Can only add reg operand to use lists"); - - // If the reginfo pointer is null, just explicitly null out or next/prev - // pointers, to ensure they are not garbage. 
- if (RegInfo == 0) { - Contents.Reg.Prev = 0; - Contents.Reg.Next = 0; - return; - } - - // Otherwise, add this operand to the head of the registers use/def list. - MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg()); - - // For SSA values, we prefer to keep the definition at the start of the list. - // we do this by skipping over the definition if it is at the head of the - // list. - if (*Head && (*Head)->isDef()) - Head = &(*Head)->Contents.Reg.Next; - - Contents.Reg.Next = *Head; - if (Contents.Reg.Next) { - assert(getReg() == Contents.Reg.Next->getReg() && - "Different regs on the same list!"); - Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next; - } - - Contents.Reg.Prev = Head; - *Head = this; -} - -/// RemoveRegOperandFromRegInfo - Remove this register operand from the -/// MachineRegisterInfo it is linked with. -void MachineOperand::RemoveRegOperandFromRegInfo() { - assert(isOnRegUseList() && "Reg operand is not on a use list"); - // Unlink this from the doubly linked list of operands. - MachineOperand *NextOp = Contents.Reg.Next; - *Contents.Reg.Prev = NextOp; - if (NextOp) { - assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!"); - NextOp->Contents.Reg.Prev = Contents.Reg.Prev; - } - Contents.Reg.Prev = 0; - Contents.Reg.Next = 0; -} - void MachineOperand::setReg(unsigned Reg) { if (getReg() == Reg) return; // No change. @@ -105,9 +56,10 @@ void MachineOperand::setReg(unsigned Reg) { if (MachineInstr *MI = getParent()) if (MachineBasicBlock *MBB = MI->getParent()) if (MachineFunction *MF = MBB->getParent()) { - RemoveRegOperandFromRegInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); SmallContents.RegNo = Reg; - AddRegOperandToRegInfo(&MF->getRegInfo()); + MRI.addRegOperandToUseList(this); return; } @@ -136,15 +88,36 @@ void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { setReg(Reg); } +/// Change a def to a use, or a use to a def. +void MachineOperand::setIsDef(bool Val) { + assert(isReg() && "Wrong MachineOperand accessor"); + assert((!Val || !isDebug()) && "Marking a debug operation as def"); + if (IsDef == Val) + return; + // MRI may keep uses and defs in different list positions. + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); + IsDef = Val; + MRI.addRegOperandToUseList(this); + return; + } + IsDef = Val; +} + /// ChangeToImmediate - Replace this operand with a new immediate operand of /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. void MachineOperand::ChangeToImmediate(int64_t ImmVal) { // If this operand is currently a register operand, and if this is in a // function, deregister the operand from the register's use/def list. 
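Before ChangeToImmediate's body below, note the discipline setIsDef above establishes: since the rewritten use/def lists keep defs ahead of uses, an operand's def/use status may only change while the operand is unlinked. A generic sketch of the unlink-mutate-relink pattern (simplified types, not the real MachineOperand API):

#include <list>

struct Operand { unsigned Reg; bool IsDef; };

struct UseDefList {
  std::list<Operand *> Ops;                 // defs first, uses after
  void add(Operand *O) { O->IsDef ? Ops.push_front(O) : Ops.push_back(O); }
  void remove(Operand *O) { Ops.remove(O); }
};

// Flipping IsDef in place would strand the operand in the wrong section of
// the list, so unlink it, mutate it, and relink it.
static void setIsDef(UseDefList &L, Operand *O, bool Val) {
  if (O->IsDef == Val)
    return;
  L.remove(O);
  O->IsDef = Val;
  L.add(O);
}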
- if (isReg() && getParent() && getParent()->getParent() && - getParent()->getParent()->getParent()) - RemoveRegOperandFromRegInfo(); + if (isReg() && isOnRegUseList()) + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) + MF->getRegInfo().removeRegOperandFromUseList(this); OpKind = MO_Immediate; Contents.ImmVal = ImmVal; @@ -156,24 +129,20 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) { void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, bool isKill, bool isDead, bool isUndef, bool isDebug) { - // If this operand is already a register operand, use setReg to update the + MachineRegisterInfo *RegInfo = 0; + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) + RegInfo = &MF->getRegInfo(); + // If this operand is already a register operand, remove it from the // register's use/def lists. - if (isReg()) { - assert(!isEarlyClobber()); - setReg(Reg); - } else { - // Otherwise, change this to a register and set the reg#. - OpKind = MO_Register; - SmallContents.RegNo = Reg; - - // If this operand is embedded in a function, add the operand to the - // register's use/def list. - if (MachineInstr *MI = getParent()) - if (MachineBasicBlock *MBB = MI->getParent()) - if (MachineFunction *MF = MBB->getParent()) - AddRegOperandToRegInfo(&MF->getRegInfo()); - } + if (RegInfo && isReg()) + RegInfo->removeRegOperandFromUseList(this); + // Change this to a register and set the reg#. + OpKind = MO_Register; + SmallContents.RegNo = Reg; + SubReg = 0; IsDef = isDef; IsImp = isImp; IsKill = isKill; @@ -182,7 +151,13 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsInternalRead = false; IsEarlyClobber = false; IsDebug = isDebug; - SubReg = 0; + // Ensure isOnRegUseList() returns false. + Contents.Reg.Prev = 0; + + // If this operand is embedded in a function, add the operand to the + // register's use/def list. + if (RegInfo) + RegInfo->addRegOperandToUseList(this); } /// isIdenticalTo - Return true if this operand is identical to the specified @@ -208,6 +183,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { case MachineOperand::MO_FrameIndex: return getIndex() == Other.getIndex(); case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: return getIndex() == Other.getIndex() && getOffset() == Other.getOffset(); case MachineOperand::MO_JumpTableIndex: return getIndex() == Other.getIndex(); @@ -245,6 +221,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) { case MachineOperand::MO_FrameIndex: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(), MO.getOffset()); case MachineOperand::MO_JumpTableIndex: @@ -353,6 +330,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; + case MachineOperand::MO_TargetIndex: + OS << "<ti#" << getIndex(); + if (getOffset()) OS << "+" << getOffset(); + OS << '>'; + break; case MachineOperand::MO_JumpTableIndex: OS << "<jt#" << getIndex() << '>'; break; @@ -650,24 +632,21 @@ MachineRegisterInfo *MachineInstr::getRegInfo() { /// RemoveRegOperandsFromUseLists - Unlink all of the register operands in /// this instruction from their respective use lists. 
This requires that the /// operands already be on their use lists. -void MachineInstr::RemoveRegOperandsFromUseLists() { - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { +void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); - } + MRI.removeRegOperandFromUseList(&Operands[i]); } /// AddRegOperandsToUseLists - Add all of the register operands in /// this instruction from their respective use lists. This requires that the /// operands not be on their use lists yet. -void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) { - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { +void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(&RegInfo); - } + MRI.addRegOperandToUseList(&Operands[i]); } - /// addOperand - Add the specified operand to the instruction. If it is an /// implicit operand, it is added to the end of the operand list. If it is /// an explicit operand it is added at the end of the explicit operand list @@ -695,7 +674,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) { --OpNo; if (RegInfo) - Operands[OpNo].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[OpNo]); } } @@ -712,7 +691,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Reallocate) for (unsigned i = 0; i != OpNo; ++i) if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[i]); // Insert the new operand at OpNo. Operands.insert(Operands.begin() + OpNo, Op); @@ -723,13 +702,15 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Reallocate) for (unsigned i = 0; i != OpNo; ++i) if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); // When adding a register operand, tell RegInfo about it. if (Operands[OpNo].isReg()) { - // Add the new operand to RegInfo, even when RegInfo is NULL. - // This will initialize the linked list pointers. - Operands[OpNo].AddRegOperandToRegInfo(RegInfo); + // Ensure isOnRegUseList() returns false, regardless of Op's status. + Operands[OpNo].Contents.Reg.Prev = 0; + // Add the new operand to RegInfo. + if (RegInfo) + RegInfo->addRegOperandToUseList(&Operands[OpNo]); // If the register operand is flagged as early, mark the operand as such. if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) Operands[OpNo].setIsEarlyClobber(true); @@ -739,7 +720,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (RegInfo) { for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) { assert(Operands[i].isReg() && "Should only be an implicit reg!"); - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); } } } @@ -749,12 +730,13 @@ void MachineInstr::addOperand(const MachineOperand &Op) { /// void MachineInstr::RemoveOperand(unsigned OpNo) { assert(OpNo < Operands.size() && "Invalid operand number"); + MachineRegisterInfo *RegInfo = getRegInfo(); // Special case removing the last one. if (OpNo == Operands.size()-1) { // If needed, remove from the reg def/use list. 
- if (Operands.back().isReg() && Operands.back().isOnRegUseList()) - Operands.back().RemoveRegOperandFromRegInfo(); + if (RegInfo && Operands.back().isReg() && Operands.back().isOnRegUseList()) + RegInfo->removeRegOperandFromUseList(&Operands.back()); Operands.pop_back(); return; @@ -763,11 +745,10 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { // Otherwise, we are removing an interior operand. If we have reginfo to // update, remove all operands that will be shifted down from their reg lists, // move everything down, then re-add them. - MachineRegisterInfo *RegInfo = getRegInfo(); if (RegInfo) { for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[i]); } } @@ -776,7 +757,7 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { if (RegInfo) { for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); } } } diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 82e1235..5fb938f 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -102,17 +102,9 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ // New virtual register number. unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); - - // Add a reg, but keep track of whether the vector reallocated or not. - const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0); - void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg]; VRegInfo.grow(Reg); VRegInfo[Reg].first = RegClass; RegAllocHints.grow(Reg); - - if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase) - // The vector reallocated, handle this now. - HandleVRegListReallocation(); return Reg; } @@ -126,21 +118,68 @@ void MachineRegisterInfo::clearVirtRegs() { VRegInfo.clear(); } -/// HandleVRegListReallocation - We just added a virtual register to the -/// VRegInfo info list and it reallocated. Update the use/def lists info -/// pointers. -void MachineRegisterInfo::HandleVRegListReallocation() { - // The back pointers for the vreg lists point into the previous vector. - // Update them to point to their correct slots. - for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - MachineOperand *List = VRegInfo[Reg].second; - if (!List) continue; - // Update the back-pointer to be accurate once more. - List->Contents.Reg.Prev = &VRegInfo[Reg].second; +/// Add MO to the linked list of operands for its register. +void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) { + assert(!MO->isOnRegUseList() && "Already on list"); + MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg()); + MachineOperand *const Head = HeadRef; + + // Head points to the first list element. + // Next is NULL on the last list element. + // Prev pointers are circular, so Head->Prev == Last. + + // Head is NULL for an empty list. + if (!Head) { + MO->Contents.Reg.Prev = MO; + MO->Contents.Reg.Next = 0; + HeadRef = MO; + return; + } + assert(MO->getReg() == Head->getReg() && "Different regs on the same list!"); + + // Insert MO between Last and Head in the circular Prev chain. 
+ MachineOperand *Last = Head->Contents.Reg.Prev; + assert(Last && "Inconsistent use list"); + assert(MO->getReg() == Last->getReg() && "Different regs on the same list!"); + Head->Contents.Reg.Prev = MO; + MO->Contents.Reg.Prev = Last; + + // Def operands always precede uses. This allows def_iterator to stop early. + // Insert def operands at the front, and use operands at the back. + if (MO->isDef()) { + // Insert def at the front. + MO->Contents.Reg.Next = Head; + HeadRef = MO; + } else { + // Insert use at the end. + MO->Contents.Reg.Next = 0; + Last->Contents.Reg.Next = MO; } } +/// Remove MO from its use-def list. +void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) { + assert(MO->isOnRegUseList() && "Operand not on use list"); + MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg()); + MachineOperand *const Head = HeadRef; + assert(Head && "List already empty"); + + // Unlink this from the doubly linked list of operands. + MachineOperand *Next = MO->Contents.Reg.Next; + MachineOperand *Prev = MO->Contents.Reg.Prev; + + // Prev links are circular, next link is NULL instead of looping back to Head. + if (MO == Head) + HeadRef = Next; + else + Prev->Contents.Reg.Next = Next; + + (Next ? Next : Head)->Contents.Reg.Prev = Prev; + + MO->Contents.Reg.Prev = 0; + MO->Contents.Reg.Next = 0; +} + /// replaceRegWith - Replace all instances of FromReg with ToReg in the /// machine function. This is like llvm-level X->replaceAllUsesWith(Y), /// except that it also changes any definitions of the register as well. @@ -178,13 +217,6 @@ MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const { return &*I; } -bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const { - use_iterator UI = use_begin(RegNo); - if (UI == use_end()) - return false; - return ++UI == use_end(); -} - bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { use_nodbg_iterator UI = use_nodbg_begin(RegNo); if (UI == use_nodbg_end()) diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index acb1ee6..076547a 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -42,7 +42,7 @@ MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, } MachineSSAUpdater::~MachineSSAUpdater() { - delete &getAvailableVals(AV); + delete static_cast<AvailableValsTy*>(AV); } /// Initialize - Reset this object to get ready for a new set of SSA diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 1ce546b..bc383cb 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -99,6 +99,16 @@ namespace { bool PerformTrivialForwardCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); }; + + // SuccessorSorter - Sort Successors according to their loop depth. + struct SuccessorSorter { + SuccessorSorter(MachineLoopInfo *LoopInfo) : LI(LoopInfo) {} + bool operator()(const MachineBasicBlock *LHS, + const MachineBasicBlock *RHS) const { + return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS); + } + MachineLoopInfo *LI; + }; } // end anonymous namespace char MachineSinking::ID = 0; @@ -526,8 +536,11 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // Otherwise, we should look at all the successors and decide which one // we should sink to. - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - E = MBB->succ_end(); SI != E; ++SI) { + // We give successors with smaller loop depth higher priority. 
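Before the sink loop continues below, the list shape introduced in the MachineRegisterInfo.cpp hunk above deserves a remark: Next is a conventional null-terminated chain, but Prev is circular, so Head->Prev reaches the tail in O(1) with no separate tail pointer, and defs stay ahead of uses so def_iterator can stop early. A self-contained model of addRegOperandToUseList and removeRegOperandFromUseList (a sketch with a simplified node type, mirroring the logic above):

struct Node { bool IsDef; Node *Prev; Node *Next; };

static void addToList(Node *&Head, Node *N) {
  if (!Head) {                  // empty list: N is both head and tail
    N->Prev = N;
    N->Next = 0;
    Head = N;
    return;
  }
  Node *Last = Head->Prev;      // circular Prev gives the tail in O(1)
  Head->Prev = N;
  N->Prev = Last;
  if (N->IsDef) {               // defs go to the front
    N->Next = Head;
    Head = N;
  } else {                      // uses go to the back
    N->Next = 0;
    Last->Next = N;
  }
}

static void removeFromList(Node *&HeadRef, Node *N) {
  Node *const Head = HeadRef;   // keep the old head for the tail update
  Node *Next = N->Next, *Prev = N->Prev;
  if (N == Head)
    HeadRef = Next;
  else
    Prev->Next = Next;
  (Next ? Next : Head)->Prev = Prev;  // removing the tail fixes Head->Prev
  N->Prev = N->Next = 0;
}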
+ SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end()); + std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI)); + for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(), + E = Succs.end(); SI != E; ++SI) { MachineBasicBlock *SuccBlock = *SI; bool LocalUse = false; if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp new file mode 100644 index 0000000..1a3aa60 --- /dev/null +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -0,0 +1,1153 @@ +//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-trace-metrics" +#include "MachineTraceMetrics.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SparseSet.h" + +using namespace llvm; + +char MachineTraceMetrics::ID = 0; +char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID; + +INITIALIZE_PASS_BEGIN(MachineTraceMetrics, + "machine-trace-metrics", "Machine Trace Metrics", false, true) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(MachineTraceMetrics, + "machine-trace-metrics", "Machine Trace Metrics", false, true) + +MachineTraceMetrics::MachineTraceMetrics() + : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) { + std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0); +} + +void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + TII = MF->getTarget().getInstrInfo(); + TRI = MF->getTarget().getRegisterInfo(); + ItinData = MF->getTarget().getInstrItineraryData(); + MRI = &MF->getRegInfo(); + Loops = &getAnalysis<MachineLoopInfo>(); + BlockInfo.resize(MF->getNumBlockIDs()); + return false; +} + +void MachineTraceMetrics::releaseMemory() { + MF = 0; + BlockInfo.clear(); + for (unsigned i = 0; i != TS_NumStrategies; ++i) { + delete Ensembles[i]; + Ensembles[i] = 0; + } +} + +//===----------------------------------------------------------------------===// +// Fixed block information +//===----------------------------------------------------------------------===// +// +// The number of instructions in a basic block and the CPU resources used by +// those instructions don't depend on any given trace strategy. + +/// Compute the resource usage in basic block MBB. 
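Returning briefly to the MachineSink change above, before getResources is defined below: std::stable_sort, rather than std::sort, keeps successors of equal loop depth in their original CFG order, so the chosen sink destination stays deterministic across runs. In isolation, with an illustrative block type:

#include <algorithm>
#include <vector>

struct Block { int Id; unsigned LoopDepth; };

struct ByLoopDepth {
  bool operator()(const Block &L, const Block &R) const {
    return L.LoopDepth < R.LoopDepth;
  }
};

int main() {
  // Successors in CFG order; blocks 1 and 3 tie at depth 2.
  std::vector<Block> Succs = {{1, 2}, {2, 0}, {3, 2}, {4, 1}};
  std::stable_sort(Succs.begin(), Succs.end(), ByLoopDepth());
  // Result: 2 (depth 0), 4 (depth 1), then 1 before 3 as in the original.
  return 0;
}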
+const MachineTraceMetrics::FixedBlockInfo* +MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { + assert(MBB && "No basic block"); + FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()]; + if (FBI->hasResources()) + return FBI; + + // Compute resource usage in the block. + // FIXME: Compute per-functional unit counts. + FBI->HasCalls = false; + unsigned InstrCount = 0; + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MachineInstr *MI = I; + if (MI->isTransient()) + continue; + ++InstrCount; + if (MI->isCall()) + FBI->HasCalls = true; + } + FBI->InstrCount = InstrCount; + return FBI; +} + +//===----------------------------------------------------------------------===// +// Ensemble utility functions +//===----------------------------------------------------------------------===// + +MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct) + : MTM(*ct) { + BlockInfo.resize(MTM.BlockInfo.size()); +} + +// Virtual destructor serves as an anchor. +MachineTraceMetrics::Ensemble::~Ensemble() {} + +const MachineLoop* +MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const { + return MTM.Loops->getLoopFor(MBB); +} + +// Update resource-related information in the TraceBlockInfo for MBB. +// Only update resources related to the trace above MBB. +void MachineTraceMetrics::Ensemble:: +computeDepthResources(const MachineBasicBlock *MBB) { + TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + + // Compute resources from trace above. The top block is simple. + if (!TBI->Pred) { + TBI->InstrDepth = 0; + TBI->Head = MBB->getNumber(); + return; + } + + // Compute from the block above. A post-order traversal ensures the + // predecessor is always computed first. + TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()]; + assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet"); + const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred); + TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount; + TBI->Head = PredTBI->Head; +} + +// Update resource-related information in the TraceBlockInfo for MBB. +// Only update resources related to the trace below MBB. +void MachineTraceMetrics::Ensemble:: +computeHeightResources(const MachineBasicBlock *MBB) { + TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + + // Compute resources for the current block. + TBI->InstrHeight = MTM.getResources(MBB)->InstrCount; + + // The trace tail is done. + if (!TBI->Succ) { + TBI->Tail = MBB->getNumber(); + return; + } + + // Compute from the block below. A post-order traversal ensures the + // predecessor is always computed first. + TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()]; + assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet"); + TBI->InstrHeight += SuccTBI->InstrHeight; + TBI->Tail = SuccTBI->Tail; +} + +// Check if depth resources for MBB are valid and return the TBI. +// Return NULL if the resources have been invalidated. +const MachineTraceMetrics::TraceBlockInfo* +MachineTraceMetrics::Ensemble:: +getDepthResources(const MachineBasicBlock *MBB) const { + const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + return TBI->hasValidDepth() ? TBI : 0; +} + +// Check if height resources for MBB are valid and return the TBI. +// Return NULL if the resources have been invalidated. 
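getHeightResources follows below; together with computeDepthResources and computeHeightResources above, the depth and height numbers are just running sums of per-block instruction counts along the trace: InstrDepth counts everything strictly above a block, InstrHeight the block itself plus everything below. A sketch with made-up counts:

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  // Instruction counts for a 4-block trace, head to tail.
  std::vector<unsigned> Counts = {5, 3, 8, 2};
  std::vector<unsigned> Depth(Counts.size()), Height(Counts.size());
  for (std::size_t i = 0; i != Counts.size(); ++i)
    Depth[i] = i ? Depth[i - 1] + Counts[i - 1] : 0;   // blocks above i
  for (std::size_t i = Counts.size(); i-- != 0;)
    Height[i] = Counts[i] + (i + 1 != Counts.size() ? Height[i + 1] : 0);
  for (std::size_t i = 0; i != Counts.size(); ++i)     // e.g. block 2: 8 / 10
    printf("block %zu: depth=%u height=%u\n", i, Depth[i], Height[i]);
  return 0;
}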
+const MachineTraceMetrics::TraceBlockInfo* +MachineTraceMetrics::Ensemble:: +getHeightResources(const MachineBasicBlock *MBB) const { + const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + return TBI->hasValidHeight() ? TBI : 0; +} + +//===----------------------------------------------------------------------===// +// Trace Selection Strategies +//===----------------------------------------------------------------------===// +// +// A trace selection strategy is implemented as a sub-class of Ensemble. The +// trace through a block B is computed by two DFS traversals of the CFG +// starting from B. One upwards, and one downwards. During the upwards DFS, +// pickTracePred() is called on the post-ordered blocks. During the downwards +// DFS, pickTraceSucc() is called in a post-order. +// + +// We never allow traces that leave loops, but we do allow traces to enter +// nested loops. We also never allow traces to contain back-edges. +// +// This means that a loop header can never appear above the center block of a +// trace, except as the trace head. Below the center block, loop exiting edges +// are banned. +// +// Return true if an edge from the From loop to the To loop is leaving a loop. +// Either of To and From can be null. +static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) { + return From && !From->contains(To); +} + +// MinInstrCountEnsemble - Pick the trace that executes the least number of +// instructions. +namespace { +class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble { + const char *getName() const { return "MinInstr"; } + const MachineBasicBlock *pickTracePred(const MachineBasicBlock*); + const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*); + +public: + MinInstrCountEnsemble(MachineTraceMetrics *mtm) + : MachineTraceMetrics::Ensemble(mtm) {} +}; +} + +// Select the preferred predecessor for MBB. +const MachineBasicBlock* +MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { + if (MBB->pred_empty()) + return 0; + const MachineLoop *CurLoop = getLoopFor(MBB); + // Don't leave loops, and never follow back-edges. + if (CurLoop && MBB == CurLoop->getHeader()) + return 0; + unsigned CurCount = MTM.getResources(MBB)->InstrCount; + const MachineBasicBlock *Best = 0; + unsigned BestDepth = 0; + for (MachineBasicBlock::const_pred_iterator + I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { + const MachineBasicBlock *Pred = *I; + const MachineTraceMetrics::TraceBlockInfo *PredTBI = + getDepthResources(Pred); + // Ignore cycles that aren't natural loops. + if (!PredTBI) + continue; + // Pick the predecessor that would give this block the smallest InstrDepth. + unsigned Depth = PredTBI->InstrDepth + CurCount; + if (!Best || Depth < BestDepth) + Best = Pred, BestDepth = Depth; + } + return Best; +} + +// Select the preferred successor for MBB. +const MachineBasicBlock* +MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { + if (MBB->pred_empty()) + return 0; + const MachineLoop *CurLoop = getLoopFor(MBB); + const MachineBasicBlock *Best = 0; + unsigned BestHeight = 0; + for (MachineBasicBlock::const_succ_iterator + I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { + const MachineBasicBlock *Succ = *I; + // Don't consider back-edges. + if (CurLoop && Succ == CurLoop->getHeader()) + continue; + // Don't consider successors exiting CurLoop. 
+ if (isExitingLoop(CurLoop, getLoopFor(Succ))) + continue; + const MachineTraceMetrics::TraceBlockInfo *SuccTBI = + getHeightResources(Succ); + // Ignore cycles that aren't natural loops. + if (!SuccTBI) + continue; + // Pick the successor that would give this block the smallest InstrHeight. + unsigned Height = SuccTBI->InstrHeight; + if (!Best || Height < BestHeight) + Best = Succ, BestHeight = Height; + } + return Best; +} + +// Get an Ensemble sub-class for the requested trace strategy. +MachineTraceMetrics::Ensemble * +MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) { + assert(strategy < TS_NumStrategies && "Invalid trace strategy enum"); + Ensemble *&E = Ensembles[strategy]; + if (E) + return E; + + // Allocate new Ensemble on demand. + switch (strategy) { + case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this)); + default: llvm_unreachable("Invalid trace strategy enum"); + } +} + +void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n'); + BlockInfo[MBB->getNumber()].invalidate(); + for (unsigned i = 0; i != TS_NumStrategies; ++i) + if (Ensembles[i]) + Ensembles[i]->invalidate(MBB); +} + +void MachineTraceMetrics::verifyAnalysis() const { + if (!MF) + return; +#ifndef NDEBUG + assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size"); + for (unsigned i = 0; i != TS_NumStrategies; ++i) + if (Ensembles[i]) + Ensembles[i]->verify(); +#endif +} + +//===----------------------------------------------------------------------===// +// Trace building +//===----------------------------------------------------------------------===// +// +// Traces are built by two CFG traversals. To avoid recomputing too much, use a +// set abstraction that confines the search to the current loop, and doesn't +// revisit blocks. + +namespace { +struct LoopBounds { + MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks; + SmallPtrSet<const MachineBasicBlock*, 8> Visited; + const MachineLoopInfo *Loops; + bool Downward; + LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks, + const MachineLoopInfo *loops) + : Blocks(blocks), Loops(loops), Downward(false) {} +}; +} + +// Specialize po_iterator_storage in order to prune the post-order traversal so +// it is limited to the current loop and doesn't traverse the loop back edges. +namespace llvm { +template<> +class po_iterator_storage<LoopBounds, true> { + LoopBounds &LB; +public: + po_iterator_storage(LoopBounds &lb) : LB(lb) {} + void finishPostorder(const MachineBasicBlock*) {} + + bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) { + // Skip already visited To blocks. + MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()]; + if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth()) + return false; + // From is null once when To is the trace center block. + if (From) { + if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) { + // Don't follow backedges, don't leave FromLoop when going upwards. + if ((LB.Downward ? To : From) == FromLoop->getHeader()) + return false; + // Don't leave FromLoop. + if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To))) + return false; + } + } + // To is a new block. Mark the block as visited in case the CFG has cycles + // that MachineLoopInfo didn't recognize as a natural loop. + return LB.Visited.insert(To); + } +}; +} + +/// Compute the trace through MBB. 
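computeTrace is defined next; the pruning it relies on is the insertEdge hook further below, which refuses already-computed blocks, back-edges into the current loop header, and edges leaving the From block's loop. The downward-scan filter, modeled standalone with simplified loop and block types (loopContains plays the role of MachineLoop::contains):

struct Loop;
struct Block { const Loop *L; };                 // innermost containing loop
struct Loop { const Block *Header; const Loop *Parent; };

static bool loopContains(const Loop *Outer, const Loop *Inner) {
  for (; Inner; Inner = Inner->Parent)
    if (Inner == Outer)
      return true;
  return false;
}

// Mirror of isExitingLoop() plus the back-edge check, for a downward scan.
static bool followEdgeDown(const Block *From, const Block *To) {
  if (const Loop *FromLoop = From->L) {
    if (To == FromLoop->Header)
      return false;                              // back-edge
    if (!loopContains(FromLoop, To->L))
      return false;                              // exits FromLoop
  }
  return true;
}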
+void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Computing " << getName() << " trace through BB#" + << MBB->getNumber() << '\n'); + // Set up loop bounds for the backwards post-order traversal. + LoopBounds Bounds(BlockInfo, MTM.Loops); + + // Run an upwards post-order search for the trace start. + Bounds.Downward = false; + Bounds.Visited.clear(); + typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO; + for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds); + I != E; ++I) { + DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": "); + TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; + // All the predecessors have been visited, pick the preferred one. + TBI.Pred = pickTracePred(*I); + DEBUG({ + if (TBI.Pred) + dbgs() << "BB#" << TBI.Pred->getNumber() << '\n'; + else + dbgs() << "null\n"; + }); + // The trace leading to I is now known, compute the depth resources. + computeDepthResources(*I); + } + + // Run a downwards post-order search for the trace end. + Bounds.Downward = true; + Bounds.Visited.clear(); + typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO; + for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds); + I != E; ++I) { + DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": "); + TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; + // All the successors have been visited, pick the preferred one. + TBI.Succ = pickTraceSucc(*I); + DEBUG({ + if (TBI.Succ) + dbgs() << "BB#" << TBI.Succ->getNumber() << '\n'; + else + dbgs() << "null\n"; + }); + // The trace leaving I is now known, compute the height resources. + computeHeightResources(*I); + } +} + +/// Invalidate traces through BadMBB. +void +MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { + SmallVector<const MachineBasicBlock*, 16> WorkList; + TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()]; + + // Invalidate height resources of blocks above MBB. + if (BadTBI.hasValidHeight()) { + BadTBI.invalidateHeight(); + WorkList.push_back(BadMBB); + do { + const MachineBasicBlock *MBB = WorkList.pop_back_val(); + DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() + << " height.\n"); + // Find any MBB predecessors that have MBB as their preferred successor. + // They are the only ones that need to be invalidated. + for (MachineBasicBlock::const_pred_iterator + I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { + TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + if (!TBI.hasValidHeight()) + continue; + if (TBI.Succ == MBB) { + TBI.invalidateHeight(); + WorkList.push_back(*I); + continue; + } + // Verify that TBI.Succ is actually a *I successor. + assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed"); + } + } while (!WorkList.empty()); + } + + // Invalidate depth resources of blocks below MBB. + if (BadTBI.hasValidDepth()) { + BadTBI.invalidateDepth(); + WorkList.push_back(BadMBB); + do { + const MachineBasicBlock *MBB = WorkList.pop_back_val(); + DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() + << " depth.\n"); + // Find any MBB successors that have MBB as their preferred predecessor. + // They are the only ones that need to be invalidated. 
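The successor loop of this second invalidation pass continues below. The key property of both worklist walks is that a neighbor is re-queued only when its cached preferred link points back at the invalidated block, so the cost is proportional to the traces actually passing through BadMBB rather than to the whole function. The same shape in miniature, with illustrative types:

#include <cstddef>
#include <vector>

struct Block {
  std::vector<Block *> Preds;
  Block *PreferredSucc;        // cached trace link, may be null
  bool ValidHeight;
};

// Invalidate only the blocks whose cached trace actually runs through Bad.
static void invalidateHeights(Block *Bad) {
  std::vector<Block *> WorkList;
  Bad->ValidHeight = false;
  WorkList.push_back(Bad);
  while (!WorkList.empty()) {
    Block *B = WorkList.back();
    WorkList.pop_back();
    for (std::size_t i = 0; i != B->Preds.size(); ++i) {
      Block *P = B->Preds[i];
      if (P->ValidHeight && P->PreferredSucc == B) {
        P->ValidHeight = false;
        WorkList.push_back(P);
      }
    }
  }
}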
+ for (MachineBasicBlock::const_succ_iterator + I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { + TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + if (!TBI.hasValidDepth()) + continue; + if (TBI.Pred == MBB) { + TBI.invalidateDepth(); + WorkList.push_back(*I); + continue; + } + // Verify that TBI.Pred is actually a *I predecessor. + assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed"); + } + } while (!WorkList.empty()); + } + + // Clear any per-instruction data. We only have to do this for BadMBB itself + // because the instructions in that block may change. Other blocks may be + // invalidated, but their instructions will stay the same, so there is no + // need to erase the Cycle entries. They will be overwritten when we + // recompute. + for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end(); + I != E; ++I) + Cycles.erase(I); +} + +void MachineTraceMetrics::Ensemble::verify() const { +#ifndef NDEBUG + assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() && + "Outdated BlockInfo size"); + for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) { + const TraceBlockInfo &TBI = BlockInfo[Num]; + if (TBI.hasValidDepth() && TBI.Pred) { + const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num); + assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace"); + assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() && + "Trace is broken, depth should have been invalidated."); + const MachineLoop *Loop = getLoopFor(MBB); + assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge"); + } + if (TBI.hasValidHeight() && TBI.Succ) { + const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num); + assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace"); + assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() && + "Trace is broken, height should have been invalidated."); + const MachineLoop *Loop = getLoopFor(MBB); + const MachineLoop *SuccLoop = getLoopFor(TBI.Succ); + assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) && + "Trace contains backedge"); + } + } +#endif +} + +//===----------------------------------------------------------------------===// +// Data Dependencies +//===----------------------------------------------------------------------===// +// +// Compute the depth and height of each instruction based on data dependencies +// and instruction latencies. These cycle numbers assume that the CPU can issue +// an infinite number of instructions per cycle as long as their dependencies +// are ready. + +// A data dependency is represented as a defining MI and operand numbers on the +// defining and using MI. +namespace { +struct DataDep { + const MachineInstr *DefMI; + unsigned DefOp; + unsigned UseOp; + + DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp) + : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {} + + /// Create a DataDep from an SSA form virtual register. + DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp) + : UseOp(UseOp) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg)); + MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); + assert(!DefI.atEnd() && "Register has no defs"); + DefMI = &*DefI; + DefOp = DefI.getOperandNo(); + assert((++DefI).atEnd() && "Register has multiple defs"); + } +}; +} + +// Get the input data dependencies that must be ready before UseMI can issue. +// Return true if UseMI has any physreg operands. 
+static bool getDataDeps(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + const MachineRegisterInfo *MRI) { + bool HasPhysRegs = false; + for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + HasPhysRegs = true; + continue; + } + // Collect virtual register reads. + if (MO->readsReg()) + Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo())); + } + return HasPhysRegs; +} + +// Get the input data dependencies of a PHI instruction, using Pred as the +// preferred predecessor. +// This will add at most one dependency to Deps. +static void getPHIDeps(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + const MachineBasicBlock *Pred, + const MachineRegisterInfo *MRI) { + // No predecessor at the beginning of a trace. Ignore dependencies. + if (!Pred) + return; + assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI"); + for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) { + if (UseMI->getOperand(i + 1).getMBB() == Pred) { + unsigned Reg = UseMI->getOperand(i).getReg(); + Deps.push_back(DataDep(MRI, Reg, i)); + return; + } + } +} + +// Keep track of physreg data dependencies by recording each live register unit. +// Associate each regunit with an instruction operand. Depending on the +// direction instructions are scanned, it could be the operand that defined the +// regunit, or the highest operand to read the regunit. +namespace { +struct LiveRegUnit { + unsigned RegUnit; + unsigned Cycle; + const MachineInstr *MI; + unsigned Op; + + unsigned getSparseSetIndex() const { return RegUnit; } + + LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {} +}; +} + +// Identify physreg dependencies for UseMI, and update the live regunit +// tracking set when scanning instructions downwards. +static void updatePhysDepsDownwards(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + SparseSet<LiveRegUnit> &RegUnits, + const TargetRegisterInfo *TRI) { + SmallVector<unsigned, 8> Kills; + SmallVector<unsigned, 8> LiveDefOps; + + for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + // Track live defs and kills for updating RegUnits. + if (MO->isDef()) { + if (MO->isDead()) + Kills.push_back(Reg); + else + LiveDefOps.push_back(MO.getOperandNo()); + } else if (MO->isKill()) + Kills.push_back(Reg); + // Identify dependencies. + if (!MO->readsReg()) + continue; + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units); + if (I == RegUnits.end()) + continue; + Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo())); + break; + } + } + + // Update RegUnits to reflect live registers after UseMI. + // First kills. + for (unsigned i = 0, e = Kills.size(); i != e; ++i) + for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units) + RegUnits.erase(*Units); + + // Second, live defs. + for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) { + unsigned DefOp = LiveDefOps[i]; + for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI); + Units.isValid(); ++Units) { + LiveRegUnit &LRU = RegUnits[*Units]; + LRU.MI = UseMI; + LRU.Op = DefOp; + } + } +} + +/// The length of the critical path through a trace is the maximum of two path +/// lengths: +/// +/// 1. 
The maximum height+depth over all instructions in the trace center block. +/// +/// 2. The longest cross-block dependency chain. For small blocks, it is +/// possible that the critical path through the trace doesn't include any +/// instructions in the block. +/// +/// This function computes the second number from the live-in list of the +/// center block. +unsigned MachineTraceMetrics::Ensemble:: +computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { + assert(TBI.HasValidInstrDepths && "Missing depth info"); + assert(TBI.HasValidInstrHeights && "Missing height info"); + unsigned MaxLen = 0; + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + const LiveInReg &LIR = TBI.LiveIns[i]; + if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg)) + continue; + const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); + // Ignore dependencies outside the current trace. + const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()]; + if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head) + continue; + unsigned Len = LIR.Height + Cycles[DefMI].Depth; + MaxLen = std::max(MaxLen, Len); + } + return MaxLen; +} + +/// Compute instruction depths for all instructions above or in MBB in its +/// trace. This assumes that the trace through MBB has already been computed. +void MachineTraceMetrics::Ensemble:: +computeInstrDepths(const MachineBasicBlock *MBB) { + // The top of the trace may already be computed, and HasValidInstrDepths + // implies Head->HasValidInstrDepths, so we only need to start from the first + // block in the trace that needs to be recomputed. + SmallVector<const MachineBasicBlock*, 8> Stack; + do { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + assert(TBI.hasValidDepth() && "Incomplete trace"); + if (TBI.HasValidInstrDepths) + break; + Stack.push_back(MBB); + MBB = TBI.Pred; + } while (MBB); + + // FIXME: If MBB is non-null at this point, it is the last pre-computed block + // in the trace. We should track any live-out physregs that were defined in + // the trace. This is quite rare in SSA form, typically created by CSE + // hoisting a compare. + SparseSet<LiveRegUnit> RegUnits; + RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); + + // Go through trace blocks in top-down order, stopping after the center block. + SmallVector<DataDep, 8> Deps; + while (!Stack.empty()) { + MBB = Stack.pop_back_val(); + DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n"); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + TBI.HasValidInstrDepths = true; + TBI.CriticalPath = 0; + + // Also compute the critical path length through MBB when possible. + if (TBI.HasValidInstrHeights) + TBI.CriticalPath = computeCrossBlockCriticalPath(TBI); + + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MachineInstr *UseMI = I; + + // Collect all data dependencies. + Deps.clear(); + if (UseMI->isPHI()) + getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI); + else if (getDataDeps(UseMI, Deps, MTM.MRI)) + updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI); + + // Filter and process dependencies, computing the earliest issue cycle. + unsigned Cycle = 0; + for (unsigned i = 0, e = Deps.size(); i != e; ++i) { + const DataDep &Dep = Deps[i]; + const TraceBlockInfo&DepTBI = + BlockInfo[Dep.DefMI->getParent()->getNumber()]; + // Ignore dependencies from outside the current trace. 
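The dependency filter continues below. The recurrence this loop computes is depth(MI) = max over all dependencies of depth(DefMI) + latency(DefMI -> MI), with transient instructions (copies and the like) contributing latency 0. A scalar version with made-up numbers:

#include <algorithm>
#include <cstddef>
#include <vector>

struct Dep { unsigned DefDepth; unsigned Latency; }; // Latency 0 if transient

// depth(MI) = max over dependencies of depth(def) + latency(def -> use).
static unsigned issueCycle(const std::vector<Dep> &Deps) {
  unsigned Cycle = 0;
  for (std::size_t i = 0; i != Deps.size(); ++i)
    Cycle = std::max(Cycle, Deps[i].DefDepth + Deps[i].Latency);
  return Cycle;
}
// Deps {3, 2} and {4, 0} give issue cycle 5: the copy at depth 4 is free,
// but the real def at depth 3 needs 2 cycles to produce its result.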
+ if (!DepTBI.hasValidDepth() || DepTBI.Head != TBI.Head) + continue; + assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); + unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData, + Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp, + /* FindMin = */ false); + Cycle = std::max(Cycle, DepCycle); + } + // Remember the instruction depth. + InstrCycles &MICycles = Cycles[UseMI]; + MICycles.Depth = Cycle; + + if (!TBI.HasValidInstrHeights) { + DEBUG(dbgs() << Cycle << '\t' << *UseMI); + continue; + } + // Update critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI); + } + } +} + +// Identify physreg dependencies for MI when scanning instructions upwards. +// Return the issue height of MI after considering any live regunits. +// Height is the issue height computed from virtual register dependencies alone. +static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, + SparseSet<LiveRegUnit> &RegUnits, + const InstrItineraryData *ItinData, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + SmallVector<unsigned, 8> ReadOps; + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (MO->readsReg()) + ReadOps.push_back(MO.getOperandNo()); + if (!MO->isDef()) + continue; + // This is a def of Reg. Remove corresponding entries from RegUnits, and + // update MI Height to consider the physreg dependencies. + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units); + if (I == RegUnits.end()) + continue; + unsigned DepHeight = I->Cycle; + if (!MI->isTransient()) { + // We may not know the UseMI of this dependency, if it came from the + // live-in list. + if (I->MI) + DepHeight += TII->computeOperandLatency(ItinData, + MI, MO.getOperandNo(), + I->MI, I->Op); + else + // No UseMI. Just use the MI latency instead. + DepHeight += TII->getInstrLatency(ItinData, MI); + } + Height = std::max(Height, DepHeight); + // This regunit is dead above MI. + RegUnits.erase(I); + } + } + + // Now we know the height of MI. Update any regunits read. + for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) { + unsigned Reg = MI->getOperand(ReadOps[i]).getReg(); + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + LiveRegUnit &LRU = RegUnits[*Units]; + // Set the height to the highest reader of the unit. + if (LRU.Cycle <= Height && LRU.MI != MI) { + LRU.Cycle = Height; + LRU.MI = MI; + LRU.Op = ReadOps[i]; + } + } + } + + return Height; +} + + +typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap; + +// Push the height of DefMI upwards if required to match UseMI. +// Return true if this is the first time DefMI was seen. +static bool pushDepHeight(const DataDep &Dep, + const MachineInstr *UseMI, unsigned UseHeight, + MIHeightMap &Heights, + const InstrItineraryData *ItinData, + const TargetInstrInfo *TII) { + // Adjust height by Dep.DefMI latency. + if (!Dep.DefMI->isTransient()) + UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp); + + // Update Heights[DefMI] to be the maximum height seen. 
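The lines that follow implement a common map idiom: a single insert() probe both creates the entry and reports whether DefMI was seen before, and on a collision the returned iterator is reused for the max update, avoiding a second lookup. In isolation with std::map standing in for the DenseMap used above:

#include <map>

// Returns true the first time Key is seen; otherwise keeps the max height.
static bool pushMax(std::map<const void *, unsigned> &Heights,
                    const void *Key, unsigned H) {
  std::pair<std::map<const void *, unsigned>::iterator, bool> R =
      Heights.insert(std::make_pair(Key, H));
  if (R.second)
    return true;            // first sighting of Key
  if (R.first->second < H)
    R.first->second = H;    // keep the maximum height seen
  return false;
}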
+ MIHeightMap::iterator I; + bool New; + tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight)); + if (New) + return true; + + // DefMI has been pushed before. Give it the max height. + if (I->second < UseHeight) + I->second = UseHeight; + return false; +} + +/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists +/// of all the blocks in Trace. Stop when reaching the block that contains +/// DefMI. +void MachineTraceMetrics::Ensemble:: +addLiveIns(const MachineInstr *DefMI, + ArrayRef<const MachineBasicBlock*> Trace) { + assert(!Trace.empty() && "Trace should contain at least one block"); + unsigned Reg = DefMI->getOperand(0).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + const MachineBasicBlock *DefMBB = DefMI->getParent(); + + // Reg is live-in to all blocks in Trace that follow DefMBB. + for (unsigned i = Trace.size(); i; --i) { + const MachineBasicBlock *MBB = Trace[i-1]; + if (MBB == DefMBB) + return; + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + // Just add the register. The height will be updated later. + TBI.LiveIns.push_back(Reg); + } +} + +/// Compute instruction heights in the trace through MBB. This updates MBB and +/// the blocks below it in the trace. It is assumed that the trace has already +/// been computed. +void MachineTraceMetrics::Ensemble:: +computeInstrHeights(const MachineBasicBlock *MBB) { + // The bottom of the trace may already be computed. + // Find the blocks that need updating. + SmallVector<const MachineBasicBlock*, 8> Stack; + do { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + assert(TBI.hasValidHeight() && "Incomplete trace"); + if (TBI.HasValidInstrHeights) + break; + Stack.push_back(MBB); + TBI.LiveIns.clear(); + MBB = TBI.Succ; + } while (MBB); + + // As we move upwards in the trace, keep track of instructions that are + // required by deeper trace instructions. Map MI -> height required so far. + MIHeightMap Heights; + + // For physregs, the def isn't known when we see the use. + // Instead, keep track of the highest use of each regunit. + SparseSet<LiveRegUnit> RegUnits; + RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); + + // If the bottom of the trace was already precomputed, initialize heights + // from its live-in list. + // MBB is the highest precomputed block in the trace. + if (MBB) { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + LiveInReg LI = TBI.LiveIns[i]; + if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) { + // For virtual registers, the def latency is included. + unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)]; + if (Height < LI.Height) + Height = LI.Height; + } else { + // For register units, the def latency is not included because we don't + // know the def yet. + RegUnits[LI.Reg].Cycle = LI.Height; + } + } + } + + // Go through the trace blocks in bottom-up order. + SmallVector<DataDep, 8> Deps; + for (;!Stack.empty(); Stack.pop_back()) { + MBB = Stack.back(); + DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n"); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + TBI.HasValidInstrHeights = true; + TBI.CriticalPath = 0; + + // Get dependencies from PHIs in the trace successor. + const MachineBasicBlock *Succ = TBI.Succ; + // If MBB is the last block in the trace, and it has a back-edge to the + // loop header, get loop-carried dependencies from PHIs in the header. For + // that purpose, pretend that all the loop header PHIs have height 0. 
+ if (!Succ) + if (const MachineLoop *Loop = getLoopFor(MBB)) + if (MBB->isSuccessor(Loop->getHeader())) + Succ = Loop->getHeader(); + + if (Succ) { + for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end(); + I != E && I->isPHI(); ++I) { + const MachineInstr *PHI = I; + Deps.clear(); + getPHIDeps(PHI, Deps, MBB, MTM.MRI); + if (!Deps.empty()) { + // Loop header PHI heights are all 0. + unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0; + DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI); + if (pushDepHeight(Deps.front(), PHI, Height, + Heights, MTM.ItinData, MTM.TII)) + addLiveIns(Deps.front().DefMI, Stack); + } + } + } + + // Go through the block backwards. + for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin(); + BI != BB;) { + const MachineInstr *MI = --BI; + + // Find the MI height as determined by virtual register uses in the + // trace below. + unsigned Cycle = 0; + MIHeightMap::iterator HeightI = Heights.find(MI); + if (HeightI != Heights.end()) { + Cycle = HeightI->second; + // We won't be seeing any more MI uses. + Heights.erase(HeightI); + } + + // Don't process PHI deps. They depend on the specific predecessor, and + // we'll get them when visiting the predecessor. + Deps.clear(); + bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI); + + // There may also be regunit dependencies to include in the height. + if (HasPhysRegs) + Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, + MTM.ItinData, MTM.TII, MTM.TRI); + + // Update the required height of any virtual registers read by MI. + for (unsigned i = 0, e = Deps.size(); i != e; ++i) + if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII)) + addLiveIns(Deps[i].DefMI, Stack); + + InstrCycles &MICycles = Cycles[MI]; + MICycles.Height = Cycle; + if (!TBI.HasValidInstrDepths) { + DEBUG(dbgs() << Cycle << '\t' << *MI); + continue; + } + // Update critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI); + } + + // Update virtual live-in heights. They were added by addLiveIns() with a 0 + // height because the final height isn't known until now. + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:"); + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + LiveInReg &LIR = TBI.LiveIns[i]; + const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); + LIR.Height = Heights.lookup(DefMI); + DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height); + } + + // Transfer the live regunits to the live-in list. + for (SparseSet<LiveRegUnit>::const_iterator + RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) { + TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle)); + DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI) + << '@' << RI->Cycle); + } + DEBUG(dbgs() << '\n'); + + if (!TBI.HasValidInstrDepths) + continue; + // Add live-ins to the critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, + computeCrossBlockCriticalPath(TBI)); + DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n'); + } +} + +MachineTraceMetrics::Trace +MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) { + // FIXME: Check cache tags, recompute as needed. 
+ computeTrace(MBB); + computeInstrDepths(MBB); + computeInstrHeights(MBB); + return Trace(*this, BlockInfo[MBB->getNumber()]); +} + +unsigned +MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const { + assert(MI && "Not an instruction."); + assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) && + "MI must be in the trace center block"); + InstrCycles Cyc = getInstrCycles(MI); + return getCriticalPath() - (Cyc.Depth + Cyc.Height); +} + +unsigned +MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { + const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum()); + SmallVector<DataDep, 1> Deps; + getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI); + assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor"); + DataDep &Dep = Deps.front(); + unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += TE.MTM.TII->computeOperandLatency(TE.MTM.ItinData, + Dep.DefMI, Dep.DefOp, + PHI, Dep.UseOp, + /* FindMin = */ false); + return DepCycle; +} + +unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { + // For now, we compute the resource depth from instruction count / issue + // width. Eventually, we should compute resource depth per functional unit + // and return the max. + unsigned Instrs = TBI.InstrDepth; + if (Bottom) + Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; + if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel) + if (Model->IssueWidth != 0) + return Instrs / Model->IssueWidth; + // Assume issue width 1 without a schedule model. + return Instrs; +} + +unsigned MachineTraceMetrics::Trace:: +getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const { + unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; + for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) + Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; + if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel) + if (Model->IssueWidth != 0) + return Instrs / Model->IssueWidth; + // Assume issue width 1 without a schedule model. 
+ return Instrs; +} + +void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { + OS << getName() << " ensemble:\n"; + for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) { + OS << " BB#" << i << '\t'; + BlockInfo[i].print(OS); + OS << '\n'; + } +} + +void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const { + if (hasValidDepth()) { + OS << "depth=" << InstrDepth; + if (Pred) + OS << " pred=BB#" << Pred->getNumber(); + else + OS << " pred=null"; + OS << " head=BB#" << Head; + if (HasValidInstrDepths) + OS << " +instrs"; + } else + OS << "depth invalid"; + OS << ", "; + if (hasValidHeight()) { + OS << "height=" << InstrHeight; + if (Succ) + OS << " succ=BB#" << Succ->getNumber(); + else + OS << " succ=null"; + OS << " tail=BB#" << Tail; + if (HasValidInstrHeights) + OS << " +instrs"; + } else + OS << "height invalid"; + if (HasValidInstrDepths && HasValidInstrHeights) + OS << ", crit=" << CriticalPath; +} + +void MachineTraceMetrics::Trace::print(raw_ostream &OS) const { + unsigned MBBNum = &TBI - &TE.BlockInfo[0]; + + OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum + << " --> BB#" << TBI.Tail << ':'; + if (TBI.hasValidHeight() && TBI.hasValidDepth()) + OS << ' ' << getInstrCount() << " instrs."; + if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights) + OS << ' ' << TBI.CriticalPath << " cycles."; + + const MachineTraceMetrics::TraceBlockInfo *Block = &TBI; + OS << "\nBB#" << MBBNum; + while (Block->hasValidDepth() && Block->Pred) { + unsigned Num = Block->Pred->getNumber(); + OS << " <- BB#" << Num; + Block = &TE.BlockInfo[Num]; + } + + Block = &TBI; + OS << "\n "; + while (Block->hasValidHeight() && Block->Succ) { + unsigned Num = Block->Succ->getNumber(); + OS << " -> BB#" << Num; + Block = &TE.BlockInfo[Num]; + } + OS << '\n'; +} diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h new file mode 100644 index 0000000..c5b86f3 --- /dev/null +++ b/lib/CodeGen/MachineTraceMetrics.h @@ -0,0 +1,341 @@ +//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the MachineTraceMetrics analysis pass +// that estimates CPU resource usage and critical data dependency paths through +// preferred traces. This is useful for super-scalar CPUs where execution speed +// can be limited both by data dependencies and by limited execution resources. +// +// Out-of-order CPUs will often be executing instructions from multiple basic +// blocks at the same time. This makes it difficult to estimate the resource +// usage accurately in a single basic block. Resources can be estimated better +// by looking at a trace through the current basic block. +// +// For every block, the MachineTraceMetrics pass will pick a preferred trace +// that passes through the block. The trace is chosen based on loop structure, +// branch probabilities, and resource usage. The intention is to pick likely +// traces that would be the most affected by code transformations. +// +// It is expensive to compute a full arbitrary trace for every block, so to +// save some computations, traces are chosen to be convergent. 
This means that
+// if the traces through basic blocks A and B ever cross when moving away from
+// A and B, they never diverge again. This applies in both directions: if the
+// traces meet above A and B, they won't diverge when going further back.
+//
+// Traces tend to align with loops. The trace through a block in an inner loop
+// will begin at the loop entry block and end at a back edge. If there are
+// nested loops, the trace may begin and end at those instead.
+//
+// For each trace, we compute the critical path length, which is the number of
+// cycles required to execute the trace when execution is limited by data
+// dependencies only. We also compute the resource height, which is the number
+// of cycles required to execute all instructions in the trace when ignoring
+// data dependencies.
+//
+// Every instruction in the current block has a slack: the number of cycles
+// execution of the instruction can be delayed without extending the critical
+// path.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class InstrItineraryData;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineLoop;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class raw_ostream;
+
+class MachineTraceMetrics : public MachineFunctionPass {
+ const MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const InstrItineraryData *ItinData;
+ const MachineRegisterInfo *MRI;
+ const MachineLoopInfo *Loops;
+
+public:
+ class Ensemble;
+ class Trace;
+ static char ID;
+ MachineTraceMetrics();
+ void getAnalysisUsage(AnalysisUsage&) const;
+ bool runOnMachineFunction(MachineFunction&);
+ void releaseMemory();
+ void verifyAnalysis() const;
+
+ friend class Ensemble;
+ friend class Trace;
+
+ /// Per-basic block information that doesn't depend on the trace through the
+ /// block.
+ struct FixedBlockInfo {
+ /// The number of non-trivial instructions in the block.
+ /// Doesn't count PHI and COPY instructions that are likely to be removed.
+ unsigned InstrCount;
+
+ /// True when the block contains calls.
+ bool HasCalls;
+
+ FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {}
+
+ /// Returns true when resource information for this block has been computed.
+ bool hasResources() const { return InstrCount != ~0u; }
+
+ /// Invalidate resource information.
+ void invalidate() { InstrCount = ~0u; }
+ };
+
+ /// Get the fixed resource information about MBB. Compute it on demand.
+ const FixedBlockInfo *getResources(const MachineBasicBlock*);
+
+ /// A virtual register or regunit required by a basic block or its trace
+ /// successors.
+ struct LiveInReg {
+ /// The virtual register required, or a register unit.
+ unsigned Reg;
+
+ /// For virtual registers: Minimum height of the defining instruction.
+ /// For regunits: Height of the highest user in the trace.
+ unsigned Height;
+
+ LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
+ };
+
+ /// Per-basic block information that relates to a specific trace through the
+ /// block. Because traces are convergent, only one of these is required per
+ /// block in a trace ensemble.
+ struct TraceBlockInfo {
+ /// Trace predecessor, or NULL for the first block in the trace.
+ /// Valid when hasValidDepth().
+ const MachineBasicBlock *Pred;
+
+ /// Trace successor, or NULL for the last block in the trace.
+ /// Valid when hasValidHeight().
+ const MachineBasicBlock *Succ;
+
+ /// The block number of the head of the trace. (When hasValidDepth()).
+ unsigned Head;
+
+ /// The block number of the tail of the trace. (When hasValidHeight()).
+ unsigned Tail;
+
+ /// Accumulated number of instructions in the trace above this block.
+ /// Does not include instructions in this block.
+ unsigned InstrDepth;
+
+ /// Accumulated number of instructions in the trace below this block.
+ /// Includes instructions in this block.
+ unsigned InstrHeight;
+
+ TraceBlockInfo() :
+ Pred(0), Succ(0),
+ InstrDepth(~0u), InstrHeight(~0u),
+ HasValidInstrDepths(false), HasValidInstrHeights(false) {}
+
+ /// Returns true if the depth resources have been computed from the trace
+ /// above this block.
+ bool hasValidDepth() const { return InstrDepth != ~0u; }
+
+ /// Returns true if the height resources have been computed from the trace
+ /// below this block.
+ bool hasValidHeight() const { return InstrHeight != ~0u; }
+
+ /// Invalidate depth resources when some block above this one has changed.
+ void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
+
+ /// Invalidate height resources when a block below this one has changed.
+ void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
+
+ // Data-dependency-related information. Per-instruction depth and height
+ // are computed from data dependencies in the current trace, using
+ // itinerary data.
+
+ /// Instruction depths have been computed. This implies hasValidDepth().
+ bool HasValidInstrDepths;
+
+ /// Instruction heights have been computed. This implies hasValidHeight().
+ bool HasValidInstrHeights;
+
+ /// Critical path length. This is the number of cycles in the longest data
+ /// dependency chain through the trace. This is only valid when both
+ /// HasValidInstrDepths and HasValidInstrHeights are set.
+ unsigned CriticalPath;
+
+ /// Live-in registers. These registers are defined above the current block
+ /// and used by this block or a block below it.
+ /// This does not include PHI uses in the current block, but it does
+ /// include PHI uses in deeper blocks.
+ SmallVector<LiveInReg, 4> LiveIns;
+
+ void print(raw_ostream&) const;
+ };
+
+ /// InstrCycles represents the cycle height and depth of an instruction in a
+ /// trace.
+ struct InstrCycles {
+ /// Earliest issue cycle as determined by data dependencies and instruction
+ /// latencies from the beginning of the trace. Data dependencies from
+ /// before the trace are not included.
+ unsigned Depth;
+
+ /// Minimum number of cycles from when this instruction is issued to the
+ /// end of the trace, as determined by data dependencies and instruction
+ /// latencies.
+ unsigned Height;
+ };
+
+ /// A trace represents a plausible sequence of executed basic blocks that
+ /// passes through the current basic block. The Trace class serves as a
+ /// handle to internal cached data structures.
+ class Trace {
+ Ensemble &TE;
+ TraceBlockInfo &TBI;
+
+ unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
+
+ public:
+ explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
+ void print(raw_ostream&) const;
+
+ /// Compute the total number of instructions in the trace.
+ unsigned getInstrCount() const { + return TBI.InstrDepth + TBI.InstrHeight; + } + + /// Return the resource depth of the top/bottom of the trace center block. + /// This is the number of cycles required to execute all instructions from + /// the trace head to the trace center block. The resource depth only + /// considers execution resources, it ignores data dependencies. + /// When Bottom is set, instructions in the trace center block are included. + unsigned getResourceDepth(bool Bottom) const; + + /// Return the resource length of the trace. This is the number of cycles + /// required to execute the instructions in the trace if they were all + /// independent, exposing the maximum instruction-level parallelism. + /// + /// Any blocks in Extrablocks are included as if they were part of the + /// trace. + unsigned getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks = + ArrayRef<const MachineBasicBlock*>()) const; + + /// Return the length of the (data dependency) critical path through the + /// trace. + unsigned getCriticalPath() const { return TBI.CriticalPath; } + + /// Return the depth and height of MI. The depth is only valid for + /// instructions in or above the trace center block. The height is only + /// valid for instructions in or below the trace center block. + InstrCycles getInstrCycles(const MachineInstr *MI) const { + return TE.Cycles.lookup(MI); + } + + /// Return the slack of MI. This is the number of cycles MI can be delayed + /// before the critical path becomes longer. + /// MI must be an instruction in the trace center block. + unsigned getInstrSlack(const MachineInstr *MI) const; + + /// Return the Depth of a PHI instruction in a trace center block successor. + /// The PHI does not have to be part of the trace. + unsigned getPHIDepth(const MachineInstr *PHI) const; + }; + + /// A trace ensemble is a collection of traces selected using the same + /// strategy, for example 'minimum resource height'. There is one trace for + /// every block in the function. + class Ensemble { + SmallVector<TraceBlockInfo, 4> BlockInfo; + DenseMap<const MachineInstr*, InstrCycles> Cycles; + friend class Trace; + + void computeTrace(const MachineBasicBlock*); + void computeDepthResources(const MachineBasicBlock*); + void computeHeightResources(const MachineBasicBlock*); + unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&); + void computeInstrDepths(const MachineBasicBlock*); + void computeInstrHeights(const MachineBasicBlock*); + void addLiveIns(const MachineInstr *DefMI, + ArrayRef<const MachineBasicBlock*> Trace); + + protected: + MachineTraceMetrics &MTM; + virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0; + virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0; + explicit Ensemble(MachineTraceMetrics*); + const MachineLoop *getLoopFor(const MachineBasicBlock*) const; + const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const; + const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const; + + public: + virtual ~Ensemble(); + virtual const char *getName() const =0; + void print(raw_ostream&) const; + void invalidate(const MachineBasicBlock *MBB); + void verify() const; + + /// Get the trace that passes through MBB. + /// The trace is computed on demand. + Trace getTrace(const MachineBasicBlock *MBB); + }; + + /// Strategies for selecting traces. + enum Strategy { + /// Select the trace through a block that has the fewest instructions. 
+ TS_MinInstrCount, + + TS_NumStrategies + }; + + /// Get the trace ensemble representing the given trace selection strategy. + /// The returned Ensemble object is owned by the MachineTraceMetrics analysis, + /// and valid for the lifetime of the analysis pass. + Ensemble *getEnsemble(Strategy); + + /// Invalidate cached information about MBB. This must be called *before* MBB + /// is erased, or the CFG is otherwise changed. + /// + /// This invalidates per-block information about resource usage for MBB only, + /// and it invalidates per-trace information for any trace that passes + /// through MBB. + /// + /// Call Ensemble::getTrace() again to update any trace handles. + void invalidate(const MachineBasicBlock *MBB); + +private: + // One entry per basic block, indexed by block number. + SmallVector<FixedBlockInfo, 4> BlockInfo; + + // One ensemble per strategy. + Ensemble* Ensembles[TS_NumStrategies]; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, + const MachineTraceMetrics::Trace &Tr) { + Tr.print(OS); + return OS; +} + +inline raw_ostream &operator<<(raw_ostream &OS, + const MachineTraceMetrics::Ensemble &En) { + En.print(OS); + return OS; +} +} // end namespace llvm + +#endif diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index d8dece6..852c169 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -73,8 +73,10 @@ namespace { typedef SmallVector<const uint32_t*, 4> RegMaskVector; typedef DenseSet<unsigned> RegSet; typedef DenseMap<unsigned, const MachineInstr*> RegMap; + typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet; const MachineInstr *FirstTerminator; + BlockSet FunctionBlocks; BitVector regsReserved; BitVector regsAllocatable; @@ -117,6 +119,9 @@ namespace { // block. This set is disjoint from regsLiveOut. RegSet vregsRequired; + // Set versions of block's predecessor and successor lists. + BlockSet Preds, Succs; + BBInfo() : reachable(false) {} // Add register to vregsPassed if it belongs there. 
Return true if @@ -203,6 +208,10 @@ namespace { void report(const char *msg, const MachineBasicBlock *MBB); void report(const char *msg, const MachineInstr *MI); void report(const char *msg, const MachineOperand *MO, unsigned MONum); + void report(const char *msg, const MachineFunction *MF, + const LiveInterval &LI); + void report(const char *msg, const MachineBasicBlock *MBB, + const LiveInterval &LI); void checkLiveness(const MachineOperand *MO, unsigned MONum); void markReachable(const MachineBasicBlock *MBB); @@ -212,6 +221,10 @@ namespace { void calcRegsRequired(); void verifyLiveVariables(); void verifyLiveIntervals(); + void verifyLiveInterval(const LiveInterval&); + void verifyLiveIntervalValue(const LiveInterval&, VNInfo*); + void verifyLiveIntervalSegment(const LiveInterval&, + LiveInterval::const_iterator); }; struct MachineVerifierPass : public MachineFunctionPass { @@ -350,9 +363,9 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - *OS << "- basic block: " << MBB->getName() - << " " << (void*)MBB - << " (BB#" << MBB->getNumber() << ")"; + *OS << "- basic block: BB#" << MBB->getNumber() + << ' ' << MBB->getName() + << " (" << (void*)MBB << ')'; if (Indexes) *OS << " [" << Indexes->getMBBStartIdx(MBB) << ';' << Indexes->getMBBEndIdx(MBB) << ')'; @@ -377,6 +390,28 @@ void MachineVerifier::report(const char *msg, *OS << "\n"; } +void MachineVerifier::report(const char *msg, const MachineFunction *MF, + const LiveInterval &LI) { + report(msg, MF); + *OS << "- interval: "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + *OS << PrintReg(LI.reg, TRI); + else + *OS << PrintRegUnit(LI.reg, TRI); + *OS << ' ' << LI << '\n'; +} + +void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, + const LiveInterval &LI) { + report(msg, MBB); + *OS << "- interval: "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + *OS << PrintReg(LI.reg, TRI); + else + *OS << PrintRegUnit(LI.reg, TRI); + *OS << ' ' << LI << '\n'; +} + void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { BBInfo &MInfo = MBBInfoMap[MBB]; if (!MInfo.reachable) { @@ -404,6 +439,22 @@ void MachineVerifier::visitMachineFunctionBefore() { regsAllocatable = TRI->getAllocatableSet(*MF); markReachable(&MF->front()); + + // Build a set of the basic blocks in the function. + FunctionBlocks.clear(); + for (MachineFunction::const_iterator + I = MF->begin(), E = MF->end(); I != E; ++I) { + FunctionBlocks.insert(I); + BBInfo &MInfo = MBBInfoMap[I]; + + MInfo.Preds.insert(I->pred_begin(), I->pred_end()); + if (MInfo.Preds.size() != I->pred_size()) + report("MBB has duplicate entries in its predecessor list.", I); + + MInfo.Succs.insert(I->succ_begin(), I->succ_end()); + if (MInfo.Succs.size() != I->succ_size()) + report("MBB has duplicate entries in its successor list.", I); + } } // Does iterator point to a and b as the first two elements? @@ -440,6 +491,25 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { E = MBB->succ_end(); I != E; ++I) { if ((*I)->isLandingPad()) LandingPadSuccs.insert(*I); + if (!FunctionBlocks.count(*I)) + report("MBB has successor that isn't part of the function.", MBB); + if (!MBBInfoMap[*I].Preds.count(MBB)) { + report("Inconsistent CFG", MBB); + *OS << "MBB is not in the predecessor list of the successor BB#" + << (*I)->getNumber() << ".\n"; + } + } + + // Check the predecessor list. 
+ for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(),
+ E = MBB->pred_end(); I != E; ++I) {
+ if (!FunctionBlocks.count(*I))
+ report("MBB has predecessor that isn't part of the function.", MBB);
+ if (!MBBInfoMap[*I].Succs.count(MBB)) {
+ report("Inconsistent CFG", MBB);
+ *OS << "MBB is not in the successor list of the predecessor BB#"
+ << (*I)->getNumber() << ".\n";
+ }
}

const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
@@ -510,7 +580,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
++MBBI;
if (MBBI == MF->end()) {
report("MBB conditionally falls through out of function!", MBB);
- } if (MBB->succ_size() != 2) {
+ } if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (&*MBBI != TBB)
+ report("MBB exits via conditional branch/fall-through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/fall-through but the CFG "
+ "successor doesn't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/fall-through but doesn't have "
"exactly two CFG successors!", MBB);
} else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
@@ -530,7 +608,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
} else if (TBB && FBB) {
// Block conditionally branches somewhere, otherwise branches
// somewhere else.
- if (MBB->succ_size() != 2) {
+ if (MBB->succ_size() == 1) {
+ // A conditional branch with only one successor is weird, but allowed.
+ if (FBB != TBB)
+ report("MBB exits via conditional branch/branch through but only has "
+ "one CFG successor!", MBB);
+ else if (TBB != *MBB->succ_begin())
+ report("MBB exits via conditional branch/branch through but the CFG "
+ "successor doesn't match the actual successor!", MBB);
+ } else if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/branch but doesn't have "
"exactly two CFG successors!", MBB);
} else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
@@ -651,10 +737,10 @@ void
MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
const MCInstrDesc &MCID = MI->getDesc();
- const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
// The first MCID.NumDefs operands must be explicit register defines
if (MONum < MCID.getNumDefs()) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
if (!MO->isReg())
report("Explicit definition must be a register", MO, MONum);
else if (!MO->isDef() && !MCOI.isOptionalDef())
@@ -662,6 +748,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
else if (MO->isImplicit())
report("Explicit definition marked as implicit", MO, MONum);
} else if (MONum < MCID.getNumOperands()) {
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
// Don't check if it's the last operand in a variadic instruction. See,
// e.g., LDM_RET in the arm back end.
if (MO->isReg() &&
@@ -685,6 +772,12 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
if (MRI->tracksLiveness() && !MI->isDebugValue())
checkLiveness(MO, MONum);
+ // Verify two-address constraints after leaving SSA form.
+ unsigned DefIdx;
+ if (!MRI->isSSA() && MO->isUse() &&
+ MI->isRegTiedToDefOperand(MONum, &DefIdx) &&
+ Reg != MI->getOperand(DefIdx).getReg())
+ report("Two-address instruction operands must be identical", MO, MONum);
// Check register classes.
if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { @@ -786,20 +879,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (MO->readsReg()) { regsLiveInButUnused.erase(Reg); - bool isKill = false; - unsigned defIdx; - if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { - // A two-addr use counts as a kill if use and def are the same. - unsigned DefReg = MI->getOperand(defIdx).getReg(); - if (Reg == DefReg) - isKill = true; - else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - report("Two-address instruction operands must be identical", MO, MONum); - } - } else - isKill = MO->isKill(); - - if (isKill) + if (MO->isKill()) addRegWithSubRegs(regsKilled, Reg); // Check that LiveVars knows this kill. @@ -811,23 +891,44 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } // Check LiveInts liveness and kill. - if (TargetRegisterInfo::isVirtualRegister(Reg) && - LiveInts && !LiveInts->isNotInMIMap(MI)) { - SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true); - if (LiveInts->hasInterval(Reg)) { - const LiveInterval &LI = LiveInts->getInterval(Reg); - if (!LI.liveAt(UseIdx)) { - report("No live range at use", MO, MONum); - *OS << UseIdx << " is not live in " << LI << '\n'; + if (LiveInts && !LiveInts->isNotInMIMap(MI)) { + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI); + // Check the cached regunit intervals. + if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) { + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) { + LiveRangeQuery LRQ(*LI, UseIdx); + if (!LRQ.valueIn()) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) + << ' ' << *LI << '\n'; + } + if (MO->isKill() && !LRQ.isKill()) { + report("Live range continues after kill flag", MO, MONum); + *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n'; + } + } } - // Check for extra kill flags. - // Note that we allow missing kill flags for now. - if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) { - report("Live range continues after kill flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; + } + + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (LiveInts->hasInterval(Reg)) { + // This is a virtual register interval. + const LiveInterval &LI = LiveInts->getInterval(Reg); + LiveRangeQuery LRQ(LI, UseIdx); + if (!LRQ.valueIn()) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << LI << '\n'; + } + // Check for extra kill flags. + // Note that we allow missing kill flags for now. + if (MO->isKill() && !LRQ.isKill()) { + report("Live range continues after kill flag", MO, MONum); + *OS << "Live range: " << LI << '\n'; + } + } else { + report("Virtual register has no live interval", MO, MONum); } - } else { - report("Virtual register has no Live interval", MO, MONum); } } @@ -1124,281 +1225,282 @@ void MachineVerifier::verifyLiveIntervals() { const LiveInterval &LI = LiveInts->getInterval(Reg); assert(Reg == LI.reg && "Invalid reg to interval mapping"); + verifyLiveInterval(LI); + } - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); - I!=E; ++I) { - VNInfo *VNI = *I; - const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); + // Verify all the cached regunit intervals. 
+ for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) + if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i)) + verifyLiveInterval(*LI); +} - if (!DefVNI) { - if (!VNI->isUnused()) { - report("Valno not live at def and not marked unused", MF); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; - } - continue; - } +void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI, + VNInfo *VNI) { + if (VNI->isUnused()) + return; - if (VNI->isUnused()) - continue; + const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); - if (DefVNI != VNI) { - report("Live range at def has different valno", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " where valno #" << DefVNI->id << " is live in " << LI << '\n'; - continue; - } + if (!DefVNI) { + report("Valno not live at def and not marked unused", MF, LI); + *OS << "Valno #" << VNI->id << '\n'; + return; + } - const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); - if (!MBB) { - report("Invalid definition index", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - continue; - } + if (DefVNI != VNI) { + report("Live range at def has different valno", MF, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " where valno #" << DefVNI->id << " is live\n"; + return; + } - if (VNI->isPHIDef()) { - if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { - report("PHIDef value is not defined at MBB start", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << ", not at the beginning of BB#" << MBB->getNumber() - << " in " << LI << '\n'; - } - } else { - // Non-PHI def. - const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); - if (!MI) { - report("No instruction at def index", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - continue; - } + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); + if (!MBB) { + report("Invalid definition index", MF, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " in " << LI << '\n'; + return; + } - bool hasDef = false; - bool isEarlyClobber = false; - for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || !MOI->isDef()) - continue; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - if (MOI->getReg() != LI.reg) - continue; - } else { - if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || - !TRI->regsOverlap(LI.reg, MOI->getReg())) - continue; - } - hasDef = true; - if (MOI->isEarlyClobber()) - isEarlyClobber = true; - } + if (VNI->isPHIDef()) { + if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { + report("PHIDef value is not defined at MBB start", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; + } + return; + } - if (!hasDef) { - report("Defining instruction does not modify register", MI); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; - } + // Non-PHI def. + const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); + if (!MI) { + report("No instruction at def index", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + return; + } - // Early clobber defs begin at USE slots, but other defs must begin at - // DEF slots. 
- if (isEarlyClobber) { - if (!VNI->def.isEarlyClobber()) { - report("Early clobber def must be at an early-clobber slot", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - } - } else if (!VNI->def.isRegister()) { - report("Non-PHI, non-early clobber def must be at a register slot", - MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - } - } + bool hasDef = false; + bool isEarlyClobber = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) + continue; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + if (MOI->getReg() != LI.reg) + continue; + } else { + if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + !TRI->hasRegUnit(MOI->getReg(), LI.reg)) + continue; } + hasDef = true; + if (MOI->isEarlyClobber()) + isEarlyClobber = true; + } - for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) { - const VNInfo *VNI = I->valno; - assert(VNI && "Live range has no valno"); + if (!hasDef) { + report("Defining instruction does not modify register", MI); + *OS << "Valno #" << VNI->id << " in " << LI << '\n'; + } - if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { - report("Foreign valno in live range", MF); - I->print(*OS); - *OS << " has a valno not in " << LI << '\n'; - } + // Early clobber defs begin at USE slots, but other defs must begin at + // DEF slots. + if (isEarlyClobber) { + if (!VNI->def.isEarlyClobber()) { + report("Early clobber def must be at an early-clobber slot", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } + } else if (!VNI->def.isRegister()) { + report("Non-PHI, non-early clobber def must be at a register slot", + MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } +} - if (VNI->isUnused()) { - report("Live range valno is marked unused", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - } +void +MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, + LiveInterval::const_iterator I) { + const VNInfo *VNI = I->valno; + assert(VNI && "Live range has no valno"); + + if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { + report("Foreign valno in live range", MF, LI); + *OS << *I << " has a bad valno\n"; + } - const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); - if (!MBB) { - report("Bad start of live segment, no basic block", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - continue; - } - SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); - if (I->start != MBBStartIdx && I->start != VNI->def) { - report("Live segment must begin at MBB entry or valno def", MBB); - I->print(*OS); - *OS << " in " << LI << '\n' << "Basic block starts at " - << MBBStartIdx << '\n'; - } + if (VNI->isUnused()) { + report("Live range valno is marked unused", MF, LI); + *OS << *I << '\n'; + } - const MachineBasicBlock *EndMBB = - LiveInts->getMBBFromIndex(I->end.getPrevSlot()); - if (!EndMBB) { - report("Bad end of live segment, no basic block", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - continue; - } + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); + if (!MBB) { + report("Bad start of live segment, no basic block", MF, LI); + *OS << *I << '\n'; + return; + } + SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); + if (I->start != MBBStartIdx && I->start != VNI->def) { + report("Live segment must begin at MBB entry or valno def", MBB, 
LI); + *OS << *I << '\n'; + } - // No more checks for live-out segments. - if (I->end == LiveInts->getMBBEndIdx(EndMBB)) - continue; + const MachineBasicBlock *EndMBB = + LiveInts->getMBBFromIndex(I->end.getPrevSlot()); + if (!EndMBB) { + report("Bad end of live segment, no basic block", MF, LI); + *OS << *I << '\n'; + return; + } - // The live segment is ending inside EndMBB - const MachineInstr *MI = - LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); - if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB); - I->print(*OS); - *OS << " in " << LI << '\n' << "Basic block starts at " - << MBBStartIdx << '\n'; + // No more checks for live-out segments. + if (I->end == LiveInts->getMBBEndIdx(EndMBB)) + return; + + // RegUnit intervals are allowed dead phis. + if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() && + I->start == VNI->def && I->end == VNI->def.getDeadSlot()) + return; + + // The live segment is ending inside EndMBB + const MachineInstr *MI = + LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); + if (!MI) { + report("Live segment doesn't end at a valid instruction", EndMBB, LI); + *OS << *I << '\n'; + return; + } + + // The block slot must refer to a basic block boundary. + if (I->end.isBlock()) { + report("Live segment ends at B slot of an instruction", EndMBB, LI); + *OS << *I << '\n'; + } + + if (I->end.isDead()) { + // Segment ends on the dead slot. + // That means there must be a dead def. + if (!SlotIndex::isSameInstr(I->start, I->end)) { + report("Live segment ending at dead slot spans instructions", EndMBB, LI); + *OS << *I << '\n'; + } + } + + // A live segment can only end at an early-clobber slot if it is being + // redefined by an early-clobber def. + if (I->end.isEarlyClobber()) { + if (I+1 == LI.end() || (I+1)->start != I->end) { + report("Live segment ending at early clobber slot must be " + "redefined by an EC def in the same instruction", EndMBB, LI); + *OS << *I << '\n'; + } + } + + // The following checks only apply to virtual registers. Physreg liveness + // is too weird to check. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + // A live range can end with either a redefinition, a kill flag on a + // use, or a dead flag on a def. + bool hasRead = false; + bool hasDeadDef = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || MOI->getReg() != LI.reg) continue; - } + if (MOI->readsReg()) + hasRead = true; + if (MOI->isDef() && MOI->isDead()) + hasDeadDef = true; + } - // The block slot must refer to a basic block boundary. - if (I->end.isBlock()) { - report("Live segment ends at B slot of an instruction", MI); + if (I->end.isDead()) { + if (!hasDeadDef) { + report("Instruction doesn't have a dead def operand", MI); I->print(*OS); *OS << " in " << LI << '\n'; } - - if (I->end.isDead()) { - // Segment ends on the dead slot. - // That means there must be a dead def. - if (!SlotIndex::isSameInstr(I->start, I->end)) { - report("Live segment ending at dead slot spans instructions", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } - - // A live segment can only end at an early-clobber slot if it is being - // redefined by an early-clobber def. 
- if (I->end.isEarlyClobber()) { - if (I+1 == E || (I+1)->start != I->end) { - report("Live segment ending at early clobber slot must be " - "redefined by an EC def in the same instruction", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } + } else { + if (!hasRead) { + report("Instruction ending live range doesn't read the register", MI); + *OS << *I << " in " << LI << '\n'; } + } + } - // The following checks only apply to virtual registers. Physreg liveness - // is too weird to check. - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - // A live range can end with either a redefinition, a kill flag on a - // use, or a dead flag on a def. - bool hasRead = false; - bool hasDeadDef = false; - for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || MOI->getReg() != LI.reg) - continue; - if (MOI->readsReg()) - hasRead = true; - if (MOI->isDef() && MOI->isDead()) - hasDeadDef = true; - } - - if (I->end.isDead()) { - if (!hasDeadDef) { - report("Instruction doesn't have a dead def operand", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } else { - if (!hasRead) { - report("Instruction ending live range doesn't read the register", - MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } - } + // Now check all the basic blocks in this live segment. + MachineFunction::const_iterator MFI = MBB; + // Is this live range the beginning of a non-PHIDef VN? + if (I->start == VNI->def && !VNI->isPHIDef()) { + // Not live-in to any blocks. + if (MBB == EndMBB) + return; + // Skip this block. + ++MFI; + } + for (;;) { + assert(LiveInts->isLiveInToMBB(LI, MFI)); + // We don't know how to track physregs into a landing pad. + if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && + MFI->isLandingPad()) { + if (&*MFI == EndMBB) + break; + ++MFI; + continue; + } - // Now check all the basic blocks in this live segment. - MachineFunction::const_iterator MFI = MBB; - // Is this live range the beginning of a non-PHIDef VN? - if (I->start == VNI->def && !VNI->isPHIDef()) { - // Not live-in to any blocks. - if (MBB == EndMBB) - continue; - // Skip this block. - ++MFI; + // Is VNI a PHI-def in the current block? + bool IsPHI = VNI->isPHIDef() && + VNI->def == LiveInts->getMBBStartIdx(MFI); + + // Check that VNI is live-out of all predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), + PE = MFI->pred_end(); PI != PE; ++PI) { + SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); + const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); + + // All predecessors must have a live-out value. + if (!PVNI) { + report("Register not marked live out of predecessor", *PI, LI); + *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " + << PEnd << '\n'; + continue; } - for (;;) { - assert(LiveInts->isLiveInToMBB(LI, MFI)); - // We don't know how to track physregs into a landing pad. - if (TargetRegisterInfo::isPhysicalRegister(LI.reg) && - MFI->isLandingPad()) { - if (&*MFI == EndMBB) - break; - ++MFI; - continue; - } - // Is VNI a PHI-def in the current block? - bool IsPHI = VNI->isPHIDef() && - VNI->def == LiveInts->getMBBStartIdx(MFI); - - // Check that VNI is live-out of all predecessors. - for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), - PE = MFI->pred_end(); PI != PE; ++PI) { - SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); - const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); - - // All predecessors must have a live-out value. 
- if (!PVNI) {
- report("Register not marked live out of predecessor", *PI);
- *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
- << PEnd << " in " << LI << '\n';
- continue;
- }
-
- // Only PHI-defs can take different predecessor values.
- if (!IsPHI && PVNI != VNI) {
- report("Different value live out of predecessor", *PI);
- *OS << "Valno #" << PVNI->id << " live out of BB#"
- << (*PI)->getNumber() << '@' << PEnd
- << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << " in "
- << PrintReg(Reg) << ": " << LI << '\n';
- }
- }
- if (&*MFI == EndMBB)
- break;
- ++MFI;
+ // Only PHI-defs can take different predecessor values.
+ if (!IsPHI && PVNI != VNI) {
+ report("Different value live out of predecessor", *PI, LI);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << '\n';
+ }
}
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ }
+}
- // Check the LI only has one connected component.
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- ConnectedVNInfoEqClasses ConEQ(*LiveInts);
- unsigned NumComp = ConEQ.Classify(&LI);
- if (NumComp > 1) {
- report("Multiple connected components in live interval", MF);
- *OS << NumComp << " components in " << LI << '\n';
- for (unsigned comp = 0; comp != NumComp; ++comp) {
- *OS << comp << ": valnos";
- for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
- E = LI.vni_end(); I!=E; ++I)
- if (comp == ConEQ.getEqClass(*I))
- *OS << ' ' << (*I)->id;
- *OS << '\n';
- }
+void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I!=E; ++I)
+ verifyLiveIntervalValue(LI, *I);
+
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I)
+ verifyLiveIntervalSegment(LI, I);
+
+ // Check the LI only has one connected component.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF, LI);
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ *OS << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I))
+ *OS << ' ' << (*I)->id;
+ *OS << '\n';
}
}
}
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 69d6d00..56526f2 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -88,6 +88,10 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
cl::desc("Print machine instrs"),
cl::value_desc("pass-name"), cl::init("option-unspecified"));
+// Experimental option to run live interval analysis early.
+static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
+ cl::desc("Run live interval analysis earlier in the pipeline"));
+
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
/// i.e. -disable-mypass=false has no effect.
@@ -452,7 +456,8 @@ void TargetPassConfig::addMachinePasses() {
printAndVerify("After Instruction Selection");

// Expand pseudo-instructions emitted by ISel.
- addPass(&ExpandISelPseudosID);
+ if (addPass(&ExpandISelPseudosID))
+ printAndVerify("After ExpandISelPseudos");

// Add passes that optimize machine instructions in SSA form.
if (getOptLevel() != CodeGenOpt::None) {
@@ -648,6 +653,11 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
addPass(&MachineLoopInfoID);
addPass(&PHIEliminationID);
}
+
+ // Eventually, we want to run LiveIntervals before PHI elimination.
+ if (EarlyLiveIntervals)
+ addPass(&LiveIntervalsID);
+
addPass(&TwoAddressInstructionPassID);

if (EnableStrongPHIElim)
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 91c33c4..9099862 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -78,6 +78,8 @@ STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
+STATISTIC(NumLoadFold, "Number of loads folded");
+STATISTIC(NumSelects, "Number of selects optimized");

namespace {
class PeepholeOptimizer : public MachineFunctionPass {
@@ -108,12 +110,14 @@ namespace {
bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ bool optimizeSelect(MachineInstr *MI);
bool isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
};
}
@@ -384,6 +388,47 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
return false;
}
+/// Optimize a select instruction.
+bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) {
+ unsigned TrueOp = 0;
+ unsigned FalseOp = 0;
+ bool Optimizable = false;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
+ return false;
+ if (!Optimizable)
+ return false;
+ if (!TII->optimizeSelect(MI))
+ return false;
+ MI->eraseFromParent();
+ ++NumSelects;
+ return true;
+}
+
+/// isLoadFoldable - Check whether MI is a candidate for folding into a later
+/// instruction. We only fold loads into virtual registers, and only when the
+/// defined virtual register has a single use.
+bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
+ unsigned &FoldAsLoadDefReg) {
+ if (!MI->canFoldAsLoad() || !MI->mayLoad())
+ return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+
+ unsigned Reg = MI->getOperand(0).getReg();
+ // To reduce compilation time, we check MRI->hasOneUse when inserting
+ // loads. Ideally it would be checked when processing the uses of the
+ // load, since uses can be removed during the peephole pass.
+ if (!MI->getOperand(0).getSubReg() && + TargetRegisterInfo::isVirtualRegister(Reg) && + MRI->hasOneUse(Reg)) { + FoldAsLoadDefReg = Reg; + return true; + } + return false; +} + bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { @@ -441,6 +486,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { SmallPtrSet<MachineInstr*, 8> LocalMIs; SmallSet<unsigned, 4> ImmDefRegs; DenseMap<unsigned, MachineInstr*> ImmDefMIs; + unsigned FoldAsLoadDefReg; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; @@ -448,37 +494,33 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { LocalMIs.clear(); ImmDefRegs.clear(); ImmDefMIs.clear(); + FoldAsLoadDefReg = 0; - bool First = true; - MachineBasicBlock::iterator PMII; for (MachineBasicBlock::iterator MII = I->begin(), MIE = I->end(); MII != MIE; ) { MachineInstr *MI = &*MII; + // We may be erasing MI below, increment MII now. + ++MII; LocalMIs.insert(MI); + // If there exists an instruction which belongs to the following + // categories, we will discard the load candidate. if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || MI->hasUnmodeledSideEffects()) { - ++MII; + FoldAsLoadDefReg = 0; continue; } - - if (MI->isBitcast()) { - if (optimizeBitcastInstr(MI, MBB)) { - // MI is deleted. - LocalMIs.erase(MI); - Changed = true; - MII = First ? I->begin() : llvm::next(PMII); - continue; - } - } else if (MI->isCompare()) { - if (optimizeCmpInstr(MI, MBB)) { - // MI is deleted. - LocalMIs.erase(MI); - Changed = true; - MII = First ? I->begin() : llvm::next(PMII); - continue; - } + if (MI->mayStore() || MI->isCall()) + FoldAsLoadDefReg = 0; + + if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) || + (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || + (MI->isSelect() && optimizeSelect(MI))) { + // MI is deleted. + LocalMIs.erase(MI); + Changed = true; + continue; } if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { @@ -489,9 +531,29 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } - First = false; - PMII = MII; - ++MII; + // Check whether MI is a load candidate for folding into a later + // instruction. If MI is not a candidate, check whether we can fold an + // earlier load into MI. + if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) { + // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr + // can enable folding by converting SUB to CMP. + MachineInstr *DefMI = 0; + MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, + FoldAsLoadDefReg, DefMI); + if (FoldMI) { + // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI. + LocalMIs.erase(MI); + LocalMIs.erase(DefMI); + LocalMIs.insert(FoldMI); + MI->eraseFromParent(); + DefMI->eraseFromParent(); + ++NumLoadFold; + + // MI is replaced with FoldMI. + Changed = true; + continue; + } + } } } diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 8325f20..6b3a48e 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -201,20 +201,16 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { /// its virtual register, and it is guaranteed to be a block-local register. /// bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { - // Check for non-debug uses or defs following MO. 
- // This is the most likely way to fail - fast path it. - MachineOperand *Next = &MO; - while ((Next = Next->getNextOperandForReg())) - if (!Next->isDebug()) - return false; - // If the register has ever been spilled or reloaded, we conservatively assume // it is a global register used in multiple blocks. if (StackSlotForVirtReg[MO.getReg()] != -1) return false; // Check that the use/def chain has exactly one operand - MO. - return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO; + MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg()); + if (&I.getOperand() != &MO) + return false; + return ++I == MRI->reg_nodbg_end(); } /// addKillFlag - Set kill flags on last use of a virtual register. diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 6ac5428..d0cff48 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -1747,7 +1747,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" << "********** Function: " - << ((Value*)mf.getFunction())->getName() << '\n'); + << mf.getFunction()->getName() << '\n'); MF = &mf; if (VerifyEnabled) diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 733312f..9906334 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -460,14 +460,8 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); // Okay, merge "B1" into the same value number as "B0". - if (BValNo != ValLR->valno) { - // If B1 is killed by a PHI, then the merged live range must also be killed - // by the same PHI, as B0 and B1 can not overlap. - bool HasPHIKill = BValNo->hasPHIKill(); + if (BValNo != ValLR->valno) IntB.MergeValueNumberInto(BValNo, ValLR->valno); - if (HasPHIKill) - ValLR->valno->setHasPHIKill(true); - } DEBUG(dbgs() << " result = " << IntB << '\n'); // If the source instruction was killing the source register before the @@ -494,6 +488,11 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, VNInfo *AValNo, VNInfo *BValNo) { + // If AValNo has PHI kills, conservatively assume that IntB defs can reach + // the PHI values. + if (LIS->hasPHIKill(IntA, AValNo)) + return true; + for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; @@ -558,10 +557,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // AValNo is the value number in A that defines the copy, A3 in the example. VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); assert(AValNo && "COPY source not live"); - - // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. - if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) + if (AValNo->isPHIDef() || AValNo->isUnused()) return false; MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); if (!DefMI) @@ -657,6 +653,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; + // Kill flags are no longer accurate. They are recomputed after RA. 
+ UseMO.setIsKill(false); if (TargetRegisterInfo::isPhysicalRegister(NewReg)) UseMO.substPhysReg(NewReg, *TRI); else @@ -1093,6 +1091,11 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // register live range doesn't need to be accurate as long as all the // defs are there. + // Delete the identity copy. + MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg); + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); + // We don't track kills for reserved registers. MRI->clearKillFlags(CP.getSrcReg()); @@ -1382,24 +1385,6 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { ++J; } - // Update kill info. Some live ranges are extended due to copy coalescing. - for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(), - E = LHSValsDefinedFromRHS.end(); I != E; ++I) { - VNInfo *VNI = I->first; - unsigned LHSValID = LHSValNoAssignments[VNI->id]; - if (VNI->hasPHIKill()) - NewVNInfo[LHSValID]->setHasPHIKill(true); - } - - // Update kill info. Some live ranges are extended due to copy coalescing. - for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(), - E = RHSValsDefinedFromLHS.end(); I != E; ++I) { - VNInfo *VNI = I->first; - unsigned RHSValID = RHSValNoAssignments[VNI->id]; - if (VNI->hasPHIKill()) - NewVNInfo[RHSValID]->setHasPHIKill(true); - } - // Clear kill flags where live ranges are extended. while (!LHSOldKills.empty()) LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI); diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 110f478..9c1dba3 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -411,12 +411,11 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { const MachineInstr *MI = SU->getInstr(); unsigned Reg = MI->getOperand(OperIdx).getReg(); - // SSA defs do not have output/anti dependencies. + // Singly defined vregs do not have output/anti dependencies. // The current operand is a def, so we have at least one. - // - // FIXME: This optimization is disabled pending PR13112. - //if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end()) - // return; + // Check here if there are any others... + if (MRI.hasOneDef(Reg)) + return; // Add output dependence to the next nearest def of this vreg. 
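The ScheduleDAGInstrs hunk above prunes output and anti dependencies for singly defined vregs: if a virtual register has exactly one def, no second def exists to conflict with it. A minimal sketch of that single-def test, spelled out in the def-iterator form used by the FIXME this hunk replaces (the free function and its name are illustrative only; the patch itself calls the MRI.hasOneDef(Reg) helper):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"

    // Sketch only: equivalent of MRI.hasOneDef(Reg) for a vreg known to have
    // at least one def (the operand currently being visited).
    static bool hasSingleDef(const llvm::MachineRegisterInfo &MRI,
                             unsigned Reg) {
      llvm::MachineRegisterInfo::def_iterator I = MRI.def_begin(Reg);
      // No second def means no output or anti dependence is possible.
      return llvm::next(I) == MRI.def_end();
    }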
// diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 747bc44..1c485a0 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -228,6 +228,9 @@ namespace { SDValue visitFP_EXTEND(SDNode *N); SDValue visitFNEG(SDNode *N); SDValue visitFABS(SDNode *N); + SDValue visitFCEIL(SDNode *N); + SDValue visitFTRUNC(SDNode *N); + SDValue visitFFLOOR(SDNode *N); SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); @@ -1140,6 +1143,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FP_EXTEND: return visitFP_EXTEND(N); case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); + case ISD::FFLOOR: return visitFFLOOR(N); + case ISD::FCEIL: return visitFCEIL(N); + case ISD::FTRUNC: return visitFTRUNC(N); case ISD::BRCOND: return visitBRCOND(N); case ISD::BR_CC: return visitBR_CC(N); case ISD::LOAD: return visitLOAD(N); @@ -5679,7 +5685,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegal(ISD::FMA, VT)) { + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { @@ -5704,6 +5710,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); // fold vector ops if (VT.isVector()) { @@ -5724,11 +5731,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); + return DAG.getNode(ISD::FNEG, dl, VT, N1); } // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::FADD, dl, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // If 'unsafe math' is enabled, fold @@ -5756,23 +5763,34 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegal(ISD::FMA, VT)) { + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1)); + DAG.getNode(ISD::FNEG, dl, VT, N1)); } // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. 
if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, - DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, N1.getOperand(0)), N1.getOperand(1), N0); } + + // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && + N0.getOperand(0).getOpcode() == ISD::FMUL && + N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { + SDValue N00 = N0.getOperand(0).getOperand(0); + SDValue N01 = N0.getOperand(0).getOperand(1); + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, N00), N01, + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } } return SDValue(); @@ -6231,6 +6249,42 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFCEIL(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (fceil c1) -> fceil(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFTRUNC(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (ftrunc c1) -> ftrunc(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFFLOOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (ffloor c1) -> ffloor(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); @@ -7822,9 +7876,29 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) return SDValue(); - // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, - VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + // If the element type of the input vector is not the same as + // the output element type, make concat_vectors based on input element + // type and then bitcast it to the output vector type. + // + // In other words, avoid nodes like this: + // <NODE> v16i8 = concat_vectors v4i16 v4i16 + // Replace it with this one: + // <NODE0> v8i16 = concat_vectors v4i16 v4i16 + // <NODE1> v16i8 = bitcast NODE0 + EVT ItemType = VecIn1.getValueType().getVectorElementType(); + if (ItemType != VT.getVectorElementType()) { + EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), + ItemType, + VecIn1.getValueType().getVectorNumElements()*2); + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + VecIn1 = DAG.getNode(ISD::BITCAST, dl, VT, VecIn1); + } else + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + } // If VecIn2 is unused then change it to undef.
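The new FCEIL/FTRUNC/FFLOOR combines above fold a constant operand by rounding it to an integral value toward +infinity, zero, and -infinity respectively (APFloat::rmTowardPositive, rmTowardZero, rmTowardNegative). A minimal standalone sketch of that mapping, in plain C++ with <cfenv> standing in for APFloat (the roundToIntegral helper below is a stand-in, not LLVM's API):

// Sketch: rounding to an integral value toward +inf / zero / -inf agrees
// with ceil / trunc / floor, which is the identity the constant folds use.
// Strictly, fiddling with the FP environment wants FENV_ACCESS; this is
// only an illustrative check.
#include <cassert>
#include <cfenv>
#include <cmath>

static double roundToIntegral(double V, int Mode) {
  std::fesetround(Mode);          // select the rounding direction
  double R = std::nearbyint(V);   // round to integral in the current mode
  std::fesetround(FE_TONEAREST);  // restore the default mode
  return R;
}

int main() {
  const double Vals[] = {2.3, -2.3, 2.7, -2.7, 0.5, -0.5};
  for (double V : Vals) {
    assert(roundToIntegral(V, FE_UPWARD)     == std::ceil(V));
    assert(roundToIntegral(V, FE_TOWARDZERO) == std::trunc(V));
    assert(roundToIntegral(V, FE_DOWNWARD)   == std::floor(V));
  }
  return 0;
}

Each assert encodes the rounding-mode identity the combine assumes when it replaces the node with a folded constant.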
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index e5ea6e6..683fac6 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -55,6 +55,7 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" @@ -789,6 +790,17 @@ FastISel::SelectInstruction(const Instruction *I) { MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; + // As a special case, don't handle calls to builtin library functions that + // may be translated directly to target instructions. + if (const CallInst *Call = dyn_cast<CallInst>(I)) { + const Function *F = Call->getCalledFunction(); + LibFunc::Func Func; + if (F && !F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) + return false; + } + // First, try doing target-independent selection. if (SelectOperator(I, I->getOpcode())) { ++NumFastIselSuccessIndependent; @@ -1040,7 +1052,8 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { } } -FastISel::FastISel(FunctionLoweringInfo &funcInfo) +FastISel::FastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) : FuncInfo(funcInfo), MRI(FuncInfo.MF->getRegInfo()), MFI(*FuncInfo.MF->getFrameInfo()), @@ -1049,7 +1062,8 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo) TD(*TM.getTargetData()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), - TRI(*TM.getRegisterInfo()) { + TRI(*TM.getRegisterInfo()), + LibInfo(libInfo) { } FastISel::~FastISel() {} diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 936c126..4488d27 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -411,6 +411,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) { MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(), BA->getTargetFlags())); + } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(), + TI->getOffset(), + TI->getTargetFlags())); } else { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index b0776af..908ebb9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -428,7 +428,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, DebugLoc dl = LD->getDebugLoc(); if (VT.isFloatingPoint() || VT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); - if (TLI.isTypeLegal(intVT)) { + if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) { // Expand to a (misaligned) integer load of the same size, // then bitconvert to floating point or vector. 
SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), @@ -436,8 +436,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); - if (VT.isFloatingPoint() && LoadedVT != VT) - Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); + if (LoadedVT != VT) + Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : + ISD::ANY_EXTEND, dl, VT, Result); ValResult = Result; ChainResult = Chain; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 5384576..84e41fc 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -61,6 +61,7 @@ namespace llvm { if (isa<BasicBlockSDNode>(Node)) return true; if (isa<FrameIndexSDNode>(Node)) return true; if (isa<ConstantPoolSDNode>(Node)) return true; + if (isa<TargetIndexSDNode>(Node)) return true; if (isa<JumpTableSDNode>(Node)) return true; if (isa<ExternalSymbolSDNode>(Node)) return true; if (isa<BlockAddressSDNode>(Node)) return true; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b971b69..f4fe892 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -403,6 +403,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddPointer(GA->getGlobal()); ID.AddInteger(GA->getOffset()); ID.AddInteger(GA->getTargetFlags()); + ID.AddInteger(GA->getAddressSpace()); break; } case ISD::BasicBlock: @@ -438,16 +439,25 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(CP->getTargetFlags()); break; } + case ISD::TargetIndex: { + const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N); + ID.AddInteger(TI->getIndex()); + ID.AddInteger(TI->getOffset()); + ID.AddInteger(TI->getTargetFlags()); + break; + } case ISD::LOAD: { const LoadSDNode *LD = cast<LoadSDNode>(N); ID.AddInteger(LD->getMemoryVT().getRawBits()); ID.AddInteger(LD->getRawSubclassData()); + ID.AddInteger(LD->getPointerInfo().getAddrSpace()); break; } case ISD::STORE: { const StoreSDNode *ST = cast<StoreSDNode>(N); ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } case ISD::ATOMIC_CMP_SWAP: @@ -467,6 +477,12 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { const AtomicSDNode *AT = cast<AtomicSDNode>(N); ID.AddInteger(AT->getMemoryVT().getRawBits()); ID.AddInteger(AT->getRawSubclassData()); + ID.AddInteger(AT->getPointerInfo().getAddrSpace()); + break; + } + case ISD::PREFETCH: { + const MemSDNode *PF = cast<MemSDNode>(N); + ID.AddInteger(PF->getPointerInfo().getAddrSpace()); break; } case ISD::VECTOR_SHUFFLE: { @@ -483,6 +499,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { break; } } // end switch (N->getOpcode()) + + // Target specific memory nodes could also have address spaces to check. 
+ if (N->isTargetMemoryOpcode()) + ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace()); } /// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID @@ -1100,6 +1120,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, ID.AddPointer(GV); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); + ID.AddInteger(GV->getType()->getAddressSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1199,6 +1220,24 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, return SDValue(N, 0); } +SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, + unsigned char TargetFlags) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0); + ID.AddInteger(Index); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, + TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); @@ -2444,6 +2483,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::FABS: V.clearSign(); return getConstantFP(V, VT); + case ISD::FCEIL: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FTRUNC: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FFLOOR: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } case ISD::FP_EXTEND: { bool ignored; // This can return overflow, underflow, or inexact; we don't care. 
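The SelectionDAG.cpp hunks above and below mix the pointer's address space into the FoldingSetNodeID of loads, stores, atomics, prefetches, and target memory nodes. The motivation is CSE correctness: two memory nodes that are identical except for their address space must not be unified into one node. A minimal sketch of the idea with hypothetical stand-in types (a std::map plays the role of LLVM's CSEMap):

// Sketch: why the CSE key needs the address space. With AS in the key the
// two lookups below yield distinct nodes; drop it and they would collide.
#include <cassert>
#include <map>
#include <tuple>

struct LoadNode { unsigned Opcode; int Ptr; unsigned AddrSpace; };

// Key mirrors AddNodeIDCustom for ISD::LOAD: opcode, operand, address space.
using CSEKey = std::tuple<unsigned, int, unsigned>;

int main() {
  std::map<CSEKey, LoadNode> CSEMap;
  auto GetLoad = [&](int Ptr, unsigned AS) -> LoadNode & {
    CSEKey K(/*Opcode=*/1, Ptr, AS);
    return CSEMap.emplace(K, LoadNode{1, Ptr, AS}).first->second;
  };
  LoadNode &A = GetLoad(/*Ptr=*/42, /*AS=*/0); // generic address space
  LoadNode &B = GetLoad(/*Ptr=*/42, /*AS=*/1); // same pointer value, AS 1
  assert(&A != &B && "must not CSE across address spaces");
  return 0;
}

Dropping the address-space component from CSEKey makes both lookups return the same node, which is exactly the bug these hunks guard against.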
@@ -3901,6 +3958,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; AddNodeIDNode(ID, Opcode, VTs, Ops, 4); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); @@ -3973,6 +4031,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Val}; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); @@ -4029,6 +4088,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr}; AddNodeIDNode(ID, Opcode, VTs, Ops, 2); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); @@ -4106,6 +4166,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); @@ -4225,6 +4286,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); @@ -4314,6 +4376,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, ID.AddInteger(VT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4381,6 +4444,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, ID.AddInteger(SVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4405,6 +4469,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8cbe818..f3cf758 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1601,7 +1601,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, // Update successor info addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); - addSuccessorWithWeight(SwitchBB, CB.FalseBB, 
CB.FalseWeight); + // TrueBB and FalseBB are always different unless the incoming IR is + // degenerate. This only happens when running llc on weird IR. + if (CB.TrueBB != CB.FalseBB) + addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -3460,7 +3463,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDValue InChain = getRoot(); - EVT VT = EVT::getEVT(I.getType()); + EVT VT = TLI.getValueType(I.getType()); if (I.getAlignment() * 8 < VT.getSizeInBits()) report_fatal_error("Cannot generate unaligned atomic load"); @@ -3490,7 +3493,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue InChain = getRoot(); - EVT VT = EVT::getEVT(I.getValueOperand()->getType()); + EVT VT = TLI.getValueType(I.getValueOperand()->getType()); if (I.getAlignment() * 8 < VT.getSizeInBits()) report_fatal_error("Cannot generate unaligned atomic store"); @@ -4929,6 +4932,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return 0; + case Intrinsic::floor: + setValue(&I, DAG.getNode(ISD::FFLOOR, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return 0; case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, dl, getValue(I.getArgOperand(0)).getValueType(), @@ -5506,6 +5514,22 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { return false; } +/// visitUnaryFloatCall - If a call instruction is a unary floating-point +/// operation (as expected), translate it to an SDNode with the specified opcode +/// and return true. +bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, + unsigned Opcode) { + // Sanity check that it really is a unary floating-point call. + if (I.getNumArgOperands() != 1 || + !I.getArgOperand(0)->getType()->isFloatingPointTy() || + I.getType() != I.getArgOperand(0)->getType() || + !I.onlyReadsMemory()) + return false; + + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp)); + return true; +} void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. @@ -5536,150 +5560,97 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. - if (!F->hasLocalLinkage() && F->hasName()) { - StringRef Name = F->getName(); - if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") || - (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") || - (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) { + LibFunc::Func Func; + if (!F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) { + switch (Func) { + default: break; + case LibFunc::copysign: + case LibFunc::copysignf: + case LibFunc::copysignl: if (I.getNumArgOperands() == 2 && // Basic sanity checks. 
I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && - I.getType() == I.getArgOperand(1)->getType()) { + I.getType() == I.getArgOperand(1)->getType() && + I.onlyReadsMemory()) { SDValue LHS = getValue(I.getArgOperand(0)); SDValue RHS = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), LHS.getValueType(), LHS, RHS)); return; } - } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") || - (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") || - (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::fabs: + case LibFunc::fabsf: + case LibFunc::fabsl: + if (visitUnaryFloatCall(I, ISD::FABS)) return; - } - } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") || - (LibInfo->has(LibFunc::sinf) && Name == "sinf") || - (LibInfo->has(LibFunc::sinl) && Name == "sinl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::sin: + case LibFunc::sinf: + case LibFunc::sinl: + if (visitUnaryFloatCall(I, ISD::FSIN)) return; - } - } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") || - (LibInfo->has(LibFunc::cosf) && Name == "cosf") || - (LibInfo->has(LibFunc::cosl) && Name == "cosl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::cos: + case LibFunc::cosf: + case LibFunc::cosl: + if (visitUnaryFloatCall(I, ISD::FCOS)) return; - } - } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") || - (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") || - (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::sqrt: + case LibFunc::sqrtf: + case LibFunc::sqrtl: + if (visitUnaryFloatCall(I, ISD::FSQRT)) return; - } - } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") || - (LibInfo->has(LibFunc::floorf) && Name == "floorf") || - (LibInfo->has(LibFunc::floorl) && Name == "floorl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
- I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::floor: + case LibFunc::floorf: + case LibFunc::floorl: + if (visitUnaryFloatCall(I, ISD::FFLOOR)) return; - } - } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") || - (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") || - (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::nearbyint: + case LibFunc::nearbyintf: + case LibFunc::nearbyintl: + if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) return; - } - } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") || - (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") || - (LibInfo->has(LibFunc::ceill) && Name == "ceill")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::ceil: + case LibFunc::ceilf: + case LibFunc::ceill: + if (visitUnaryFloatCall(I, ISD::FCEIL)) return; - } - } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") || - (LibInfo->has(LibFunc::rintf) && Name == "rintf") || - (LibInfo->has(LibFunc::rintl) && Name == "rintl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::rint: + case LibFunc::rintf: + case LibFunc::rintl: + if (visitUnaryFloatCall(I, ISD::FRINT)) return; - } - } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") || - (LibInfo->has(LibFunc::truncf) && Name == "truncf") || - (LibInfo->has(LibFunc::truncl) && Name == "truncl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::trunc: + case LibFunc::truncf: + case LibFunc::truncl: + if (visitUnaryFloatCall(I, ISD::FTRUNC)) return; - } - } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") || - (LibInfo->has(LibFunc::log2f) && Name == "log2f") || - (LibInfo->has(LibFunc::log2l) && Name == "log2l")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
- I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::log2: + case LibFunc::log2f: + case LibFunc::log2l: + if (visitUnaryFloatCall(I, ISD::FLOG2)) return; - } - } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") || - (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") || - (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::exp2: + case LibFunc::exp2f: + case LibFunc::exp2l: + if (visitUnaryFloatCall(I, ISD::FEXP2)) return; - } - } else if (Name == "memcmp") { + break; + case LibFunc::memcmp: if (visitMemCmpCall(I)) return; + break; } } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index d0fde6f..4090002 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -520,6 +520,7 @@ private: void visitPHI(const PHINode &I); void visitCall(const CallInst &I); bool visitMemCmpCall(const CallInst &I); + bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 9fc225f..13cd011 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -100,6 +100,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; case ISD::ConstantPool: return "ConstantPool"; + case ISD::TargetIndex: return "TargetIndex"; case ISD::ExternalSymbol: return "ExternalSymbol"; case ISD::BlockAddress: return "BlockAddress"; case ISD::INTRINSIC_WO_CHAIN: @@ -409,6 +410,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " " << offset; if (unsigned int TF = CP->getTargetFlags()) OS << " [TF=" << TF << ']'; + } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) { + OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">"; + if (unsigned TF = TI->getTargetFlags()) + OS << " [TF=" << TF << ']'; } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { OS << "<"; const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 287c679..4e5e3ba 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -979,7 +979,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; if (TM.Options.EnableFastISel) - FastIS = TLI.createFastISel(*FuncInfo); + FastIS = TLI.createFastISel(*FuncInfo, LibInfo); // Iterate over all basic blocks in the function. 
ReversePostOrderTraversal<const Function*> RPOT(&Fn); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index dff9b2c..6820175 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2303,7 +2303,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOpcode() == ISD::AND) if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = DCI.isBeforeLegalize() ? + EVT ShiftTy = DCI.isBeforeLegalizeOps() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. @@ -2333,7 +2333,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); - EVT ShiftTy = DCI.isBeforeLegalize() ? + EVT ShiftTy = DCI.isBeforeLegalizeOps() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), @@ -2361,7 +2361,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } NewC = NewC.lshr(ShiftBits); if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) { - EVT ShiftTy = DCI.isBeforeLegalize() ? + EVT ShiftTy = DCI.isBeforeLegalizeOps() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, @@ -2464,7 +2464,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO // if it is not already. ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; - if (NewCond != Cond) + if (NewCond != Cond && (DCI.isBeforeLegalizeOps() || + getCondCodeAction(NewCond, N0.getValueType()) == Legal)) return DAG.getSetCC(dl, VT, N0, N1, NewCond); } diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 9a751c1..4a2b7ec 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -652,7 +652,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { // Adjust RegAssign if a register assignment is killed at VNI->def. We // want to avoid calculating the live range of the source register if // possible. - AssignI.find(VNI->def.getPrevSlot()); + AssignI.find(Def.getPrevSlot()); if (!AssignI.valid() || AssignI.start() >= Def) continue; // If MI doesn't kill the assigned register, just leave it. 
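Stepping back to the TargetLowering::SimplifySetCC hunks above: they adjust the transform that rewrites a single-bit test such as (X & 8) != 0 into (X & 8) >> 3. The identity itself is easy to verify exhaustively; a short standalone check in plain C++ (not LLVM code):

// Sketch: for a single-bit mask m = 1u << k, (x & m) != 0 is exactly
// (x & m) >> k, so the compare can be replaced by a shift. This is the
// "AND RHS is a single bit" case guarded in SimplifySetCC.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned k = 3;
  const uint32_t m = 1u << k;            // mask with one bit set, here 8
  for (uint32_t x = 0; x < 1024; ++x) {
    uint32_t cmp = (x & m) != 0 ? 1 : 0; // original setcc form
    uint32_t shr = (x & m) >> k;         // shifted form emitted instead
    assert(cmp == shr);
  }
  return 0;
}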
@@ -739,6 +739,8 @@ void SplitEditor::hoistCopiesForSize() { for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); VI != VE; ++VI) { VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); assert(ParentVNI && "Parent not live at complement def"); @@ -812,6 +814,8 @@ void SplitEditor::hoistCopiesForSize() { for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); VI != VE; ++VI) { VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); const DomPair &Dom = NearestDom[ParentVNI->id]; if (!Dom.first || Dom.second == VNI->def) @@ -1047,8 +1051,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { if (ParentVNI->isUnused()) continue; unsigned RegIdx = RegAssign.lookup(ParentVNI->def); - VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def); - VNI->setIsPHIDef(ParentVNI->isPHIDef()); + defValue(RegIdx, ParentVNI, ParentVNI->def); // Force rematted values to be recomputed everywhere. // The new live ranges may be truncated. diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 43a6ad8..a04ac3f 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -28,15 +28,10 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/Triple.h" using namespace llvm; -// SSPBufferSize - The lower bound for a buffer to be considered for stack -// smashing protection. -static cl::opt<unsigned> -SSPBufferSize("stack-protector-buffer-size", cl::init(8), - cl::desc("Lower bound for a buffer to be considered for " - "stack protection")); - namespace { class StackProtector : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -46,7 +41,7 @@ namespace { Function *F; Module *M; - DominatorTree* DT; + DominatorTree *DT; /// InsertStackProtectors - Insert code into the prologue and epilogue of /// the function. @@ -60,6 +55,11 @@ namespace { /// check fails. BasicBlock *CreateFailBB(); + /// ContainsProtectableArray - Check whether the type either is an array or + /// contains an array of sufficient size so that we need stack protectors + /// for it. + bool ContainsProtectableArray(Type *Ty, bool InStruct = false) const; + /// RequiresStackProtector - Check whether or not this function needs a /// stack protector based upon the stack protector level. bool RequiresStackProtector() const; @@ -70,8 +70,8 @@ namespace { } StackProtector(const TargetLowering *tli) : FunctionPass(ID), TLI(tli) { - initializeStackProtectorPass(*PassRegistry::getPassRegistry()); - } + initializeStackProtectorPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); @@ -95,10 +95,43 @@ bool StackProtector::runOnFunction(Function &Fn) { DT = getAnalysisIfAvailable<DominatorTree>(); if (!RequiresStackProtector()) return false; - + return InsertStackProtectors(); } +/// ContainsProtectableArray - Check whether the type either is an array or +/// contains a char array of sufficient size so that we need stack protectors +/// for it. 
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const { + if (!Ty) return false; + if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { + const TargetMachine &TM = TLI->getTargetMachine(); + if (!AT->getElementType()->isIntegerTy(8)) { + Triple Trip(TM.getTargetTriple()); + + // If we're on a non-Darwin platform or we're inside of a structure, don't + // add stack protectors unless the array is a character array. + if (InStruct || !Trip.isOSDarwin()) + return false; + } + + // If an array has more than SSPBufferSize bytes of allocated space, then we + // emit stack protectors. + if (TM.Options.SSPBufferSize <= TLI->getTargetData()->getTypeAllocSize(AT)) + return true; + } + + const StructType *ST = dyn_cast<StructType>(Ty); + if (!ST) return false; + + for (StructType::element_iterator I = ST->element_begin(), + E = ST->element_end(); I != E; ++I) + if (ContainsProtectableArray(*I, true)) + return true; + + return false; +} + /// RequiresStackProtector - Check whether or not this function needs a stack /// protector based upon the stack protector level. The heuristic we use is to /// add a guard variable to functions that call alloca, and functions with @@ -110,8 +143,6 @@ bool StackProtector::RequiresStackProtector() const { if (!F->hasFnAttr(Attribute::StackProtect)) return false; - const TargetData *TD = TLI->getTargetData(); - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { BasicBlock *BB = I; @@ -123,11 +154,8 @@ bool StackProtector::RequiresStackProtector() const { // protectors. return true; - if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) - // If an array has more than SSPBufferSize bytes of allocated space, - // then we emit stack protectors. - if (SSPBufferSize <= TD->getTypeAllocSize(AT)) - return true; + if (ContainsProtectableArray(AI->getAllocatedType())) + return true; } } @@ -159,17 +187,17 @@ bool StackProtector::InsertStackProtectors() { // StackGuardSlot = alloca i8* // StackGuard = load __stack_chk_guard // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) - // + // PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); unsigned AddressSpace, Offset; if (TLI->getStackCookieLocation(AddressSpace, Offset)) { Constant *OffsetVal = ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); - + StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, PointerType::get(PtrTy, AddressSpace)); } else { - StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); } BasicBlock &Entry = F->getEntryBlock(); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index c6fdc73..5b06195 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -672,8 +672,8 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, LiveInterval &SrcInterval = LI->getInterval(SrcReg); SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex); + (void)SrcVNI; assert(SrcVNI); - SrcVNI->setHasPHIKill(true); continue; } @@ -744,7 +744,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex PHIIndex = LI->getInstructionIndex(PHI); VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot()); assert(DestVNI); - DestVNI->setIsPHIDef(true); // Prior to PHI elimination, the live ranges of PHIs begin at their defining // instruction. 
After PHI elimination, PHI instructions are replaced by VNs @@ -777,7 +776,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, LI->getVNInfoAllocator()); - CopyVNI->setIsPHIDef(true); CopyLI.addRange(LiveRange(MBBStartIndex, DestCopyIndex.getRegSlot(), CopyVNI)); diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index a3d6771..ddee6b2 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -570,12 +570,12 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData, } /// Return the default expected latency for a def based on it's opcode. -unsigned TargetInstrInfo::defaultDefLatency(const InstrItineraryData *ItinData, +unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel, const MachineInstr *DefMI) const { if (DefMI->mayLoad()) - return ItinData->SchedModel->LoadLatency; + return SchedModel->LoadLatency; if (isHighLatencyDef(DefMI->getOpcode())) - return ItinData->SchedModel->HighLatency; + return SchedModel->HighLatency; return 1; } @@ -638,7 +638,7 @@ static int computeDefOperandLatency( return 1; } else if(ItinData->isEmpty()) - return TII->defaultDefLatency(ItinData, DefMI); + return TII->defaultDefLatency(ItinData->SchedModel, DefMI); // ...operand lookup required return -1; @@ -669,7 +669,8 @@ computeOperandLatency(const InstrItineraryData *ItinData, // Expected latency is the max of the stage latency and itinerary props. if (!FindMin) - InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI)); + InstrLatency = std::max(InstrLatency, + defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } @@ -742,6 +743,7 @@ computeOperandLatency(const InstrItineraryData *ItinData, // Expected latency is the max of the stage latency and itinerary props. 
if (!FindMin) - InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI)); + InstrLatency = std::max(InstrLatency, + defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index e4c0119..aa601af 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -30,6 +30,7 @@ #define DEBUG_TYPE "twoaddrinstr" #include "llvm/CodeGen/Passes.h" #include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -55,18 +56,19 @@ STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted"); STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); -STATISTIC(NumReMats, "Number of instructions re-materialized"); -STATISTIC(NumDeletes, "Number of dead instructions deleted"); STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); namespace { class TwoAddressInstructionPass : public MachineFunctionPass { + MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const InstrItineraryData *InstrItins; MachineRegisterInfo *MRI; LiveVariables *LV; + SlotIndexes *Indexes; + LiveIntervals *LIS; AliasAnalysis *AA; CodeGenOpt::Level OptLevel; @@ -92,16 +94,9 @@ namespace { unsigned Reg, MachineBasicBlock::iterator OldPos); - bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC, - MachineInstr *MI, MachineInstr *DefMI, - MachineBasicBlock *MBB, unsigned Loc); - bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist, unsigned &LastDef); - MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB, - unsigned Dist); - bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, MachineInstr *MI, MachineBasicBlock *MBB, unsigned Dist); @@ -117,14 +112,6 @@ namespace { MachineFunction::iterator &mbbi, unsigned RegA, unsigned RegB, unsigned Dist); - typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill; - bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, - SmallVector<NewKill, 4> &NewKills, - MachineBasicBlock *MBB, unsigned Dist); - bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, unsigned Dist); - bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI, MachineBasicBlock *MBB); @@ -150,6 +137,11 @@ namespace { void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &Processed); + typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList; + typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap; + bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); + void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); + void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg); /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part @@ -167,6 +159,8 @@ namespace { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<LiveVariables>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); AU.addPreservedID(MachineLoopInfoID); 
AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -241,7 +235,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, // appropriate location, we can try to sink the current instruction // past it. if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || - KillMI->isTerminator()) + KillMI == OldPos || KillMI->isTerminator()) return false; // If any of the definitions are used by another instruction between the @@ -284,6 +278,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, } } } + assert(KillMO && "Didn't find kill"); // Update kill and LV information. KillMO->setIsKill(false); @@ -297,59 +292,13 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, MBB->remove(MI); MBB->insert(KillPos, MI); + if (LIS) + LIS->handleMove(MI); + ++Num3AddrSunk; return true; } -/// isTwoAddrUse - Return true if the specified MI is using the specified -/// register as a two-address operand. -static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) { - const MCInstrDesc &MCID = UseMI->getDesc(); - for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { - MachineOperand &MO = UseMI->getOperand(i); - if (MO.isReg() && MO.getReg() == Reg && - (MO.isDef() || UseMI->isRegTiedToDefOperand(i))) - // Earlier use is a two-address one. - return true; - } - return false; -} - -/// isProfitableToReMat - Return true if the heuristics determines it is likely -/// to be profitable to re-materialize the definition of Reg rather than copy -/// the register. -bool -TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg, - const TargetRegisterClass *RC, - MachineInstr *MI, MachineInstr *DefMI, - MachineBasicBlock *MBB, unsigned Loc) { - bool OtherUse = false; - for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = UseMO.getParent(); - MachineBasicBlock *UseMBB = UseMI->getParent(); - if (UseMBB == MBB) { - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI); - if (DI != DistanceMap.end() && DI->second == Loc) - continue; // Current use. - OtherUse = true; - // There is at least one other use in the MBB that will clobber the - // register. - if (isTwoAddrUse(UseMI, Reg)) - return true; - } - } - - // If other uses in MBB are not two-address uses, then don't remat. - if (OtherUse) - return false; - - // No other uses in the same block, remat if it's defined in the same - // block so it does not unnecessarily extend the live range. - return MBB == DefMI->getParent(); -} - /// NoUseAfterLastDef - Return true if there are no intervening uses between the /// last instruction in the MBB that defines the specified register and the /// two-address instruction which is being processed. 
It also returns the last @@ -377,31 +326,6 @@ bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg, return !(LastUse > LastDef && LastUse < Dist); } -MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg, - MachineBasicBlock *MBB, - unsigned Dist) { - unsigned LastUseDist = 0; - MachineInstr *LastUse = 0; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg), - E = MRI->reg_end(); I != E; ++I) { - MachineOperand &MO = I.getOperand(); - MachineInstr *MI = MO.getParent(); - if (MI->getParent() != MBB || MI->isDebugValue()) - continue; - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); - if (DI == DistanceMap.end()) - continue; - if (DI->second >= Dist) - continue; - - if (MO.isUse() && DI->second > LastUseDist) { - LastUse = DI->first; - LastUseDist = DI->second; - } - } - return LastUse; -} - /// isCopyToReg - Return true if the specified MI is a copy instruction or /// a extract_subreg instruction. It also returns the source and destination /// registers and whether they are physical registers by reference. @@ -538,7 +462,7 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { } -/// isProfitableToReMat - Return true if it's potentially profitable to commute +/// isProfitableToCommute - Return true if it's potentially profitable to commute /// the two-address instruction that's being processed. bool TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB, @@ -628,6 +552,8 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, if (LV) // Update live variables LV->replaceKillInstruction(RegC, MI, NewMI); + if (Indexes) + Indexes->replaceMachineInstrInMaps(MI, NewMI); mbbi->insert(mi, NewMI); // Insert the new inst mbbi->erase(mi); // Nuke the old inst. @@ -676,6 +602,9 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); bool Sunk = false; + if (Indexes) + Indexes->replaceMachineInstrInMaps(mi, NewMI); + if (NewMI->findRegisterUseOperand(RegB, false, TRI)) // FIXME: Temporary workaround. If the new instruction doesn't // uses RegB, convertToThreeAddress must have created more @@ -785,92 +714,6 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, return; } -/// isSafeToDelete - If the specified instruction does not produce any side -/// effects and all of its defs are dead, then it's safe to delete. -static bool isSafeToDelete(MachineInstr *MI, - const TargetInstrInfo *TII, - SmallVector<unsigned, 4> &Kills) { - if (MI->mayStore() || MI->isCall()) - return false; - if (MI->isTerminator() || MI->hasUnmodeledSideEffects()) - return false; - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - if (MO.isDef() && !MO.isDead()) - return false; - if (MO.isUse() && MO.isKill()) - Kills.push_back(MO.getReg()); - } - return true; -} - -/// canUpdateDeletedKills - Check if all the registers listed in Kills are -/// killed by instructions in MBB preceding the current instruction at -/// position Dist. If so, return true and record information about the -/// preceding kills in NewKills. 
-bool TwoAddressInstructionPass:: -canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, - SmallVector<NewKill, 4> &NewKills, - MachineBasicBlock *MBB, unsigned Dist) { - while (!Kills.empty()) { - unsigned Kill = Kills.back(); - Kills.pop_back(); - if (TargetRegisterInfo::isPhysicalRegister(Kill)) - return false; - - MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist); - if (!LastKill) - return false; - - bool isModRef = LastKill->definesRegister(Kill); - NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef), - LastKill)); - } - return true; -} - -/// DeleteUnusedInstr - If an instruction with a tied register operand can -/// be safely deleted, just delete it. -bool -TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, - unsigned Dist) { - // Check if the instruction has no side effects and if all its defs are dead. - SmallVector<unsigned, 4> Kills; - if (!isSafeToDelete(mi, TII, Kills)) - return false; - - // If this instruction kills some virtual registers, we need to - // update the kill information. If it's not possible to do so, - // then bail out. - SmallVector<NewKill, 4> NewKills; - if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist)) - return false; - - if (LV) { - while (!NewKills.empty()) { - MachineInstr *NewKill = NewKills.back().second; - unsigned Kill = NewKills.back().first.first; - bool isDead = NewKills.back().first.second; - NewKills.pop_back(); - if (LV->removeVirtualRegisterKilled(Kill, mi)) { - if (isDead) - LV->addVirtualRegisterDead(Kill, NewKill); - else - LV->addVirtualRegisterKilled(Kill, NewKill); - } - } - } - - mbbi->erase(mi); // Nuke the old inst. - mi = nmi; - return true; -} - /// RescheduleMIBelowKill - If there is one more local instruction that reads /// 'Reg' and it kills 'Reg, consider moving the instruction below the kill /// instruction in order to eliminate the need for the copy. @@ -1000,6 +843,8 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, // Update live variables LV->removeVirtualRegisterKilled(Reg, KillMI); LV->addVirtualRegisterKilled(Reg, MI); + if (LIS) + LIS->handleMove(MI); DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI); return true; @@ -1154,6 +999,8 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, // Update live variables LV->removeVirtualRegisterKilled(Reg, KillMI); LV->addVirtualRegisterKilled(Reg, MI); + if (LIS) + LIS->handleMove(KillMI); DEBUG(dbgs() << "\trescheduled kill: " << *KillMI); return true; @@ -1180,16 +1027,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, assert(TargetRegisterInfo::isVirtualRegister(regB) && "cannot make instruction into two-address form"); - - // If regA is dead and the instruction can be deleted, just delete - // it so it doesn't clobber regB. bool regBKilled = isKilled(MI, regB, MRI, TII); - if (!regBKilled && MI.getOperand(DstIdx).isDead() && - DeleteUnusedInstr(mi, nmi, mbbi, Dist)) { - ++NumDeletes; - DEBUG(dbgs() << "\tdeleted unused instruction.\n"); - return true; // Done with this instruction." - } if (TargetRegisterInfo::isVirtualRegister(regA)) ScanUses(regA, &*mbbi, Processed); @@ -1273,16 +1111,14 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, if (NewOpc != 0) { const MCInstrDesc &UnfoldMCID = TII->get(NewOpc); if (UnfoldMCID.getNumDefs() == 1) { - MachineFunction &MF = *mbbi->getParent(); - // Unfold the load. 
DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); const TargetRegisterClass *RC = TRI->getAllocatableClass( - TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, MF)); + TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF)); unsigned Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; - if (!TII->unfoldMemoryOperand(MF, &MI, Reg, + if (!TII->unfoldMemoryOperand(*MF, &MI, Reg, /*UnfoldLoad=*/true,/*UnfoldStore=*/false, NewMIs)) { DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); @@ -1359,15 +1195,177 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return false; } +// Collect tied operands of MI that need to be handled. +// Rewrite trivial cases immediately. +// Return true if any tied operands were found, including the trivial ones. +bool TwoAddressInstructionPass:: +collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { + const MCInstrDesc &MCID = MI->getDesc(); + bool AnyOps = false; + unsigned NumOps = MI->isInlineAsm() ? + MI->getNumOperands() : MCID.getNumOperands(); + + for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { + unsigned DstIdx = 0; + if (!MI->isRegTiedToDefOperand(SrcIdx, &DstIdx)) + continue; + AnyOps = true; + MachineOperand &SrcMO = MI->getOperand(SrcIdx); + MachineOperand &DstMO = MI->getOperand(DstIdx); + unsigned SrcReg = SrcMO.getReg(); + unsigned DstReg = DstMO.getReg(); + // Tied constraint already satisfied? + if (SrcReg == DstReg) + continue; + + assert(SrcReg && SrcMO.isUse() && "two address instruction invalid"); + + // Deal with <undef> uses immediately - simply rewrite the src operand. + if (SrcMO.isUndef()) { + // Constrain the DstReg register class if required. + if (TargetRegisterInfo::isVirtualRegister(DstReg)) + if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, + TRI, *MF)) + MRI->constrainRegClass(DstReg, RC); + SrcMO.setReg(DstReg); + DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI); + continue; + } + TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx)); + } + return AnyOps; +} + +// Process a list of tied MI operands that all use the same source register. +// The tied pairs are of the form (SrcIdx, DstIdx). +void +TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, + TiedPairList &TiedPairs, + unsigned &Dist) { + bool IsEarlyClobber = false; + bool RemovedKillFlag = false; + bool AllUsesCopied = true; + unsigned LastCopiedReg = 0; + unsigned RegB = 0; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + unsigned SrcIdx = TiedPairs[tpi].first; + unsigned DstIdx = TiedPairs[tpi].second; + + const MachineOperand &DstMO = MI->getOperand(DstIdx); + unsigned RegA = DstMO.getReg(); + IsEarlyClobber |= DstMO.isEarlyClobber(); + + // Grab RegB from the instruction because it may have changed if the + // instruction was commuted. + RegB = MI->getOperand(SrcIdx).getReg(); + + if (RegA == RegB) { + // The register is tied to multiple destinations (or else we would + // not have continued this far), but this use of the register + // already matches the tied destination. Leave it. + AllUsesCopied = false; + continue; + } + LastCopiedReg = RegA; + + assert(TargetRegisterInfo::isVirtualRegister(RegB) && + "cannot make instruction into two-address form"); + +#ifndef NDEBUG + // First, verify that we don't have a use of "a" in the instruction + // (a = b + a for example) because our transformation will not + // work. This should never occur because we are in SSA form.
+ for (unsigned i = 0; i != MI->getNumOperands(); ++i) + assert(i == DstIdx || + !MI->getOperand(i).isReg() || + MI->getOperand(i).getReg() != RegA); +#endif + + // Emit a copy. + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), RegA).addReg(RegB); + + // Update DistanceMap. + MachineBasicBlock::iterator PrevMI = MI; + --PrevMI; + DistanceMap.insert(std::make_pair(PrevMI, Dist)); + DistanceMap[MI] = ++Dist; + + SlotIndex CopyIdx; + if (Indexes) + CopyIdx = Indexes->insertMachineInstrInMaps(PrevMI).getRegSlot(); + + DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI); + + MachineOperand &MO = MI->getOperand(SrcIdx); + assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() && + "inconsistent operand info for 2-reg pass"); + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + + // Make sure regA is a legal regclass for the SrcIdx operand. + if (TargetRegisterInfo::isVirtualRegister(RegA) && + TargetRegisterInfo::isVirtualRegister(RegB)) + MRI->constrainRegClass(RegA, MRI->getRegClass(RegB)); + + MO.setReg(RegA); + + // Propagate SrcRegMap. + SrcRegMap[RegA] = RegB; + } + + + if (AllUsesCopied) { + if (!IsEarlyClobber) { + // Replace other (un-tied) uses of regB with LastCopiedReg. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(LastCopiedReg); + } + } + } + + // Update live variables for regB. + if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(MI)) { + MachineBasicBlock::iterator PrevMI = MI; + --PrevMI; + LV->addVirtualRegisterKilled(RegB, PrevMI); + } + + } else if (RemovedKillFlag) { + // Some tied uses of regB matched their destination registers, so + // regB is still used in this instruction, but a kill flag was + // removed from a different tied use of regB, so now we need to add + // a kill flag to one of the remaining uses of regB. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + MO.setIsKill(true); + break; + } + } + } +} + /// runOnMachineFunction - Reduce two-address instructions to two operands. /// -bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { - const TargetMachine &TM = MF.getTarget(); - MRI = &MF.getRegInfo(); +bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + const TargetMachine &TM = MF->getTarget(); + MRI = &MF->getRegInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); InstrItins = TM.getInstrItineraryData(); + Indexes = getAnalysisIfAvailable<SlotIndexes>(); LV = getAnalysisIfAvailable<LiveVariables>(); + LIS = getAnalysisIfAvailable<LiveIntervals>(); AA = &getAnalysis<AliasAnalysis>(); OptLevel = TM.getOptLevel(); @@ -1375,20 +1373,15 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); DEBUG(dbgs() << "********** Function: " - << MF.getFunction()->getName() << '\n'); + << MF->getFunction()->getName() << '\n'); // This pass takes the function out of SSA form. MRI->leaveSSA(); - // ReMatRegs - Keep track of the registers whose def's are remat'ed. 
- BitVector ReMatRegs(MRI->getNumVirtRegs()); - - typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> > - TiedOperandMap; - TiedOperandMap TiedOperands(4); + TiedOperandMap TiedOperands; SmallPtrSet<MachineInstr*, 8> Processed; - for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); + for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end(); mbbi != mbbe; ++mbbi) { unsigned Dist = 0; DistanceMap.clear(); @@ -1407,50 +1400,21 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { if (mi->isRegSequence()) RegSequences.push_back(&*mi); - const MCInstrDesc &MCID = mi->getDesc(); - bool FirstTied = true; - DistanceMap.insert(std::make_pair(mi, ++Dist)); ProcessCopy(&*mi, &*mbbi, Processed); // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. - unsigned NumOps = mi->isInlineAsm() - ? mi->getNumOperands() : MCID.getNumOperands(); - for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { - unsigned DstIdx = 0; - if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) - continue; - - if (FirstTied) { - FirstTied = false; - ++NumTwoAddressInstrs; - DEBUG(dbgs() << '\t' << *mi); - } - - assert(mi->getOperand(SrcIdx).isReg() && - mi->getOperand(SrcIdx).getReg() && - mi->getOperand(SrcIdx).isUse() && - "two address instruction invalid"); - - unsigned regB = mi->getOperand(SrcIdx).getReg(); - - // Deal with <undef> uses immediately - simply rewrite the src operand. - if (mi->getOperand(SrcIdx).isUndef()) { - unsigned DstReg = mi->getOperand(DstIdx).getReg(); - // Constrain the DstReg register class if required. - if (TargetRegisterInfo::isVirtualRegister(DstReg)) - if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, - TRI, MF)) - MRI->constrainRegClass(DstReg, RC); - mi->getOperand(SrcIdx).setReg(DstReg); - DEBUG(dbgs() << "\t\trewrite undef:\t" << *mi); - continue; - } - TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx)); + if (!collectTiedOperands(mi, TiedOperands)) { + mi = nmi; + continue; } + ++NumTwoAddressInstrs; + MadeChange = true; + DEBUG(dbgs() << '\t' << *mi); + // If the instruction has a single pair of tied operands, try some // transformations that may either eliminate the tied operands or // improve the opportunities for coalescing away the register copy. @@ -1477,139 +1441,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // Now iterate over the information collected above. for (TiedOperandMap::iterator OI = TiedOperands.begin(), OE = TiedOperands.end(); OI != OE; ++OI) { - SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second; - - bool IsEarlyClobber = false; - bool RemovedKillFlag = false; - bool AllUsesCopied = true; - unsigned LastCopiedReg = 0; - unsigned regB = OI->first; - for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { - unsigned SrcIdx = TiedPairs[tpi].first; - unsigned DstIdx = TiedPairs[tpi].second; - - const MachineOperand &DstMO = mi->getOperand(DstIdx); - unsigned regA = DstMO.getReg(); - IsEarlyClobber |= DstMO.isEarlyClobber(); - - // Grab regB from the instruction because it may have changed if the - // instruction was commuted. - regB = mi->getOperand(SrcIdx).getReg(); - - if (regA == regB) { - // The register is tied to multiple destinations (or else we would - // not have continued this far), but this use of the register - // already matches the tied destination. Leave it. 
- AllUsesCopied = false; - continue; - } - LastCopiedReg = regA; - - assert(TargetRegisterInfo::isVirtualRegister(regB) && - "cannot make instruction into two-address form"); - -#ifndef NDEBUG - // First, verify that we don't have a use of "a" in the instruction - // (a = b + a for example) because our transformation will not - // work. This should never occur because we are in SSA form. - for (unsigned i = 0; i != mi->getNumOperands(); ++i) - assert(i == DstIdx || - !mi->getOperand(i).isReg() || - mi->getOperand(i).getReg() != regA); -#endif - - // Emit a copy or rematerialize the definition. - bool isCopy = false; - const TargetRegisterClass *rc = MRI->getRegClass(regB); - MachineInstr *DefMI = MRI->getUniqueVRegDef(regB); - // If it's safe and profitable, remat the definition instead of - // copying it. - if (DefMI && - DefMI->isAsCheapAsAMove() && - DefMI->isSafeToReMat(TII, AA, regB) && - isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ - DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); - unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); - TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI); - ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB)); - ++NumReMats; - } else { - BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY), - regA).addReg(regB); - isCopy = true; - } - - // Update DistanceMap. - MachineBasicBlock::iterator prevMI = prior(mi); - DistanceMap.insert(std::make_pair(prevMI, Dist)); - DistanceMap[mi] = ++Dist; - - DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI); - - MachineOperand &MO = mi->getOperand(SrcIdx); - assert(MO.isReg() && MO.getReg() == regB && MO.isUse() && - "inconsistent operand info for 2-reg pass"); - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; - } - - // Make sure regA is a legal regclass for the SrcIdx operand. - if (TargetRegisterInfo::isVirtualRegister(regA) && - TargetRegisterInfo::isVirtualRegister(regB)) - MRI->constrainRegClass(regA, MRI->getRegClass(regB)); - - MO.setReg(regA); - - if (isCopy) - // Propagate SrcRegMap. - SrcRegMap[regA] = regB; - } - - if (AllUsesCopied) { - if (!IsEarlyClobber) { - // Replace other (un-tied) uses of regB with LastCopiedReg. - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; - } - MO.setReg(LastCopiedReg); - } - } - } - - // Update live variables for regB. - if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi)) - LV->addVirtualRegisterKilled(regB, prior(mi)); - - } else if (RemovedKillFlag) { - // Some tied uses of regB matched their destination registers, so - // regB is still used in this instruction, but a kill flag was - // removed from a different tied use of regB, so now we need to add - // a kill flag to one of the remaining uses of regB. - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { - MO.setIsKill(true); - break; - } - } - } - - // We didn't change anything if there was a single tied pair, and that - // pair didn't require copies. - if (AllUsesCopied || TiedPairs.size() > 1) { - MadeChange = true; - - // Schedule the source copy / remat inserted to form two-address - // instruction. FIXME: Does it matter the distance map may not be - // accurate after it's scheduled? 
- TII->scheduleTwoAddrSource(prior(mi), mi, *TRI); - } - + processTiedPairs(mi, OI->second, Dist); DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); } @@ -1634,15 +1466,6 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } } - // Some remat'ed instructions are dead. - for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) { - unsigned VReg = TargetRegisterInfo::index2VirtReg(i); - if (MRI->use_nodbg_empty(VReg)) { - MachineInstr *DefMI = MRI->getVRegDef(VReg); - DefMI->eraseFromParent(); - } - } - // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve // SSA form. It's now safe to de-SSA. MadeChange |= EliminateRegSequences(); diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index a4e0d8e..797662b 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -167,9 +167,7 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address, const DWARFDebugLine::LineTable *lineTable = getLineTableForCompileUnit(cu); if (lineTable) { // Get the index of the row we're looking for in the line table. - uint64_t hiPC = cu->getCompileUnitDIE()->getAttributeValueAsUnsigned( - cu, DW_AT_high_pc, -1ULL); - uint32_t rowIndex = lineTable->lookupAddress(address, hiPC); + uint32_t rowIndex = lineTable->lookupAddress(address); if (rowIndex != -1U) { const DWARFDebugLine::Row &row = lineTable->Rows[rowIndex]; // Take file/line info from the line table. diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp index 117fa31..d99575d 100644 --- a/lib/DebugInfo/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARFDebugLine.cpp @@ -95,14 +95,46 @@ void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const { DWARFDebugLine::State::~State() {} void DWARFDebugLine::State::appendRowToMatrix(uint32_t offset) { + if (Sequence::Empty) { + // Record the beginning of instruction sequence. + Sequence::Empty = false; + Sequence::LowPC = Address; + Sequence::FirstRowIndex = row; + } ++row; // Increase the row number. LineTable::appendRow(*this); + if (EndSequence) { + // Record the end of instruction sequence. + Sequence::HighPC = Address; + Sequence::LastRowIndex = row; + if (Sequence::isValid()) + LineTable::appendSequence(*this); + Sequence::reset(); + } Row::postAppend(); } +void DWARFDebugLine::State::finalize() { + row = DoneParsingLineTable; + if (!Sequence::Empty) { + fprintf(stderr, "warning: last sequence in debug line table is not" + "terminated!\n"); + } + // Sort all sequences so that address lookup will work faster. + if (!Sequences.empty()) { + std::sort(Sequences.begin(), Sequences.end(), Sequence::orderByLowPC); + // Note: actually, instruction address ranges of sequences should not + // overlap (in shared objects and executables). If they do, the address + // lookup would still work, though, but result would be ambiguous. + // We don't report warning in this case. For example, + // sometimes .so compiled from multiple object files contains a few + // rudimentary sequences for address ranges [0x0, 0xsomething). 
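// (Editor's example of the invariant finalize() establishes: after the
//  sort, Sequences might look like
//      { [0x4000, 0x4100) -> rows [0, 12),
//        [0x4100, 0x4180) -> rows [12, 20) }
//  i.e. sorted by LowPC with each sequence owning a contiguous slice of the
//  row matrix, which is exactly what the binary search in lookupAddress()
//  relies on.)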
+ } +} + DWARFDebugLine::DumpingState::~DumpingState() {} -void DWARFDebugLine::DumpingState::finalize(uint32_t offset) { +void DWARFDebugLine::DumpingState::finalize() { LineTable::dump(OS); } @@ -180,8 +212,9 @@ DWARFDebugLine::parsePrologue(DataExtractor debug_line_data, fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should" " have ended at 0x%8.8x but it ended ad 0x%8.8x\n", prologue_offset, end_prologue_offset, *offset_ptr); + return false; } - return end_prologue_offset; + return true; } bool @@ -430,47 +463,53 @@ DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, } } - state.finalize(*offset_ptr); + state.finalize(); return end_offset; } -static bool findMatchingAddress(const DWARFDebugLine::Row& row1, - const DWARFDebugLine::Row& row2) { - return row1.Address < row2.Address; -} - uint32_t -DWARFDebugLine::LineTable::lookupAddress(uint64_t address, - uint64_t cu_high_pc) const { - uint32_t index = UINT32_MAX; - if (!Rows.empty()) { - // Use the lower_bound algorithm to perform a binary search since we know - // that our line table data is ordered by address. - DWARFDebugLine::Row row; - row.Address = address; - typedef std::vector<Row>::const_iterator iterator; - iterator begin_pos = Rows.begin(); - iterator end_pos = Rows.end(); - iterator pos = std::lower_bound(begin_pos, end_pos, row, - findMatchingAddress); - if (pos == end_pos) { - if (address < cu_high_pc) - return Rows.size()-1; - } else { - // Rely on fact that we are using a std::vector and we can do - // pointer arithmetic to find the row index (which will be one less - // that what we found since it will find the first position after - // the current address) since std::vector iterators are just - // pointers to the container type. - index = pos - begin_pos; - if (pos->Address > address) { - if (index > 0) - --index; - else - index = UINT32_MAX; - } - } +DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const { + uint32_t unknown_index = UINT32_MAX; + if (Sequences.empty()) + return unknown_index; + // First, find an instruction sequence containing the given address. + DWARFDebugLine::Sequence sequence; + sequence.LowPC = address; + SequenceIter first_seq = Sequences.begin(); + SequenceIter last_seq = Sequences.end(); + SequenceIter seq_pos = std::lower_bound(first_seq, last_seq, sequence, + DWARFDebugLine::Sequence::orderByLowPC); + DWARFDebugLine::Sequence found_seq; + if (seq_pos == last_seq) { + found_seq = Sequences.back(); + } else if (seq_pos->LowPC == address) { + found_seq = *seq_pos; + } else { + if (seq_pos == first_seq) + return unknown_index; + found_seq = *(seq_pos - 1); + } + if (!found_seq.containsPC(address)) + return unknown_index; + // Search for instruction address in the rows describing the sequence. + // Rows are stored in a vector, so we may use arithmetical operations with + // iterators. + DWARFDebugLine::Row row; + row.Address = address; + RowIter first_row = Rows.begin() + found_seq.FirstRowIndex; + RowIter last_row = Rows.begin() + found_seq.LastRowIndex; + RowIter row_pos = std::lower_bound(first_row, last_row, row, + DWARFDebugLine::Row::orderByAddress); + if (row_pos == last_row) { + return found_seq.LastRowIndex - 1; + } + uint32_t index = found_seq.FirstRowIndex + (row_pos - first_row); + if (row_pos->Address > address) { + if (row_pos == first_row) + return unknown_index; + else + index--; } - return index; // Failed to find address. 
+ return index; } diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h index a8c0669..6382b45 100644 --- a/lib/DebugInfo/DWARFDebugLine.h +++ b/lib/DebugInfo/DWARFDebugLine.h @@ -88,6 +88,10 @@ public: void reset(bool default_is_stmt); void dump(raw_ostream &OS) const; + static bool orderByAddress(const Row& LHS, const Row& RHS) { + return LHS.Address < RHS.Address; + } + // The program-counter value corresponding to a machine instruction // generated by the compiler. uint64_t Address; @@ -125,21 +129,63 @@ public: EpilogueBegin:1; }; + // Represents a series of contiguous machine instructions. Line table for each + // compilation unit may consist of multiple sequences, which are not + // guaranteed to be in the order of ascending instruction address. + struct Sequence { + // Sequence describes instructions at address range [LowPC, HighPC) + // and is described by line table rows [FirstRowIndex, LastRowIndex). + uint64_t LowPC; + uint64_t HighPC; + unsigned FirstRowIndex; + unsigned LastRowIndex; + bool Empty; + + Sequence() { reset(); } + void reset() { + LowPC = 0; + HighPC = 0; + FirstRowIndex = 0; + LastRowIndex = 0; + Empty = true; + } + static bool orderByLowPC(const Sequence& LHS, const Sequence& RHS) { + return LHS.LowPC < RHS.LowPC; + } + bool isValid() const { + return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex); + } + bool containsPC(uint64_t pc) const { + return (LowPC <= pc && pc < HighPC); + } + }; + struct LineTable { void appendRow(const DWARFDebugLine::Row &state) { Rows.push_back(state); } + void appendSequence(const DWARFDebugLine::Sequence &sequence) { + Sequences.push_back(sequence); + } void clear() { Prologue.clear(); Rows.clear(); + Sequences.clear(); } - uint32_t lookupAddress(uint64_t address, uint64_t cu_high_pc) const; + // Returns the index of the row with file/line info for a given address, + // or -1 if there is no such row. + uint32_t lookupAddress(uint64_t address) const; void dump(raw_ostream &OS) const; struct Prologue Prologue; - std::vector<Row> Rows; + typedef std::vector<Row> RowVector; + typedef RowVector::const_iterator RowIter; + typedef std::vector<Sequence> SequenceVector; + typedef SequenceVector::const_iterator SequenceIter; + RowVector Rows; + SequenceVector Sequences; }; - struct State : public Row, public LineTable { + struct State : public Row, public Sequence, public LineTable { // Special row codes. enum { StartParsingLineTable = 0, @@ -150,8 +196,11 @@ public: virtual ~State(); virtual void appendRowToMatrix(uint32_t offset); - virtual void finalize(uint32_t offset) { row = DoneParsingLineTable; } - virtual void reset() { Row::reset(Prologue.DefaultIsStmt); } + virtual void finalize(); + virtual void reset() { + Row::reset(Prologue.DefaultIsStmt); + Sequence::reset(); + } // The row number that starts at zero for the prologue, and increases for // each row added to the matrix. 
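// Editor's sketch: the same two-level lookup as lookupAddress() above,
// restated over plain structs (standalone, not the LLVM API), assuming Seqs
// is sorted by LowPC and each sequence's rows are sorted by Address.
#include <algorithm>
#include <cstdint>
#include <vector>

struct Seq { uint64_t LowPC, HighPC; unsigned FirstRow, LastRow; };
struct Row { uint64_t Address; unsigned Line; };

uint32_t lookup(const std::vector<Seq> &Seqs,
                const std::vector<Row> &Rows, uint64_t Addr) {
  // Find the candidate sequence: the last one whose LowPC is <= Addr.
  auto S = std::lower_bound(Seqs.begin(), Seqs.end(), Addr,
      [](const Seq &Q, uint64_t A) { return Q.LowPC < A; });
  if (S == Seqs.end() || S->LowPC != Addr) {
    if (S == Seqs.begin())
      return UINT32_MAX;              // Addr precedes every sequence.
    --S;
  }
  if (Addr >= S->HighPC)
    return UINT32_MAX;                // Addr falls in a gap between sequences.
  // Binary-search only the rows belonging to that sequence.
  auto First = Rows.begin() + S->FirstRow;
  auto Last = Rows.begin() + S->LastRow;
  auto R = std::lower_bound(First, Last, Addr,
      [](const Row &Q, uint64_t A) { return Q.Address < A; });
  if (R == Last || R->Address > Addr) {
    if (R == First)
      return UINT32_MAX;
    --R;                              // Row starting at or before Addr.
  }
  return static_cast<uint32_t>(R - Rows.begin());
}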
@@ -161,7 +210,7 @@ public: struct DumpingState : public State { DumpingState(raw_ostream &OS) : OS(OS) {} virtual ~DumpingState(); - virtual void finalize(uint32_t offset); + virtual void finalize(); private: raw_ostream &OS; }; diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index a744d0c..4afc900 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -501,7 +501,8 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { return 0; } - if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0) { + if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0 && + ExecutionEngine::MCJITCtor == 0) { if (ErrorStr) *ErrorStr = "JIT has not been linked in."; } diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index a942299..97995ad 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -361,7 +361,7 @@ bool JIT::removeModule(Module *M) { MutexGuard locked(lock); - if (jitstate->getModule() == M) { + if (jitstate && jitstate->getModule() == M) { delete jitstate; jitstate = 0; } @@ -433,13 +433,18 @@ GenericValue JIT::runFunction(Function *F, } break; case 1: - if (FTy->getNumParams() == 1 && - FTy->getParamType(0)->isIntegerTy(32)) { + if (FTy->getParamType(0)->isIntegerTy(32)) { GenericValue rv; int (*PF)(int) = (int(*)(int))(intptr_t)FPtr; rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue())); return rv; } + if (FTy->getParamType(0)->isPointerTy()) { + GenericValue rv; + int (*PF)(char *) = (int(*)(char *))(intptr_t)FPtr; + rv.IntVal = APInt(32, PF((char*)GVTOP(ArgValues[0]))); + return rv; + } break; } } diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index 7be6ef8..61bc119 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -461,6 +461,9 @@ namespace { /// allocateCodeSection - Allocate memory for a code section. uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID) { + // Grow the required block size to account for the block header + Size += sizeof(*CurBlock); + // FIXME: Alignement handling. FreeRangeHeader* candidateBlock = FreeMemoryList; FreeRangeHeader* head = FreeMemoryList; diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 84274c0..99c65ec 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -18,6 +18,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MutexGuard.h" #include "llvm/Target/TargetData.h" using namespace llvm; @@ -43,20 +44,40 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // FIXME: Don't do this here. sys::DynamicLibrary::LoadLibraryPermanently(0, NULL); - // If the target supports JIT code generation, create the JIT. 
- if (TargetJITInfo *TJ = TM->getJITInfo()) - return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM), GVsWithCode); - - if (ErrorStr) - *ErrorStr = "target does not support JIT code generation"; - return 0; + return new MCJIT(M, TM, new MCJITMemoryManager(JMM), GVsWithCode); } -MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, - RTDyldMemoryManager *MM, bool AllocateGVsWithCode) - : ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) { +MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM, + bool AllocateGVsWithCode) + : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(MM), Dyld(MM), + isCompiled(false), M(m), OS(Buffer) { setTargetData(TM->getTargetData()); +} + +MCJIT::~MCJIT() { + delete MemMgr; + delete TM; +} + +void MCJIT::emitObject(Module *m) { + /// Currently, MCJIT only supports a single module and the module passed to + /// this function call is expected to be the contained module. The module + /// is passed as a parameter here to prepare for multiple module support in + /// the future. + assert(M == m); + + // Get a thread lock to make sure we aren't trying to compile multiple times + MutexGuard locked(lock); + + // FIXME: Track compilation state on a per-module basis when multiple modules + // are supported. + // Re-compilation is not supported + if (isCompiled) + return; + + PassManager PM; + PM.add(new TargetData(*TM->getTargetData())); // Turn the machine code intermediate representation into bytes in memory @@ -69,23 +90,22 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, // FIXME: When we support multiple modules, we'll want to move the code // gen and finalization out of the constructor here and do it more // on-demand as part of getPointerToFunction(). - PM.run(*M); + PM.run(*m); // Flush the output buffer so the SmallVector gets its data. OS.flush(); // Load the object into the dynamic linker. - MemoryBuffer *MB = MemoryBuffer::getMemBuffer(StringRef(Buffer.data(), + MemoryBuffer* MB = MemoryBuffer::getMemBuffer(StringRef(Buffer.data(), Buffer.size()), "", false); if (Dyld.loadObject(MB)) report_fatal_error(Dyld.getErrorString()); + // Resolve any relocations. Dyld.resolveRelocations(); -} -MCJIT::~MCJIT() { - delete MemMgr; - delete TM; + // FIXME: Add support for per-module compilation state + isCompiled = true; } void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { @@ -93,6 +113,10 @@ void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { } void *MCJIT::getPointerToFunction(Function *F) { + // FIXME: Add support for per-module compilation state + if (!isCompiled) + emitObject(M); + if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { bool AbortOnFailure = !F->hasExternalWeakLinkage(); void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure); @@ -100,6 +124,7 @@ void *MCJIT::getPointerToFunction(Function *F) { return Addr; } + // FIXME: Should the Dyld be retaining module information? Probably not. // FIXME: Should we be using the mangler for this? Probably. 
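// (Editor's sketch of the compile-once pattern that getPointerToFunction()
//  and getPointerToNamedFunction() now share, restated standalone; the names
//  here are hypothetical, and MCJIT itself uses llvm::MutexGuard rather than
//  std::mutex:
//      class LazyJIT {
//        std::mutex Lock;
//        bool Compiled = false;
//        void compileModule();            // runs codegen + relocations once
//        void *lookupSymbol(const char*); // hypothetical symbol-table query
//      public:
//        void *getPointer(const char *Name) {
//          std::lock_guard<std::mutex> G(Lock);
//          if (!Compiled) {               // first request pays for compilation
//            compileModule();
//            Compiled = true;
//          }
//          return lookupSymbol(Name);
//        }
//      };
//  The '\1' test just below follows the LLVM convention that a leading \1 in
//  a symbol name means "use verbatim, do not prefix/mangle".)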
StringRef BaseName = F->getName(); if (BaseName[0] == '\1') @@ -218,6 +243,10 @@ GenericValue MCJIT::runFunction(Function *F, void *MCJIT::getPointerToNamedFunction(const std::string &Name, bool AbortOnFailure) { + // FIXME: Add support for per-module compilation state + if (!isCompiled) + emitObject(M); + if (!isSymbolSearchingDisabled() && MemMgr) { void *ptr = MemMgr->getPointerToNamedFunction(Name, false); if (ptr) diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index 2b3df98..138a7b6 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -23,23 +23,22 @@ namespace llvm { // blah blah. Purely in get-it-up-and-limping mode for now. class MCJIT : public ExecutionEngine { - MCJIT(Module *M, TargetMachine *tm, TargetJITInfo &tji, - RTDyldMemoryManager *MemMgr, bool AllocateGVsWithCode); + MCJIT(Module *M, TargetMachine *tm, RTDyldMemoryManager *MemMgr, + bool AllocateGVsWithCode); TargetMachine *TM; MCContext *Ctx; RTDyldMemoryManager *MemMgr; + RuntimeDyld Dyld; - // FIXME: These may need moved to a separate 'jitstate' member like the - // non-MC JIT does for multithreading and such. Just keep them here for now. - PassManager PM; + // FIXME: Add support for multiple modules + bool isCompiled; Module *M; - // FIXME: This really doesn't belong here. + + // FIXME: Move these to a single container which manages JITed objects SmallVector<char, 4096> Buffer; // Working buffer into which we JIT. raw_svector_ostream OS; - RuntimeDyld Dyld; - public: ~MCJIT(); @@ -91,6 +90,14 @@ public: TargetMachine *TM); // @} + +protected: + /// emitObject -- Generate a JITed object in memory from the specified module + /// Currently, MCJIT only supports a single module and the module passed to + /// this function call is expected to be the contained module. The module + /// is passed as a parameter here to prepare for multiple module support in + /// the future. + void emitObject(Module *M); }; } // End llvm namespace diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index b464040..a98ddc0 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -108,7 +108,8 @@ bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) { CommonSymbols[*i] = Size; } else { if (SymType == object::SymbolRef::ST_Function || - SymType == object::SymbolRef::ST_Data) { + SymType == object::SymbolRef::ST_Data || + SymType == object::SymbolRef::ST_Unknown) { uint64_t FileOffset; StringRef SectionData; section_iterator si = obj->end_sections(); @@ -333,15 +334,31 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE, } uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { - // TODO: There is only ARM far stub now. We should add the Thumb stub, - // and stubs for branches Thumb - ARM and ARM - Thumb. if (Arch == Triple::arm) { + // TODO: There is only ARM far stub now. We should add the Thumb stub, + // and stubs for branches Thumb - ARM and ARM - Thumb. uint32_t *StubAddr = (uint32_t*)Addr; *StubAddr = 0xe51ff004; // ldr pc,<label> return (uint8_t*)++StubAddr; - } - else + } else if (Arch == Triple::mipsel) { + uint32_t *StubAddr = (uint32_t*)Addr; + // 0: 3c190000 lui t9,%hi(addr). + // 4: 27390000 addiu t9,t9,%lo(addr). + // 8: 03200008 jr t9. + // c: 00000000 nop. 
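// (Editor's note on the stub below: it materializes the final target address
//  in $t9 with a lui/addiu pair -- %hi(addr) into the upper half, %lo(addr)
//  added in -- then branches with "jr t9", and the trailing nop fills the
//  branch delay slot. "AdduiT9Instr" is the addiu t9,t9,%lo(addr) encoding;
//  the %hi/%lo values themselves are filled in later by the R_MIPS_HI16 and
//  R_MIPS_LO16 relocations recorded in processRelocationRef().)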
+ const unsigned LuiT9Instr = 0x3c190000, AdduiT9Instr = 0x27390000; + const unsigned JrT9Instr = 0x03200008, NopInstr = 0x0; + + *StubAddr = LuiT9Instr; + StubAddr++; + *StubAddr = AdduiT9Instr; + StubAddr++; + *StubAddr = JrT9Instr; + StubAddr++; + *StubAddr = NopInstr; return Addr; + } + return Addr; } // Assign an address to a symbol name and resolve all the relocations diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 39aed34..0aea598 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -55,7 +55,7 @@ public: const MemoryBuffer& getBuffer() const { return *InputData; } - // Methods for type inquiry through isa, cast, and dyn_cast + // Methods for type inquiry through isa, cast and dyn_cast static inline bool classof(const Binary *v) { return (isa<ELFObjectFile<target_endianness, is64Bits> >(v) && classof(cast<ELFObjectFile<target_endianness, is64Bits> >(v))); @@ -208,10 +208,9 @@ void RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress, case ELF::R_X86_64_32: case ELF::R_X86_64_32S: { Value += Addend; - // FIXME: Handle the possibility of this assertion failing - assert((Type == ELF::R_X86_64_32 && !(Value & 0xFFFFFFFF00000000ULL)) || - (Type == ELF::R_X86_64_32S && - (Value & 0xFFFFFFFF00000000ULL) == 0xFFFFFFFF00000000ULL)); + assert((Type == ELF::R_X86_64_32 && (Value <= UINT32_MAX)) || + (Type == ELF::R_X86_64_32S && + ((int64_t)Value <= INT32_MAX && (int64_t)Value >= INT32_MIN))); uint32_t TruncatedAddr = (Value & 0xFFFFFFFF); uint32_t *Target = reinterpret_cast<uint32_t*>(LocalAddress); *Target = TruncatedAddr; @@ -220,7 +219,7 @@ void RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress, case ELF::R_X86_64_PC32: { uint32_t *Placeholder = reinterpret_cast<uint32_t*>(LocalAddress); int64_t RealOffset = *Placeholder + Value + Addend - FinalAddress; - assert(RealOffset <= 214783647 && RealOffset >= -214783648); + assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN); int32_t TruncOffset = (RealOffset & 0xFFFFFFFF); *Placeholder = TruncOffset; break; @@ -248,7 +247,7 @@ void RuntimeDyldELF::resolveX86Relocation(uint8_t *LocalAddress, } default: // There are other relocation types, but it appears these are the - // only ones currently used by the LLVM ELF object writer + // only ones currently used by the LLVM ELF object writer llvm_unreachable("Relocation type not implemented yet!"); break; } @@ -307,6 +306,44 @@ void RuntimeDyldELF::resolveARMRelocation(uint8_t *LocalAddress, } } +void RuntimeDyldELF::resolveMIPSRelocation(uint8_t *LocalAddress, + uint32_t FinalAddress, + uint32_t Value, + uint32_t Type, + int32_t Addend) { + uint32_t* TargetPtr = (uint32_t*)LocalAddress; + Value += Addend; + + DEBUG(dbgs() << "resolveMipselocation, LocalAddress: " << LocalAddress + << " FinalAddress: " << format("%p",FinalAddress) + << " Value: " << format("%x",Value) + << " Type: " << format("%x",Type) + << " Addend: " << format("%x",Addend) + << "\n"); + + switch(Type) { + default: + llvm_unreachable("Not implemented relocation type!"); + break; + case ELF::R_MIPS_32: + *TargetPtr = Value + (*TargetPtr); + break; + case ELF::R_MIPS_26: + *TargetPtr = ((*TargetPtr) & 0xfc000000) | (( Value & 0x0fffffff) >> 2); + break; + case ELF::R_MIPS_HI16: + // Get the higher 16-bits. Also add 1 if bit 15 is 1. 
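// (Editor's worked example of the "+ 0x8000" carry: for Value = 0x0040ABCD
//  the low half 0xABCD is sign-extended by addiu, contributing -0x5433, so
//  %hi must round up: (0x0040ABCD + 0x8000) >> 16 = 0x0041, and then
//  (0x0041 << 16) + sext16(0xABCD) = 0x00410000 - 0x5433 = 0x0040ABCD.)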
+ Value += ((*TargetPtr) & 0x0000ffff) << 16; + *TargetPtr = ((*TargetPtr) & 0xffff0000) | + (((Value + 0x8000) >> 16) & 0xffff); + break; + case ELF::R_MIPS_LO16: + Value += ((*TargetPtr) & 0x0000ffff); + *TargetPtr = ((*TargetPtr) & 0xffff0000) | (Value & 0xffff); + break; + } +} + void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress, uint64_t FinalAddress, uint64_t Value, @@ -327,6 +364,12 @@ void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress, (uint32_t)(Value & 0xffffffffL), Type, (uint32_t)(Addend & 0xffffffffL)); break; + case Triple::mips: // Fall through. + case Triple::mipsel: + resolveMIPSRelocation(LocalAddress, (uint32_t)(FinalAddress & 0xffffffffL), + (uint32_t)(Value & 0xffffffffL), Type, + (uint32_t)(Addend & 0xffffffffL)); + break; default: llvm_unreachable("Unsupported CPU type!"); } } @@ -424,6 +467,53 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, Section.StubOffset, RelType, 0); Section.StubOffset += getMaxStubSize(); } + } else if (Arch == Triple::mipsel && RelType == ELF::R_MIPS_26) { + // This is an Mips branch relocation, need to use a stub function. + DEBUG(dbgs() << "\t\tThis is a Mips branch relocation."); + SectionEntry &Section = Sections[Rel.SectionID]; + uint8_t *Target = Section.Address + Rel.Offset; + uint32_t *TargetAddress = (uint32_t *)Target; + + // Extract the addend from the instruction. + uint32_t Addend = ((*TargetAddress) & 0x03ffffff) << 2; + + Value.Addend += Addend; + + // Look up for existing stub. + StubMap::const_iterator i = Stubs.find(Value); + if (i != Stubs.end()) { + resolveRelocation(Target, (uint64_t)Target, + (uint64_t)Section.Address + + i->second, RelType, 0); + DEBUG(dbgs() << " Stub function found\n"); + } else { + // Create a new stub function. + DEBUG(dbgs() << " Create a new stub function\n"); + Stubs[Value] = Section.StubOffset; + uint8_t *StubTargetAddr = createStubFunction(Section.Address + + Section.StubOffset); + + // Creating Hi and Lo relocations for the filled stub instructions. 
+ RelocationEntry REHi(Rel.SectionID, + StubTargetAddr - Section.Address, + ELF::R_MIPS_HI16, Value.Addend); + RelocationEntry RELo(Rel.SectionID, + StubTargetAddr - Section.Address + 4, + ELF::R_MIPS_LO16, Value.Addend); + + if (Value.SymbolName) { + addRelocationForSymbol(REHi, Value.SymbolName); + addRelocationForSymbol(RELo, Value.SymbolName); + } else { + addRelocationForSection(REHi, Value.SectionID); + addRelocationForSection(RELo, Value.SectionID); + } + + resolveRelocation(Target, (uint64_t)Target, + (uint64_t)Section.Address + + Section.StubOffset, RelType, 0); + Section.StubOffset += getMaxStubSize(); + } } else { RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend); if (Value.SymbolName) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index e413f78..eade49e 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -42,6 +42,12 @@ protected: uint32_t Type, int32_t Addend); + void resolveMIPSRelocation(uint8_t *LocalAddress, + uint32_t FinalAddress, + uint32_t Value, + uint32_t Type, + int32_t Addend); + virtual void resolveRelocation(uint8_t *LocalAddress, uint64_t FinalAddress, uint64_t Value, diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index c38ca69..3d89994 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -161,6 +161,8 @@ protected: inline unsigned getMaxStubSize() { if (Arch == Triple::arm || Arch == Triple::thumb) return 8; // 32-bit instruction and 32-bit address + else if (Arch == Triple::mipsel) + return 16; else return 0; } diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index afba2e8..a6599bf 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -16,6 +16,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Module.h" +#include "llvm/TypeFinder.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" @@ -595,13 +596,13 @@ void ModuleLinker::computeTypeMapping() { // At this point, the destination module may have a type "%foo = { i32 }" for // example. When the source module got loaded into the same LLVMContext, if // it had the same type, it would have been renamed to "%foo.42 = { i32 }". 
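// (Editor's usage sketch for the TypeFinder API adopted below, assuming a
//  Module M in scope:
//      TypeFinder Types;
//      Types.run(M, /*OnlyNamed=*/true);   // collect the named struct types
//      for (TypeFinder::iterator I = Types.begin(), E = Types.end();
//           I != E; ++I)
//        dbgs() << (*I)->getName() << '\n';
//  TypeFinder replaces the Module::findUsedStructTypes() calls removed here.)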
- std::vector<StructType*> SrcStructTypes; - SrcM->findUsedStructTypes(SrcStructTypes, true); + TypeFinder SrcStructTypes; + SrcStructTypes.run(*SrcM, true); SmallPtrSet<StructType*, 32> SrcStructTypesSet(SrcStructTypes.begin(), SrcStructTypes.end()); - std::vector<StructType*> DstStructTypes; - DstM->findUsedStructTypes(DstStructTypes, true); + TypeFinder DstStructTypes; + DstStructTypes.run(*DstM, true); SmallPtrSet<StructType*, 32> DstStructTypesSet(DstStructTypes.begin(), DstStructTypes.end()); diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index f11e686..99bff96 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -28,6 +28,7 @@ add_llvm_library(LLVMMC MCObjectStreamer.cpp MCObjectWriter.cpp MCPureStreamer.cpp + MCRegisterInfo.cpp MCSection.cpp MCSectionCOFF.cpp MCSectionELF.cpp diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 0aa0c98..b7d2c28 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/LEB128.h" using namespace llvm; @@ -719,9 +720,9 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { Data.clear(); raw_svector_ostream OSE(Data); if (LF.isSigned()) - MCObjectWriter::EncodeSLEB128(Value, OSE); + encodeSLEB128(Value, OSE); else - MCObjectWriter::EncodeULEB128(Value, OSE); + encodeULEB128(Value, OSE); OSE.flush(); return OldSize != LF.getContents().size(); } diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 75eaf80..4c63e43 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" #include "llvm/ADT/Hashing.h" @@ -361,7 +362,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta, OS << char(dwarf::DW_LNS_const_add_pc); else { OS << char(dwarf::DW_LNS_advance_pc); - MCObjectWriter::EncodeULEB128(AddrDelta, OS); + encodeULEB128(AddrDelta, OS); } OS << char(dwarf::DW_LNS_extended_op); OS << char(1); @@ -376,7 +377,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta, // it with DW_LNS_advance_line. if (Temp >= DWARF2_LINE_RANGE) { OS << char(dwarf::DW_LNS_advance_line); - MCObjectWriter::EncodeSLEB128(LineDelta, OS); + encodeSLEB128(LineDelta, OS); LineDelta = 0; Temp = 0 - DWARF2_LINE_BASE; @@ -412,7 +413,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta, // Otherwise use DW_LNS_advance_pc. OS << char(dwarf::DW_LNS_advance_pc); - MCObjectWriter::EncodeULEB128(AddrDelta, OS); + encodeULEB128(AddrDelta, OS); if (NeedCopy) OS << char(dwarf::DW_LNS_copy); @@ -1293,20 +1294,17 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, streamer.EmitSymbolValue(&cieStart, 4); } - unsigned fdeEncoding = MOFI->getFDEEncoding(UsingCFI); - unsigned size = getSizeForEncoding(streamer, fdeEncoding); - // PC Begin - unsigned PCBeginEncoding = IsEH ? fdeEncoding : - (unsigned)dwarf::DW_EH_PE_absptr; - unsigned PCBeginSize = getSizeForEncoding(streamer, PCBeginEncoding); - EmitSymbol(streamer, *frame.Begin, PCBeginEncoding, "FDE initial location"); + unsigned PCEncoding = IsEH ? 
MOFI->getFDEEncoding(UsingCFI) + : (unsigned)dwarf::DW_EH_PE_absptr; + unsigned PCSize = getSizeForEncoding(streamer, PCEncoding); + EmitSymbol(streamer, *frame.Begin, PCEncoding, "FDE initial location"); // PC Range const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin, *frame.End, 0); if (verboseAsm) streamer.AddComment("FDE address range"); - streamer.EmitAbsValue(Range, size); + streamer.EmitAbsValue(Range, PCSize); if (IsEH) { // Augmentation Data Length @@ -1329,7 +1327,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, EmitCFIInstructions(streamer, frame.Instructions, frame.Begin); // Padding - streamer.EmitValueToAlignment(PCBeginSize); + streamer.EmitValueToAlignment(PCSize); return fdeEnd; } diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 4e6a1b9..29b4a94 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -507,15 +507,13 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { PDataSection = Ctx->getCOFFSection(".pdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_MEM_READ, SectionKind::getDataRel()); XDataSection = Ctx->getCOFFSection(".xdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_MEM_READ, SectionKind::getDataRel()); TLSDataSection = Ctx->getCOFFSection(".tls$", diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp index 030f247..94d7cd6 100644 --- a/lib/MC/MCObjectWriter.cpp +++ b/lib/MC/MCObjectWriter.cpp @@ -17,40 +17,6 @@ using namespace llvm; MCObjectWriter::~MCObjectWriter() { } -/// Utility function to encode a SLEB128 value. -void MCObjectWriter::EncodeSLEB128(int64_t Value, raw_ostream &OS) { - bool More; - do { - uint8_t Byte = Value & 0x7f; - // NOTE: this assumes that this signed shift is an arithmetic right shift. - Value >>= 7; - More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || - ((Value == -1) && ((Byte & 0x40) != 0)))); - if (More) - Byte |= 0x80; // Mark this byte that that more bytes will follow. - OS << char(Byte); - } while (More); -} - -/// Utility function to encode a ULEB128 value. -void MCObjectWriter::EncodeULEB128(uint64_t Value, raw_ostream &OS, - unsigned Padding) { - do { - uint8_t Byte = Value & 0x7f; - Value >>= 7; - if (Value != 0 || Padding != 0) - Byte |= 0x80; // Mark this byte that that more bytes will follow. - OS << char(Byte); - } while (Value != 0); - - // Pad with 0x80 and emit a null byte at the end. - if (Padding != 0) { - for (; Padding != 1; --Padding) - OS << '\x80'; - OS << '\x00'; - } -} - bool MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm, const MCSymbolRefExpr *A, diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 2daad0a..240c10b 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -46,14 +46,17 @@ namespace { /// \brief Helper class for tracking macro definitions. 
typedef std::vector<AsmToken> MacroArgument; +typedef std::vector<MacroArgument> MacroArguments; +typedef StringRef MacroParameter; +typedef std::vector<MacroParameter> MacroParameters; struct Macro { StringRef Name; StringRef Body; - std::vector<StringRef> Parameters; + MacroParameters Parameters; public: - Macro(StringRef N, StringRef B, const std::vector<StringRef> &P) : + Macro(StringRef N, StringRef B, const MacroParameters &P) : Name(N), Body(B), Parameters(P) {} }; @@ -181,8 +184,8 @@ private: bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M); bool expandMacro(raw_svector_ostream &OS, StringRef Body, - const std::vector<StringRef> &Parameters, - const std::vector<MacroArgument> &A, + const MacroParameters &Parameters, + const MacroArguments &A, const SMLoc &L); void HandleMacroExit(); @@ -207,7 +210,7 @@ private: void EatToEndOfStatement(); bool ParseMacroArgument(MacroArgument &MA); - bool ParseMacroArguments(const Macro *M, std::vector<MacroArgument> &A); + bool ParseMacroArguments(const Macro *M, MacroArguments &A); /// \brief Parse up to the end of statement and a return the contents from the /// current token until the end of the statement; the current token on exit @@ -1451,9 +1454,17 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { NewDiag.print(0, OS); } +// FIXME: This is mostly duplicated from the function in AsmLexer.cpp. The +// difference being that that function accepts '@' as part of identifiers and +// we can't do that. AsmLexer.cpp should probably be changed to handle +// '@' as a special case when needed. +static bool isIdentifierChar(char c) { + return isalnum(c) || c == '_' || c == '$' || c == '.'; +} + bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, - const std::vector<StringRef> &Parameters, - const std::vector<MacroArgument> &A, + const MacroParameters &Parameters, + const MacroArguments &A, const SMLoc &L) { unsigned NParameters = Parameters.size(); if (NParameters != 0 && NParameters != A.size()) @@ -1515,7 +1526,7 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, Pos += 2; } else { unsigned I = Pos + 1; - while (isalnum(Body[I]) && I + 1 != End) + while (isIdentifierChar(Body[I]) && I + 1 != End) ++I; const char *Begin = Body.data() + Pos +1; @@ -1555,8 +1566,6 @@ bool AsmParser::ParseMacroArgument(MacroArgument &MA) { unsigned ParenLevel = 0; for (;;) { - SMLoc LastTokenLoc; - if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal)) return TokError("unexpected token in macro instantiation"); @@ -1578,13 +1587,12 @@ bool AsmParser::ParseMacroArgument(MacroArgument &MA) { Lex(); } if (ParenLevel != 0) - return TokError("unbalanced parenthesises in macro argument"); + return TokError("unbalanced parentheses in macro argument"); return false; } // Parse the macro instantiation arguments. -bool AsmParser::ParseMacroArguments(const Macro *M, - std::vector<MacroArgument> &A) { +bool AsmParser::ParseMacroArguments(const Macro *M, MacroArguments &A) { const unsigned NParameters = M ? 
M->Parameters.size() : 0; // Parse two kinds of macro invocations: @@ -1597,8 +1605,8 @@ bool AsmParser::ParseMacroArguments(const Macro *M, if (ParseMacroArgument(MA)) return true; - if (!MA.empty()) - A.push_back(MA); + A.push_back(MA); + if (Lexer.is(AsmToken::EndOfStatement)) return false; @@ -1615,17 +1623,23 @@ bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc, if (ActiveMacros.size() == 20) return TokError("macros cannot be nested more than 20 levels deep"); - std::vector<MacroArgument> MacroArguments; - if (ParseMacroArguments(M, MacroArguments)) + MacroArguments A; + if (ParseMacroArguments(M, A)) return true; + // Remove any trailing empty arguments. Do this after-the-fact as we have + // to keep empty arguments in the middle of the list or positionality + // gets off. e.g., "foo 1, , 2" vs. "foo 1, 2," + while (!A.empty() && A.back().empty()) + A.pop_back(); + // Macro instantiation is lexical, unfortunately. We construct a new buffer // to hold the macro body with substitutions. SmallString<256> Buf; StringRef Body = M->Body; raw_svector_ostream OS(Buf); - if (expandMacro(OS, Body, M->Parameters, MacroArguments, getTok().getLoc())) + if (expandMacro(OS, Body, M->Parameters, A, getTok().getLoc())) return true; // We include the .endmacro in the buffer as our queue to exit the macro @@ -3065,14 +3079,14 @@ bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive, SMLoc DirectiveLoc) { StringRef Name; if (getParser().ParseIdentifier(Name)) - return TokError("expected identifier in directive"); + return TokError("expected identifier in '.macro' directive"); - std::vector<StringRef> Parameters; + MacroParameters Parameters; if (getLexer().isNot(AsmToken::EndOfStatement)) { - for(;;) { - StringRef Parameter; + for (;;) { + MacroParameter Parameter; if (getParser().ParseIdentifier(Parameter)) - return TokError("expected identifier in directive"); + return TokError("expected identifier in '.macro' directive"); Parameters.push_back(Parameter); if (getLexer().isNot(AsmToken::Comma)) @@ -3126,7 +3140,7 @@ bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive, /// ::= .endm /// ::= .endmacro bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive, - SMLoc DirectiveLoc) { + SMLoc DirectiveLoc) { if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '" + Directive + "' directive"); @@ -3224,7 +3238,7 @@ Macro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) { // We Are Anonymous. StringRef Name; - std::vector<StringRef> Parameters; + MacroParameters Parameters; return new Macro(Name, Body, Parameters); } @@ -3270,8 +3284,8 @@ bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) { // Macro instantiation is lexical, unfortunately. We construct a new buffer // to hold the macro body with substitutions. 
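// (Editor's example of the argument normalization introduced above: empty
//  arguments are kept in place and only trailing ones are dropped, so the
//  positional mapping stays aligned --
//      foo 1, , 2   ->  A = { "1", "", "2" }   // middle blank preserved
//      foo 1, 2,    ->  A = { "1", "2" }       // trailing blank removed
//  -- matching the "foo 1, , 2" vs. "foo 1, 2," comment in the patch.)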
SmallString<256> Buf; - std::vector<StringRef> Parameters; - const std::vector<MacroArgument> A; + MacroParameters Parameters; + MacroArguments A; raw_svector_ostream OS(Buf); while (Count--) { if (expandMacro(OS, M->Body, Parameters, A, getTok().getLoc())) @@ -3285,8 +3299,8 @@ bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) { /// ParseDirectiveIrp /// ::= .irp symbol,values bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { - std::vector<StringRef> Parameters; - StringRef Parameter; + MacroParameters Parameters; + MacroParameter Parameter; if (ParseIdentifier(Parameter)) return TokError("expected identifier in '.irp' directive"); @@ -3298,7 +3312,7 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { Lex(); - std::vector<MacroArgument> A; + MacroArguments A; if (ParseMacroArguments(0, A)) return true; @@ -3315,9 +3329,8 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { SmallString<256> Buf; raw_svector_ostream OS(Buf); - for (std::vector<MacroArgument>::iterator i = A.begin(), e = A.end(); i != e; - ++i) { - std::vector<MacroArgument> Args; + for (MacroArguments::iterator i = A.begin(), e = A.end(); i != e; ++i) { + MacroArguments Args; Args.push_back(*i); if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc())) @@ -3332,8 +3345,8 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { /// ParseDirectiveIrpc /// ::= .irpc symbol,values bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { - std::vector<StringRef> Parameters; - StringRef Parameter; + MacroParameters Parameters; + MacroParameter Parameter; if (ParseIdentifier(Parameter)) return TokError("expected identifier in '.irpc' directive"); @@ -3345,7 +3358,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { Lex(); - std::vector<MacroArgument> A; + MacroArguments A; if (ParseMacroArguments(0, A)) return true; @@ -3371,7 +3384,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { MacroArgument Arg; Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I+1))); - std::vector<MacroArgument> Args; + MacroArguments Args; Args.push_back(Arg); if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc())) diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index 5662fea..18033d0 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -50,6 +50,9 @@ public: AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePushSection>(".pushsection"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePopSection>(".popsection"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePrevious>(".previous"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>( ".secure_log_unique"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogReset>( @@ -112,6 +115,9 @@ public: bool ParseDirectiveDumpOrLoad(StringRef, SMLoc); bool ParseDirectiveLsym(StringRef, SMLoc); bool ParseDirectiveSection(StringRef, SMLoc); + bool ParseDirectivePushSection(StringRef, SMLoc); + bool ParseDirectivePopSection(StringRef, SMLoc); + bool ParseDirectivePrevious(StringRef, SMLoc); bool ParseDirectiveSecureLogReset(StringRef, SMLoc); bool ParseDirectiveSecureLogUnique(StringRef, SMLoc); bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc); @@ -297,7 +303,7 @@ public: }; -} +} // end 
anonymous namespace bool DarwinAsmParser::ParseSectionSwitch(const char *Segment, const char *Section, @@ -457,6 +463,37 @@ bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) { return false; } +/// ParseDirectivePushSection: +/// ::= .pushsection identifier (',' identifier)* +bool DarwinAsmParser::ParseDirectivePushSection(StringRef S, SMLoc Loc) { + getStreamer().PushSection(); + + if (ParseDirectiveSection(S, Loc)) { + getStreamer().PopSection(); + return true; + } + + return false; +} + +/// ParseDirectivePopSection: +/// ::= .popsection +bool DarwinAsmParser::ParseDirectivePopSection(StringRef, SMLoc) { + if (!getStreamer().PopSection()) + return TokError(".popsection without corresponding .pushsection"); + return false; +} + +/// ParseDirectivePrevious: +/// ::= .previous +bool DarwinAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) { + const MCSection *PreviousSection = getStreamer().getPreviousSection(); + if (PreviousSection == NULL) + return TokError(".previous without corresponding .section"); + getStreamer().SwitchSection(PreviousSection); + return false; +} + /// ParseDirectiveSecureLogUnique /// ::= .secure_log_unique ... message ... bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { @@ -707,4 +744,4 @@ MCAsmParserExtension *createDarwinAsmParser() { return new DarwinAsmParser; } -} +} // end llvm namespace diff --git a/lib/MC/MCRegisterInfo.cpp b/lib/MC/MCRegisterInfo.cpp new file mode 100644 index 0000000..4d1aff3 --- /dev/null +++ b/lib/MC/MCRegisterInfo.cpp @@ -0,0 +1,71 @@ +//=== MC/MCRegisterInfo.cpp - Target Register Description -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements MCRegisterInfo functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCRegisterInfo.h" + +using namespace llvm; + +unsigned MCRegisterInfo::getMatchingSuperReg(unsigned Reg, unsigned SubIdx, + const MCRegisterClass *RC) const { + for (MCSuperRegIterator Supers(Reg, this); Supers.isValid(); ++Supers) + if (RC->contains(*Supers) && Reg == getSubReg(*Supers, SubIdx)) + return *Supers; + return 0; +} + +unsigned MCRegisterInfo::getSubReg(unsigned Reg, unsigned Idx) const { + // Get a pointer to the corresponding SubRegIndices list. This list has the + // name of each sub-register in the same order as MCSubRegIterator. + const uint16_t *SRI = SubRegIndices + get(Reg).SubRegIndices; + for (MCSubRegIterator Subs(Reg, this); Subs.isValid(); ++Subs, ++SRI) + if (*SRI == Idx) + return *Subs; + return 0; +} + +unsigned MCRegisterInfo::getSubRegIndex(unsigned Reg, unsigned SubReg) const { + // Get a pointer to the corresponding SubRegIndices list. This list has the + // name of each sub-register in the same order as MCSubRegIterator. + const uint16_t *SRI = SubRegIndices + get(Reg).SubRegIndices; + for (MCSubRegIterator Subs(Reg, this); Subs.isValid(); ++Subs, ++SRI) + if (*Subs == SubReg) + return *SRI; + return 0; +} + +int MCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + const DwarfLLVMRegPair *M = isEH ? EHL2DwarfRegs : L2DwarfRegs; + unsigned Size = isEH ? 
EHL2DwarfRegsSize : L2DwarfRegsSize; + + DwarfLLVMRegPair Key = { RegNum, 0 }; + const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); + if (I == M+Size || I->FromReg != RegNum) + return -1; + return I->ToReg; +} + +int MCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const { + const DwarfLLVMRegPair *M = isEH ? EHDwarf2LRegs : Dwarf2LRegs; + unsigned Size = isEH ? EHDwarf2LRegsSize : Dwarf2LRegsSize; + + DwarfLLVMRegPair Key = { RegNum, 0 }; + const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); + assert(I != M+Size && I->FromReg == RegNum && "Invalid RegNum"); + return I->ToReg; +} + +int MCRegisterInfo::getSEHRegNum(unsigned RegNum) const { + const DenseMap<unsigned, int>::const_iterator I = L2SEHRegs.find(RegNum); + if (I == L2SEHRegs.end()) return (int)RegNum; + return I->second; +} diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index e363f28..0bac24d 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -15,6 +15,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/LEB128.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include <cstdlib> @@ -94,7 +95,7 @@ void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace, unsigned Padding) { SmallString<128> Tmp; raw_svector_ostream OSE(Tmp); - MCObjectWriter::EncodeULEB128(Value, OSE, Padding); + encodeULEB128(Value, OSE, Padding); EmitBytes(OSE.str(), AddrSpace); } @@ -103,7 +104,7 @@ void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace, void MCStreamer::EmitSLEB128IntValue(int64_t Value, unsigned AddrSpace) { SmallString<128> Tmp; raw_svector_ostream OSE(Tmp); - MCObjectWriter::EncodeSLEB128(Value, OSE); + encodeSLEB128(Value, OSE); EmitBytes(OSE.str(), AddrSpace); } diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp index 79e66fc..c05b4b1 100644 --- a/lib/MC/MCWin64EH.cpp +++ b/lib/MC/MCWin64EH.cpp @@ -228,8 +228,7 @@ static const MCSection *getWin64EHTableSection(StringRef suffix, return context.getCOFFSection((".xdata"+suffix).str(), COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_MEM_READ, SectionKind::getDataRel()); } @@ -239,8 +238,7 @@ static const MCSection *getWin64EHFuncTableSection(StringRef suffix, return context.getObjectFileInfo()->getPDataSection(); return context.getCOFFSection((".pdata"+suffix).str(), COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_MEM_READ, SectionKind::getDataRel()); } diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 409d4fb..ed261a4 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -1765,6 +1765,50 @@ APFloat::fusedMultiplyAdd(const APFloat &multiplicand, return fs; } +/* Rounding-mode corrrect round to integral value. */ +APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { + opStatus fs; + assertArithmeticOK(*semantics); + + // If the exponent is large enough, we know that this value is already + // integral, and the arithmetic below would potentially cause it to saturate + // to +/-Inf. Bail out early instead. + if (exponent+1 >= (int)semanticsPrecision(*semantics)) + return opOK; + + // The algorithm here is quite simple: we add 2^(p-1), where p is the + // precision of our format, and then subtract it back off again. 
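// (Editor's worked example: for IEEE double, p = 53, so the constant is
//  2^52. With round-to-nearest, 5.7 + 2^52 = 4503599627370501.7 rounds to
//  4503599627370502.0 because the ulp at that magnitude is 1.0; subtracting
//  2^52 back off leaves 6.0, the correctly rounded integer.)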
The choice + // of rounding modes for the addition/subtraction determines the rounding mode + // for our integral rounding as well. + // NOTE: When the input value is negative, we do subtraction followed by + // addition instead. + APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); + IntegerConstant <<= semanticsPrecision(*semantics)-1; + APFloat MagicConstant(*semantics); + fs = MagicConstant.convertFromAPInt(IntegerConstant, false, + rmNearestTiesToEven); + MagicConstant.copySign(*this); + + if (fs != opOK) + return fs; + + // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. + bool inputSign = isNegative(); + + fs = add(MagicConstant, rounding_mode); + if (fs != opOK && fs != opInexact) + return fs; + + fs = subtract(MagicConstant, rounding_mode); + + // Restore the input sign. + if (inputSign != isNegative()) + changeSign(); + + return fs; +} + + /* Comparison requires normalized numbers. */ APFloat::cmpResult APFloat::compare(const APFloat &rhs) const @@ -3278,16 +3322,8 @@ APFloat::APFloat(double d) : exponent2(0), sign2(0) { } namespace { - static void append(SmallVectorImpl<char> &Buffer, - unsigned N, const char *Str) { - unsigned Start = Buffer.size(); - Buffer.set_size(Start + N); - memcpy(&Buffer[Start], Str, N); - } - - template <unsigned N> - void append(SmallVectorImpl<char> &Buffer, const char (&Str)[N]) { - append(Buffer, N, Str); + void append(SmallVectorImpl<char> &Buffer, StringRef Str) { + Buffer.append(Str.begin(), Str.end()); } /// Removes data from the given significand until it is no more diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 9103327..83baf60 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_library(LLVMSupport Dwarf.cpp ErrorHandling.cpp FileUtilities.cpp + FileOutputBuffer.cpp FoldingSet.cpp FormattedStream.cpp GraphWriter.cpp diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp index dc21155..3d5cce0 100644 --- a/lib/Support/DataExtractor.cpp +++ b/lib/Support/DataExtractor.cpp @@ -139,7 +139,7 @@ uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const { while (isValidOffset(offset)) { byte = Data[offset++]; - result |= (byte & 0x7f) << shift; + result |= uint64_t(byte & 0x7f) << shift; shift += 7; if ((byte & 0x80) == 0) break; @@ -160,7 +160,7 @@ int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const { while (isValidOffset(offset)) { byte = Data[offset++]; - result |= (byte & 0x7f) << shift; + result |= uint64_t(byte & 0x7f) << shift; shift += 7; if ((byte & 0x80) == 0) break; @@ -168,7 +168,7 @@ int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const { // Sign bit of byte is 2nd high order bit (0x40) if (shift < 64 && (byte & 0x40)) - result |= -(1 << shift); + result |= -(1ULL << shift); *offset_ptr = offset; return result; diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp index 9fdb12e..c8e8900 100644 --- a/lib/Support/Debug.cpp +++ b/lib/Support/Debug.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements a handle way of adding debugging information to your +// This file implements a handy way of adding debugging information to your // code, without it being enabled all of the time, and without having to add // command line options to enable it. // @@ -18,8 +18,8 @@ // can specify '-debug-only=foo' to enable JUST the debug information for the // foo class. 
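// As a minimal usage sketch (the name "foo" here is just a placeholder): // // #define DEBUG_TYPE "foo" // #include "llvm/Support/Debug.h" // ... // DEBUG(dbgs() << "foo: processed a block\n"); // // The message appears only when assertions are enabled and -debug (or // -debug-only=foo) is passed.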
// -// When compiling in release mode, the -debug-* options and all code in DEBUG() -// statements disappears, so it does not effect the runtime of the code. +// When compiling without assertions, the -debug-* options and all code in +// DEBUG() statements disappears, so it does not affect the runtime of the code. // //===----------------------------------------------------------------------===// @@ -89,11 +89,11 @@ bool llvm::isCurrentDebugType(const char *DebugType) { return CurrentDebugType.empty() || DebugType == CurrentDebugType; } -/// SetCurrentDebugType - Set the current debug type, as if the -debug-only=X +/// setCurrentDebugType - Set the current debug type, as if the -debug-only=X /// option were specified. Note that DebugFlag also needs to be set to true for /// debug output to be produced. /// -void llvm::SetCurrentDebugType(const char *Type) { +void llvm::setCurrentDebugType(const char *Type) { CurrentDebugType = Type; } diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp new file mode 100644 index 0000000..7dc9587 --- /dev/null +++ b/lib/Support/FileOutputBuffer.cpp @@ -0,0 +1,148 @@ +//===- FileOutputBuffer.cpp - File Output Buffer ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Utility for creating an in-memory buffer that will be written to a file. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FileOutputBuffer.h" + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" + + +namespace llvm { + + +FileOutputBuffer::FileOutputBuffer(uint8_t *Start, uint8_t *End, + StringRef Path, StringRef TmpPath) + : BufferStart(Start), BufferEnd(End) { + FinalPath.assign(Path); + TempPath.assign(TmpPath); +} + + +FileOutputBuffer::~FileOutputBuffer() { + // If not already committed, delete buffer and remove temp file. + if ( BufferStart != NULL ) { + sys::fs::unmap_file_pages((void*)BufferStart, getBufferSize()); + bool Existed; + sys::fs::remove(Twine(TempPath), Existed); + } +} + + +error_code FileOutputBuffer::create(StringRef FilePath, + size_t Size, + OwningPtr<FileOutputBuffer> &Result, + unsigned Flags) { + // If file already exists, it must be a regular file (to be mappable). + sys::fs::file_status Stat; + error_code EC = sys::fs::status(FilePath, Stat); + switch (Stat.type()) { + case sys::fs::file_type::file_not_found: + // If file does not exist, we'll create one. + break; + case sys::fs::file_type::regular_file: { + // If file is not currently writable, error out. + // FIXME: There is no sys::fs:: api for checking this. + // FIXME: In posix, you use the access() call to check this. + } + break; + default: + if (EC) + return EC; + else + return make_error_code(errc::operation_not_permitted); + } + + // Delete target file. + bool Existed; + EC = sys::fs::remove(FilePath, Existed); + if (EC) + return EC; + + // Create new file in same directory but with random name. + SmallString<128> TempFilePath; + int FD; + EC = sys::fs::unique_file(Twine(FilePath) + ".tmp%%%%%%%", + FD, TempFilePath, false, 0644); + if (EC) + return EC; + + // The unique_file() interface leaks lower layers and returns a file + // descriptor.
There is no way to directly close it, so use this hack + // to hand it off to raw_fd_ostream to close for us. + { + raw_fd_ostream Dummy(FD, /*shouldClose=*/true); + } + + // Resize file to requested initial size + EC = sys::fs::resize_file(Twine(TempFilePath), Size); + if (EC) + return EC; + + // If requested, make the output file executable. + if ( Flags & F_executable ) { + sys::fs::file_status Stat2; + EC = sys::fs::status(Twine(TempFilePath), Stat2); + if (EC) + return EC; + + sys::fs::perms new_perms = Stat2.permissions(); + if ( new_perms & sys::fs::owner_read ) + new_perms |= sys::fs::owner_exe; + if ( new_perms & sys::fs::group_read ) + new_perms |= sys::fs::group_exe; + if ( new_perms & sys::fs::others_read ) + new_perms |= sys::fs::others_exe; + new_perms |= sys::fs::add_perms; + EC = sys::fs::permissions(Twine(TempFilePath), new_perms); + if (EC) + return EC; + } + + // Memory map new file. + void *Base; + EC = sys::fs::map_file_pages(Twine(TempFilePath), 0, Size, true, Base); + if (EC) + return EC; + + // Create FileOutputBuffer object to own mapped range. + uint8_t *Start = reinterpret_cast<uint8_t*>(Base); + Result.reset(new FileOutputBuffer(Start, Start+Size, FilePath, TempFilePath)); + + return error_code::success(); +} + + +error_code FileOutputBuffer::commit(int64_t NewSmallerSize) { + // Unmap buffer, letting OS flush dirty pages to file on disk. + void *Start = reinterpret_cast<void*>(BufferStart); + error_code EC = sys::fs::unmap_file_pages(Start, getBufferSize()); + if (EC) + return EC; + + // If requested, resize file as part of commit. + if ( NewSmallerSize != -1 ) { + EC = sys::fs::resize_file(Twine(TempPath), NewSmallerSize); + if (EC) + return EC; + } + + // Rename file to final name. + return sys::fs::rename(Twine(TempPath), Twine(FinalPath)); +} + + +} // namespace + diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp index da5baab..4e4a026 100644 --- a/lib/Support/Mutex.cpp +++ b/lib/Support/Mutex.cpp @@ -59,7 +59,8 @@ MutexImpl::MutexImpl( bool recursive) errorcode = pthread_mutexattr_settype(&attr, kind); assert(errorcode == 0); -#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__) +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && \ + !defined(__DragonFly__) && !defined(__Bitrig__) // Make it a process local mutex errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE); assert(errorcode == 0); diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 7b26ea9..cca549d 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -124,6 +124,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case RTEMS: return "rtems"; case NativeClient: return "nacl"; case CNK: return "cnk"; + case Bitrig: return "bitrig"; } llvm_unreachable("Invalid OSType"); @@ -293,6 +294,7 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("rtems", Triple::RTEMS) .StartsWith("nacl", Triple::NativeClient) .StartsWith("cnk", Triple::CNK) + .StartsWith("bitrig", Triple::Bitrig) .Default(Triple::UnknownOS); } diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index b41390a..6bddbdf 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -260,7 +260,7 @@ Path::GetCurrentDirectory() { return Path(pathname); } -#if defined(__FreeBSD__) || defined (__NetBSD__) || \ +#if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) static int 
test_dir(char buf[PATH_MAX], char ret[PATH_MAX], @@ -329,7 +329,7 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { if (realpath(exe_path, link_path)) return Path(link_path); } -#elif defined(__FreeBSD__) || defined (__NetBSD__) || \ +#elif defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) char exe_path[PATH_MAX]; diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc index 93ccd1a..f59551e 100644 --- a/lib/Support/Unix/PathV2.inc +++ b/lib/Support/Unix/PathV2.inc @@ -50,6 +50,12 @@ #include <limits.h> #endif +// Both stdio.h and cstdio are included via different paths and +// stdcxx's cstdio doesn't include stdio.h, so it doesn't #undef the macros +// either. +#undef ferror +#undef feof + // For GNU Hurd #if defined(__GNU__) && !defined(PATH_MAX) # define PATH_MAX 4096 @@ -461,6 +467,118 @@ rety_open_create: return error_code::success(); } +error_code mapped_file_region::init(int fd, uint64_t offset) { + AutoFD FD(fd); + + // Figure out how large the file is. + struct stat FileInfo; + if (fstat(fd, &FileInfo) == -1) + return error_code(errno, system_category()); + uint64_t FileSize = FileInfo.st_size; + + if (Size == 0) + Size = FileSize; + else if (FileSize < Size) { + // We need to grow the file. + if (ftruncate(fd, Size) == -1) + return error_code(errno, system_category()); + } + + int flags = (Mode == readwrite) ? MAP_SHARED : MAP_PRIVATE; + int prot = (Mode == readonly) ? PROT_READ : (PROT_READ | PROT_WRITE); +#ifdef MAP_FILE + flags |= MAP_FILE; +#endif + Mapping = ::mmap(0, Size, prot, flags, fd, offset); + if (Mapping == MAP_FAILED) + return error_code(errno, system_category()); + return error_code::success(); +} + +mapped_file_region::mapped_file_region(const Twine &path, + mapmode mode, + uint64_t length, + uint64_t offset, + error_code &ec) + : Mode(mode) + , Size(length) + , Mapping() { + // Make sure that the requested size fits within SIZE_T. + if (length > std::numeric_limits<size_t>::max()) { + ec = make_error_code(errc::invalid_argument); + return; + } + + SmallString<128> path_storage; + StringRef name = path.toNullTerminatedStringRef(path_storage); + int oflags = (mode == readonly) ? O_RDONLY : O_RDWR; + int ofd = ::open(name.begin(), oflags); + if (ofd == -1) { + ec = error_code(errno, system_category()); + return; + } + + ec = init(ofd, offset); + if (ec) + Mapping = 0; +} + +mapped_file_region::mapped_file_region(int fd, + mapmode mode, + uint64_t length, + uint64_t offset, + error_code &ec) + : Mode(mode) + , Size(length) + , Mapping() { + // Make sure that the requested size fits within SIZE_T.
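+  // (For instance, on a 32-bit host size_t is only 32 bits wide, so a request + // to map a 6 GiB file must fail with invalid_argument here rather than be + // silently truncated when Size is later narrowed for ::mmap.)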
+ if (length > std::numeric_limits<size_t>::max()) { + ec = make_error_code(errc::invalid_argument); + return; + } + + ec = init(fd, offset); + if (ec) + Mapping = 0; +} + +mapped_file_region::~mapped_file_region() { + if (Mapping) + ::munmap(Mapping, Size); +} + +#if LLVM_USE_RVALUE_REFERENCES +mapped_file_region::mapped_file_region(mapped_file_region &&other) + : Mode(other.Mode), Size(other.Size), Mapping(other.Mapping) { + other.Mapping = 0; +} +#endif + +mapped_file_region::mapmode mapped_file_region::flags() const { + assert(Mapping && "Mapping failed but used anyway!"); + return Mode; +} + +uint64_t mapped_file_region::size() const { + assert(Mapping && "Mapping failed but used anyway!"); + return Size; +} + +char *mapped_file_region::data() const { + assert(Mapping && "Mapping failed but used anyway!"); + assert(Mode != readonly && "Cannot get non const data for readonly mapping!"); + return reinterpret_cast<char*>(Mapping); +} + +const char *mapped_file_region::const_data() const { + assert(Mapping && "Mapping failed but used anyway!"); + return reinterpret_cast<const char*>(Mapping); +} + +int mapped_file_region::alignment() { + return Process::GetPageSize(); +} + error_code detail::directory_iterator_construct(detail::DirIterState &it, StringRef path){ SmallString<128> path_null(path); diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index 174112e..5204147 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -20,9 +20,10 @@ #ifdef HAVE_SYS_RESOURCE_H #include <sys/resource.h> #endif -// DragonFly BSD has deprecated <malloc.h> for <stdlib.h> instead, -// Unix.h includes this for us already. -#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__) +// DragonFlyBSD, OpenBSD, and Bitrig have deprecated <malloc.h> for +// <stdlib.h> instead. Unix.h includes this for us already. +#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__) && \ + !defined(__OpenBSD__) && !defined(__Bitrig__) #include <malloc.h> #endif #ifdef HAVE_MALLOC_MALLOC_H diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc index 66eeab0..696768b 100644 --- a/lib/Support/Windows/PathV2.inc +++ b/lib/Support/Windows/PathV2.inc @@ -22,6 +22,8 @@ #include <sys/stat.h> #include <sys/types.h> +#undef max + // MinGW doesn't define this. #ifndef _ERRNO_T_DEFINED #define _ERRNO_T_DEFINED @@ -703,6 +705,203 @@ error_code get_magic(const Twine &path, uint32_t len, return error_code::success(); } +error_code mapped_file_region::init(int FD, uint64_t Offset) { + FileDescriptor = FD; + // Make sure that the requested size fits within SIZE_T. 
+ if (Size > std::numeric_limits<SIZE_T>::max()) { + if (FileDescriptor) + _close(FileDescriptor); + else + ::CloseHandle(FileHandle); + return make_error_code(errc::invalid_argument); + } + + DWORD flprotect; + switch (Mode) { + case readonly: flprotect = PAGE_READONLY; break; + case readwrite: flprotect = PAGE_READWRITE; break; + case priv: flprotect = PAGE_WRITECOPY; break; + default: llvm_unreachable("invalid mapping mode"); + } + + FileMappingHandle = ::CreateFileMapping(FileHandle, + 0, + flprotect, + Size >> 32, + Size & 0xffffffff, + 0); + if (FileMappingHandle == NULL) { + error_code ec = windows_error(GetLastError()); + if (FileDescriptor) + _close(FileDescriptor); + else + ::CloseHandle(FileHandle); + return ec; + } + + DWORD dwDesiredAccess; + switch (Mode) { + case readonly: dwDesiredAccess = FILE_MAP_READ; break; + case readwrite: dwDesiredAccess = FILE_MAP_WRITE; break; + case priv: dwDesiredAccess = FILE_MAP_COPY; break; + default: llvm_unreachable("invalid mapping mode"); + } + Mapping = ::MapViewOfFile(FileMappingHandle, + dwDesiredAccess, + Offset >> 32, + Offset & 0xffffffff, + Size); + if (Mapping == NULL) { + error_code ec = windows_error(GetLastError()); + ::CloseHandle(FileMappingHandle); + if (FileDescriptor) + _close(FileDescriptor); + else + ::CloseHandle(FileHandle); + return ec; + } + + if (Size == 0) { + MEMORY_BASIC_INFORMATION mbi; + SIZE_T Result = VirtualQuery(Mapping, &mbi, sizeof(mbi)); + if (Result == 0) { + error_code ec = windows_error(GetLastError()); + ::UnmapViewOfFile(Mapping); + ::CloseHandle(FileMappingHandle); + if (FileDescriptor) + _close(FileDescriptor); + else + ::CloseHandle(FileHandle); + return ec; + } + Size = mbi.RegionSize; + } + return error_code::success(); +} + +mapped_file_region::mapped_file_region(const Twine &path, + mapmode mode, + uint64_t length, + uint64_t offset, + error_code &ec) + : Mode(mode) + , Size(length) + , Mapping() + , FileDescriptor() + , FileHandle(INVALID_HANDLE_VALUE) + , FileMappingHandle() { + SmallString<128> path_storage; + SmallVector<wchar_t, 128> path_utf16; + + // Convert path to UTF-16. + if (ec = UTF8ToUTF16(path.toStringRef(path_storage), path_utf16)) + return; + + // Get file handle for creating a file mapping. + FileHandle = ::CreateFileW(c_str(path_utf16), + Mode == readonly ? GENERIC_READ + : GENERIC_READ | GENERIC_WRITE, + Mode == readonly ? FILE_SHARE_READ + : 0, + 0, + Mode == readonly ? OPEN_EXISTING + : OPEN_ALWAYS, + Mode == readonly ? 
FILE_ATTRIBUTE_READONLY + : FILE_ATTRIBUTE_NORMAL, + 0); + if (FileHandle == INVALID_HANDLE_VALUE) { + ec = windows_error(::GetLastError()); + return; + } + + FileDescriptor = 0; + ec = init(FileDescriptor, offset); + if (ec) { + Mapping = FileMappingHandle = 0; + FileHandle = INVALID_HANDLE_VALUE; + FileDescriptor = 0; + } +} + +mapped_file_region::mapped_file_region(int fd, + mapmode mode, + uint64_t length, + uint64_t offset, + error_code &ec) + : Mode(mode) + , Size(length) + , Mapping() + , FileDescriptor(fd) + , FileHandle(INVALID_HANDLE_VALUE) + , FileMappingHandle() { + FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(fd)); + if (FileHandle == INVALID_HANDLE_VALUE) { + _close(FileDescriptor); + FileDescriptor = 0; + ec = make_error_code(errc::bad_file_descriptor); + return; + } + + ec = init(FileDescriptor, offset); + if (ec) { + Mapping = FileMappingHandle = 0; + FileHandle = INVALID_HANDLE_VALUE; + FileDescriptor = 0; + } +} + +mapped_file_region::~mapped_file_region() { + if (Mapping) + ::UnmapViewOfFile(Mapping); + if (FileMappingHandle) + ::CloseHandle(FileMappingHandle); + if (FileDescriptor) + _close(FileDescriptor); + else if (FileHandle != INVALID_HANDLE_VALUE) + ::CloseHandle(FileHandle); +} + +#if LLVM_USE_RVALUE_REFERENCES +mapped_file_region::mapped_file_region(mapped_file_region &&other) + : Mode(other.Mode) + , Size(other.Size) + , Mapping(other.Mapping) + , FileDescriptor(other.FileDescriptor) + , FileHandle(other.FileHandle) + , FileMappingHandle(other.FileMappingHandle) { + other.Mapping = other.FileMappingHandle = 0; + other.FileHandle = INVALID_HANDLE_VALUE; + other.FileDescriptor = 0; +} +#endif + +mapped_file_region::mapmode mapped_file_region::flags() const { + assert(Mapping && "Mapping failed but used anyway!"); + return Mode; +} + +uint64_t mapped_file_region::size() const { + assert(Mapping && "Mapping failed but used anyway!"); + return Size; +} + +char *mapped_file_region::data() const { + assert(Mode != readonly && "Cannot get non const data for readonly mapping!"); + assert(Mapping && "Mapping failed but used anyway!"); + return reinterpret_cast<char*>(Mapping); +} + +const char *mapped_file_region::const_data() const { + assert(Mapping && "Mapping failed but used anyway!"); + return reinterpret_cast<const char*>(Mapping); +} + +int mapped_file_region::alignment() { + SYSTEM_INFO SysInfo; + ::GetSystemInfo(&SysInfo); + return SysInfo.dwAllocationGranularity; +} + error_code detail::directory_iterator_construct(detail::DirIterState &it, StringRef path){ SmallVector<wchar_t, 128> path_utf16; diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 9424677..b9c7ff6 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -2284,23 +2284,33 @@ InstantiateMulticlassDef(MultiClass &MC, Ref.Rec = DefProto; AddSubClass(CurRec, Ref); - if (DefNameString == 0) { - // We must resolve references to NAME. - if (SetValue(CurRec, Ref.RefLoc, "NAME", std::vector<unsigned>(), - DefmPrefix)) { - Error(DefmPrefixLoc, "Could not resolve " - + CurRec->getNameInitAsString() + ":NAME to '" - + DefmPrefix->getAsUnquotedString() + "'"); - return 0; - } + // Set the value for NAME. We don't resolve references to it 'til later, + // though, so that uses in nested multiclass names don't get + // confused. 
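+    // (Illustration: for a multiclass whose defs are named _lo and _hi, a + // "defm ADD : ..." must eventually produce records named ADD_lo and ADD_hi; + // internally those def names are expressions involving NAME, and binding + // NAME to the defm prefix here is what drives that expansion.)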
+ if (SetValue(CurRec, Ref.RefLoc, "NAME", std::vector<unsigned>(), + DefmPrefix)) { + Error(DefmPrefixLoc, "Could not resolve " + + CurRec->getNameInitAsString() + ":NAME to '" + + DefmPrefix->getAsUnquotedString() + "'"); + return 0; + } + // If the DefNameString didn't resolve, we probably have a reference to + // NAME and need to replace it. We need to do at least this much greedily, + // otherwise nested multiclasses will end up with incorrect NAME expansions. + if (DefNameString == 0) { RecordVal *DefNameRV = CurRec->getValue("NAME"); CurRec->resolveReferencesTo(DefNameRV); } if (!CurMultiClass) { - // We do this after resolving NAME because before resolution, many - // multiclass defs will have the same name expression. If we are + // Now that we're at the top level, resolve all NAME references + // in the resultant defs that weren't in the def names themselves. + RecordVal *DefNameRV = CurRec->getValue("NAME"); + CurRec->resolveReferencesTo(DefNameRV); + + // Now that NAME references are resolved and we're at the top level of + // any multiclass expansions, add the record to the RecordKeeper. If we are // currently in a multiclass, it means this defm appears inside a // multiclass and its name won't be fully resolvable until we see // the top-level defm. Therefore, we don't add this to the diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index cd3c0e0..69e2346 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -224,7 +224,7 @@ def : ProcNoItin<"cortex-m3", [HasV7Ops, def : ProcNoItin<"cortex-m4", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, - FeatureT2XtPk, FeatureVFP2, + FeatureT2XtPk, FeatureVFP4, FeatureVFPOnlySP, FeatureMClass]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 9a1ce06..e9e2803 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -529,10 +529,24 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, return false; } - // These modifiers are not yet supported. + // This modifier is not yet supported. case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1. - case 'H': // The highest-numbered register of a pair. return true; + case 'H': { // The highest-numbered register of a pair. + const MachineOperand &MO = MI->getOperand(OpNum); + if (!MO.isReg()) + return true; + const TargetRegisterClass &RC = ARM::GPRRegClass; + const MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + + unsigned RegIdx = TRI->getEncodingValue(MO.getReg()); + RegIdx |= 1; // The odd register is also the higher-numbered one of a pair. + + unsigned Reg = RC.getRegister(RegIdx); + O << ARMInstPrinter::getRegisterName(Reg); + return false; + } } } @@ -1136,8 +1150,14 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { assert(SrcReg == ARM::SP && "Only stack pointer as a source reg is supported"); for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset; - i != NumOps; ++i) - RegList.push_back(MI->getOperand(i).getReg()); + i != NumOps; ++i) { + const MachineOperand &MO = MI->getOperand(i); + // Actually, there should never be any impdef stuff here. Skip it + // temporarily to work around PR11902.
+ if (MO.isImplicit()) + continue; + RegList.push_back(MO.getReg()); + } break; case ARM::STR_PRE_IMM: case ARM::STR_PRE_REG: diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 714238a..29033e5 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -795,8 +795,28 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } else llvm_unreachable("Unknown reg class!"); break; + case 24: + if (ARM::DTripleRegClass.hasSubClassEq(RC)) { + // Use aligned spills if the stack can be realigned. + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo)) + .addFrameIndex(FI).addImm(16) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO)); + } else { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + .addFrameIndex(FI)) + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); + AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + } + } else + llvm_unreachable("Unknown reg class!"); + break; case 32: - if (ARM::QQPRRegClass.hasSubClassEq(RC)) { + if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { // FIXME: It's possible to only store part of the QQ register if the // spilled def has a sub-register index. @@ -868,6 +888,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, } break; case ARM::VST1q64: + case ARM::VST1d64TPseudo: + case ARM::VST1d64QPseudo: if (MI->getOperand(0).isFI() && MI->getOperand(2).getSubReg() == 0) { FrameIndex = MI->getOperand(0).getIndex(); @@ -942,8 +964,28 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } else llvm_unreachable("Unknown reg class!"); break; - case 32: - if (ARM::QQPRRegClass.hasSubClassEq(RC)) { + case 24: + if (ARM::DTripleRegClass.hasSubClassEq(RC)) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) + .addFrameIndex(FI).addImm(16) + .addMemOperand(MMO)); + } else { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) + .addFrameIndex(FI) + .addMemOperand(MMO)); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + MIB.addReg(DestReg, RegState::ImplicitDefine); + } + } else + llvm_unreachable("Unknown reg class!"); + break; + case 32: + if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) .addFrameIndex(FI).addImm(16) @@ -1016,6 +1058,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, } break; case ARM::VLD1q64: + case ARM::VLD1d64TPseudo: + case ARM::VLD1d64QPseudo: if (MI->getOperand(1).isFI() && MI->getOperand(0).getSubReg() == 0) { FrameIndex = MI->getOperand(1).getIndex(); @@ -1524,6 +1568,139 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); } +/// Identify instructions that can be folded into a MOVCC instruction, and +/// return the 
corresponding opcode for the predicated pseudo-instruction. +static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI, + const MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return 0; + if (!MRI.hasOneNonDBGUse(Reg)) + return 0; + MI = MRI.getVRegDef(Reg); + if (!MI) + return 0; + // Check if MI has any non-dead defs or physreg uses. This also detects + // predicated instructions which will be reading CPSR. + for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + // Reject frame index operands, PEI can't handle the predicated pseudos. + if (MO.isFI() || MO.isCPI() || MO.isJTI()) + return 0; + if (!MO.isReg()) + continue; + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + return 0; + if (MO.isDef() && !MO.isDead()) + return 0; + } + switch (MI->getOpcode()) { + default: return 0; + case ARM::ANDri: return ARM::ANDCCri; + case ARM::ANDrr: return ARM::ANDCCrr; + case ARM::ANDrsi: return ARM::ANDCCrsi; + case ARM::ANDrsr: return ARM::ANDCCrsr; + case ARM::t2ANDri: return ARM::t2ANDCCri; + case ARM::t2ANDrr: return ARM::t2ANDCCrr; + case ARM::t2ANDrs: return ARM::t2ANDCCrs; + case ARM::EORri: return ARM::EORCCri; + case ARM::EORrr: return ARM::EORCCrr; + case ARM::EORrsi: return ARM::EORCCrsi; + case ARM::EORrsr: return ARM::EORCCrsr; + case ARM::t2EORri: return ARM::t2EORCCri; + case ARM::t2EORrr: return ARM::t2EORCCrr; + case ARM::t2EORrs: return ARM::t2EORCCrs; + case ARM::ORRri: return ARM::ORRCCri; + case ARM::ORRrr: return ARM::ORRCCrr; + case ARM::ORRrsi: return ARM::ORRCCrsi; + case ARM::ORRrsr: return ARM::ORRCCrsr; + case ARM::t2ORRri: return ARM::t2ORRCCri; + case ARM::t2ORRrr: return ARM::t2ORRCCrr; + case ARM::t2ORRrs: return ARM::t2ORRCCrs; + + // ARM ADD/SUB + case ARM::ADDri: return ARM::ADDCCri; + case ARM::ADDrr: return ARM::ADDCCrr; + case ARM::ADDrsi: return ARM::ADDCCrsi; + case ARM::ADDrsr: return ARM::ADDCCrsr; + case ARM::SUBri: return ARM::SUBCCri; + case ARM::SUBrr: return ARM::SUBCCrr; + case ARM::SUBrsi: return ARM::SUBCCrsi; + case ARM::SUBrsr: return ARM::SUBCCrsr; + + // Thumb2 ADD/SUB + case ARM::t2ADDri: return ARM::t2ADDCCri; + case ARM::t2ADDri12: return ARM::t2ADDCCri12; + case ARM::t2ADDrr: return ARM::t2ADDCCrr; + case ARM::t2ADDrs: return ARM::t2ADDCCrs; + case ARM::t2SUBri: return ARM::t2SUBCCri; + case ARM::t2SUBri12: return ARM::t2SUBCCri12; + case ARM::t2SUBrr: return ARM::t2SUBCCrr; + case ARM::t2SUBrs: return ARM::t2SUBCCrs; + } +} + +bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, + SmallVectorImpl<MachineOperand> &Cond, + unsigned &TrueOp, unsigned &FalseOp, + bool &Optimizable) const { + assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && + "Unknown select instruction"); + // MOVCC operands: + // 0: Def. + // 1: True use. + // 2: False use. + // 3: Condition code. + // 4: CPSR use. + TrueOp = 1; + FalseOp = 2; + Cond.push_back(MI->getOperand(3)); + Cond.push_back(MI->getOperand(4)); + // We can always fold a def. 
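+  // (For example, if one operand is the only use of "%x = ANDri %a, 1", + // optimizeSelect() below can rewrite the MOVCCr into a single predicated + // ANDCCri, so every such select is reported as optimizable.)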
+ Optimizable = true; + return false; +} + +MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, + bool PreferFalse) const { + assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && + "Unknown select instruction"); + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MachineInstr *DefMI = 0; + unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI); + bool Invert = !Opc; + if (!Opc) + Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI); + if (!Opc) + return 0; + + // Create a new predicated version of DefMI. + // Rfalse is the first use. + MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + get(Opc), MI->getOperand(0).getReg()) + .addOperand(MI->getOperand(Invert ? 2 : 1)); + + // Copy all the DefMI operands, excluding its (null) predicate. + const MCInstrDesc &DefDesc = DefMI->getDesc(); + for (unsigned i = 1, e = DefDesc.getNumOperands(); + i != e && !DefDesc.OpInfo[i].isPredicate(); ++i) + NewMI.addOperand(DefMI->getOperand(i)); + + unsigned CondCode = MI->getOperand(3).getImm(); + if (Invert) + NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode))); + else + NewMI.addImm(CondCode); + NewMI.addOperand(MI->getOperand(4)); + + // DefMI is not the -S version that sets CPSR, so add an optional %noreg. + if (NewMI->hasOptionalDef()) + AddDefaultCC(NewMI); + + // The caller will erase MI, but not DefMI. + DefMI->eraseFromParent(); + return NewMI; +} + /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the /// instruction is encoded with an 'S' bit is determined by the optional CPSR /// def operand. @@ -3180,11 +3357,18 @@ enum ARMExeDomain { // std::pair<uint16_t, uint16_t> ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { - // VMOVD is a VFP instruction, but can be changed to NEON if it isn't - // predicated. + // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON + // if they are not predicated. if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); + // Cortex-A9 is particularly picky about mixing the two and wants these + // converted. + if (Subtarget.isCortexA9() && !isPredicated(MI) && + (MI->getOpcode() == ARM::VMOVRS || + MI->getOpcode() == ARM::VMOVSR)) + return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); + // No other instructions can be swizzled, so just determine their domain. unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask; @@ -3204,22 +3388,95 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { void ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { - // We only know how to change VMOVD into VORR. - assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD"); - if (Domain != ExeNEON) - return; + unsigned DstReg, SrcReg, DReg; + unsigned Lane; + MachineInstrBuilder MIB(MI); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + bool isKill; + switch (MI->getOpcode()) { + default: + llvm_unreachable("cannot handle opcode!"); + break; + case ARM::VMOVD: + if (Domain != ExeNEON) + break; - // Zap the predicate operands. - assert(!isPredicated(MI) && "Cannot predicate a VORRd"); - MI->RemoveOperand(3); - MI->RemoveOperand(2); + // Zap the predicate operands. + assert(!isPredicated(MI) && "Cannot predicate a VORRd"); + MI->RemoveOperand(3); + MI->RemoveOperand(2); - // Change to a VORRd which requires two identical use operands. 
- MI->setDesc(get(ARM::VORRd)); + // Change to a VORRd which requires two identical use operands. + MI->setDesc(get(ARM::VORRd)); + + // Add the extra source operand and new predicates. + // This will go before any implicit ops. + AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); + break; + case ARM::VMOVRS: + if (Domain != ExeNEON) + break; + assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); + + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + + DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass); + Lane = 0; + if (DReg == ARM::NoRegister) { + DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass); + Lane = 1; + assert(DReg && "S-register with no D super-register?"); + } + + MI->RemoveOperand(3); + MI->RemoveOperand(2); + MI->RemoveOperand(1); + + MI->setDesc(get(ARM::VGETLNi32)); + MIB.addReg(DReg); + MIB.addImm(Lane); + + MIB->getOperand(1).setIsUndef(); + MIB.addReg(SrcReg, RegState::Implicit); + + AddDefaultPred(MIB); + break; + case ARM::VMOVSR: + if (Domain != ExeNEON) + break; + assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); + + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass); + Lane = 0; + if (DReg == ARM::NoRegister) { + DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass); + Lane = 1; + assert(DReg && "S-register with no D super-register?"); + } + isKill = MI->getOperand(0).isKill(); + + MI->RemoveOperand(3); + MI->RemoveOperand(2); + MI->RemoveOperand(1); + MI->RemoveOperand(0); + + MI->setDesc(get(ARM::VSETLNi32)); + MIB.addReg(DReg, RegState::Define); + MIB.addReg(DReg, RegState::Undef); + MIB.addReg(SrcReg); + MIB.addImm(Lane); + + if (isKill) + MIB->addRegisterKilled(DstReg, TRI, true); + MIB->addRegisterDefined(DstReg, TRI); + + AddDefaultPred(MIB); + break; + } - // Add the extra source operand and new predicates. - // This will go before any implicit ops. - AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); } bool ARMBaseInstrInfo::hasNOP() const { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 1a10a4a..92e5ee8 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -202,6 +202,13 @@ public: unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const; + virtual bool analyzeSelect(const MachineInstr *MI, + SmallVectorImpl<MachineOperand> &Cond, + unsigned &TrueOp, unsigned &FalseOp, + bool &Optimizable) const; + + virtual MachineInstr *optimizeSelect(MachineInstr *MI, bool) const; + /// FoldImmediate - 'Reg' is known to be defined by a move immediate /// instruction, try to fold the immediate into the use instruction. virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, @@ -352,6 +359,11 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg); int getMatchingCondBranchOpcode(int Opc); +/// Determine if MI can be folded into an ARM MOVCC instruction, and return the +/// opcode of the SSA instruction representing the conditional MI. +unsigned canFoldARMInstrIntoMOVCC(unsigned Reg, + MachineInstr *&MI, + const MachineRegisterInfo &MRI); /// Map pseudo instructions that imply an 'S' bit onto real opcodes. 
Whether /// the instruction is encoded with an 'S' bit is determined by the optional diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 231bd26..9deb96e 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -62,8 +62,20 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const uint16_t* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + bool ghcCall = false; + + if (MF) { + const Function *F = MF->getFunction(); + ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false); + } + + if (ghcCall) { + return CSR_GHC_SaveList; + } + else { return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; + } } const uint32_t* diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index b9a2512..bda1517 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -79,6 +79,25 @@ def RetFastCC_ARM_APCS : CallingConv<[ CCDelegateTo<RetCC_ARM_APCS> ]>; +//===----------------------------------------------------------------------===// +// ARM APCS Calling Convention for GHC +//===----------------------------------------------------------------------===// + +def CC_ARM_APCS_GHC : CallingConv<[ + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + + CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, + CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>, + CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>, + + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType<i32>>, + + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim + CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>> +]>; //===----------------------------------------------------------------------===// // ARM AAPCS (EABI) Calling Convention, common parts @@ -113,6 +132,9 @@ def RetCC_ARM_AAPCS_Common : CallingConv<[ //===----------------------------------------------------------------------===// def CC_ARM_AAPCS : CallingConv<[ + // Handles byval parameters. + CCIfByVal<CCPassByVal<4, 4>>, + // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, @@ -138,6 +160,9 @@ def RetCC_ARM_AAPCS : CallingConv<[ //===----------------------------------------------------------------------===// def CC_ARM_AAPCS_VFP : CallingConv<[ + // Handles byval parameters. + CCIfByVal<CCPassByVal<4, 4>>, + // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, @@ -171,3 +196,9 @@ def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, // iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register. // Also save R7-R4 first to match the stack frame fixed spill areas. 
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; + +// The GHC set of callee-saved regs is empty, as all of those regs are +// used for passing STG regs around. +// 'add' is a workaround for not being able to compile an empty list: +// def CSR_GHC : CalleeSavedRegs<()>; +def CSR_GHC : CalleeSavedRegs<(add)>; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index af260a5..132b81f 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -264,7 +264,7 @@ namespace { emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry); return 0; } - unsigned Reg = getARMRegisterNumbering(MO.getReg()); + unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg()); int32_t Imm12 = MO1.getImm(); uint32_t Binary; Binary = Imm12 & 0xfff; @@ -314,18 +314,24 @@ namespace { // {7-0} = imm8 uint32_t Binary = 0; const MachineOperand &MO = MI.getOperand(Op); - uint32_t Reg = getMachineOpValue(MI, MO); - Binary |= (Reg << 9); - - // If there is a non-zero immediate offset, encode it. - if (MO.isReg()) { - const MachineOperand &MO1 = MI.getOperand(Op + 1); - if (uint32_t ImmOffs = ARM_AM::getAM5Offset(MO1.getImm())) { - if (ARM_AM::getAM5Op(MO1.getImm()) == ARM_AM::add) - Binary |= 1 << 8; - Binary |= ImmOffs & 0xff; - return Binary; - } + const MachineOperand &MO1 = MI.getOperand(Op + 1); + if (!MO.isReg()) { + emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry); + return 0; + } + unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg()); + int32_t Imm12 = MO1.getImm(); + + // Special value for #-0 + if (Imm12 == INT32_MIN) + Imm12 = 0; + + // Immediate is always encoded as positive. The 'U' bit controls add vs + // sub. + bool isAdd = true; + if (Imm12 < 0) { + Imm12 = -Imm12; + isAdd = false; } // If immediate offset is omitted, default to +0. @@ -367,6 +373,12 @@ namespace { void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc, intptr_t JTBase = 0) const; + unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) const; }; } @@ -455,7 +467,7 @@ unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI, unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, const MachineOperand &MO) const { if (MO.isReg()) - return getARMRegisterNumbering(MO.getReg()); + return II->getRegisterInfo().getEncodingValue(MO.getReg()); else if (MO.isImm()) return static_cast<unsigned>(MO.getImm()); else if (MO.isFPImm()) @@ -816,7 +828,7 @@ void ARMCodeEmitter::emitLEApcrelInstruction(const MachineInstr &MI) { Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; // Encode Rn which is PC. - Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift; // Encode the displacement which is a so_imm. // Set bit I(25) to identify this is the immediate form of <shifter_op> @@ -844,7 +856,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; // Encode Rn which is PC.
- Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift; // Encode the displacement. Binary |= 1 << ARMII::I_BitShift; @@ -1045,7 +1057,7 @@ unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI, if (Rs) { // Encode Rs bit[11:8]. assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift); + return Binary | (II->getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift); } // Encode shift_imm bit[11:7]. @@ -1101,7 +1113,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; else if (ImplicitRd) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); if (MCID.Opcode == ARM::MOVi16) { // Get immediate from MI. @@ -1151,7 +1163,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, if (!isUnary) { if (ImplicitRn) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); else { Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift; ++OpIdx; @@ -1168,7 +1180,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, if (MO.isReg()) { // Encode register Rm. - emitWordLE(Binary | getARMRegisterNumbering(MO.getReg())); + emitWordLE(Binary | II->getRegisterInfo().getEncodingValue(MO.getReg())); return; } @@ -1217,14 +1229,14 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, // Set first operand if (ImplicitRd) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); else Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; // Set second operand if (ImplicitRn) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); else Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; @@ -1251,7 +1263,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, Binary |= 1 << ARMII::I_BitShift; assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); // Set bit[3:0] to the corresponding Rm register - Binary |= getARMRegisterNumbering(MO2.getReg()); + Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); // If this instr is in scaled register offset/index instruction, set // shift_immed(bit[11:7]) and shift(bit[6:5]) fields. @@ -1295,7 +1307,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, // Set second operand if (ImplicitRn) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); else Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; @@ -1314,7 +1326,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, // If this instr is in register offset/index encoding, set bit[3:0] // to the corresponding Rm register. 
if (MO2.getReg()) { - Binary |= getARMRegisterNumbering(MO2.getReg()); + Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); emitWordLE(Binary); return; } @@ -1385,7 +1397,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isImplicit()) break; - unsigned RegNum = getARMRegisterNumbering(MO.getReg()); + unsigned RegNum = II->getRegisterInfo().getEncodingValue(MO.getReg()); assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && RegNum < 16); Binary |= 0x1 << RegNum; @@ -1632,7 +1644,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR) // The return register is LR. - Binary |= getARMRegisterNumbering(ARM::LR); + Binary |= II->getRegisterInfo().getEncodingValue(ARM::LR); else // otherwise, set the return register Binary |= getMachineOpValue(MI, 0); @@ -1640,11 +1652,12 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeVFPRd(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegD = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; bool isSPVFP = ARM::SPRRegClass.contains(RegD); - RegD = getARMRegisterNumbering(RegD); + RegD = II->getRegisterInfo().getEncodingValue(RegD); if (!isSPVFP) { Binary |= (RegD & 0x0F) << ARMII::RegRdShift; Binary |= ((RegD & 0x10) >> 4) << ARMII::D_BitShift; @@ -1655,11 +1668,12 @@ static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) { return Binary; } -static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeVFPRn(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegN = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; bool isSPVFP = ARM::SPRRegClass.contains(RegN); - RegN = getARMRegisterNumbering(RegN); + RegN = II->getRegisterInfo().getEncodingValue(RegN); if (!isSPVFP) { Binary |= (RegN & 0x0F) << ARMII::RegRnShift; Binary |= ((RegN & 0x10) >> 4) << ARMII::N_BitShift; @@ -1670,11 +1684,12 @@ static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) { return Binary; } -static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeVFPRm(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegM = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; bool isSPVFP = ARM::SPRRegClass.contains(RegM); - RegM = getARMRegisterNumbering(RegM); + RegM = II->getRegisterInfo().getEncodingValue(RegM); if (!isSPVFP) { Binary |= (RegM & 0x0F); Binary |= ((RegM & 0x10) >> 4) << ARMII::M_BitShift; @@ -1885,28 +1900,31 @@ void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeNEONRd(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegD = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - RegD = getARMRegisterNumbering(RegD); + RegD = II->getRegisterInfo().getEncodingValue(RegD); Binary |= (RegD & 0xf) << ARMII::RegRdShift; Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift; return Binary; } -static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeNEONRn(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegN = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - RegN = 
getARMRegisterNumbering(RegN); + RegN = II->getRegisterInfo().getEncodingValue(RegN); Binary |= (RegN & 0xf) << ARMII::RegRnShift; Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift; return Binary; } -static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeNEONRm(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegM = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - RegM = getARMRegisterNumbering(RegM); + RegM = II->getRegisterInfo().getEncodingValue(RegM); Binary |= (RegM & 0xf); Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift; return Binary; @@ -1940,7 +1958,7 @@ void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) { Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; unsigned RegT = MI.getOperand(RegTOpIdx).getReg(); - RegT = getARMRegisterNumbering(RegT); + RegT = II->getRegisterInfo().getEncodingValue(RegT); Binary |= (RegT << ARMII::RegRdShift); Binary |= encodeNEONRn(MI, RegNOpIdx); @@ -1969,7 +1987,7 @@ void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) { Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; unsigned RegT = MI.getOperand(1).getReg(); - RegT = getARMRegisterNumbering(RegT); + RegT = II->getRegisterInfo().getEncodingValue(RegT); Binary |= (RegT << ARMII::RegRdShift); Binary |= encodeNEONRn(MI, 0); emitWordLE(Binary); diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index a242b13..15bb32e 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1009,7 +1009,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); unsigned OpIdx = 0; unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned Lane = getARMRegisterNumbering(SrcReg) & 1; + unsigned Lane = TRI->getEncodingValue(SrcReg) & 1; unsigned DReg = TRI->getMatchingSuperReg(SrcReg, Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index b96395f..5a5ca1b 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -87,8 +87,9 @@ class ARMFastISel : public FastISel { LLVMContext *Context; public: - explicit ARMFastISel(FunctionLoweringInfo &funcInfo) - : FastISel(funcInfo), + explicit ARMFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) + : FastISel(funcInfo, libInfo), TM(funcInfo.MF->getTarget()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()) { @@ -99,51 +100,53 @@ class ARMFastISel : public FastISel { } // Code from FastISel.cpp. 
- virtual unsigned FastEmitInst_(unsigned MachineInstOpcode, - const TargetRegisterClass *RC); - virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill); - virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill); - virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - unsigned Op2, bool Op2IsKill); - virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - uint64_t Imm); - virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - const ConstantFP *FPImm); - virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - uint64_t Imm); - virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm); - virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm1, uint64_t Imm2); - - virtual unsigned FastEmitInst_extractsubreg(MVT RetVT, - unsigned Op0, bool Op0IsKill, - uint32_t Idx); + private: + unsigned FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass *RC); + unsigned FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill); + unsigned FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); + unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + unsigned Op2, bool Op2IsKill); + unsigned FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm); + unsigned FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm); + unsigned FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm); + unsigned FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm); + unsigned FastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2); + + unsigned FastEmitInst_extractsubreg(MVT RetVT, + unsigned Op0, bool Op0IsKill, + uint32_t Idx); // Backend specific FastISel code. + private: virtual bool TargetSelectInstruction(const Instruction *I); virtual unsigned TargetMaterializeConstant(const Constant *C); virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, const LoadInst *LI); - + private: #include "ARMGenFastISel.inc" // Instruction selection routines. @@ -167,6 +170,7 @@ class ARMFastISel : public FastISel { bool SelectRet(const Instruction *I); bool SelectTrunc(const Instruction *I); bool SelectIntExt(const Instruction *I); + bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy); // Utility routines. 
private: @@ -1819,9 +1823,12 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, default: llvm_unreachable("Unsupported calling convention"); case CallingConv::Fast: - // Ignore fastcc. Silence compiler warnings. - (void)RetFastCC_ARM_APCS; - (void)FastCC_ARM_APCS; + if (Subtarget->hasVFP2() && !isVarArg) { + if (!Subtarget->isAAPCS_ABI()) + return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); + // For AAPCS ABI targets, just use VFP variant of the calling convention. + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + } // Fallthrough case CallingConv::C: // Use target triple & subtarget features to do actual dispatch. @@ -1842,6 +1849,11 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + case CallingConv::GHC: + if (Return) + llvm_unreachable("Can't return in GHC call convention"); + else + return CC_ARM_APCS_GHC; } } @@ -2608,6 +2620,61 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) { return true; } +bool ARMFastISel::SelectShift(const Instruction *I, + ARM_AM::ShiftOpc ShiftTy) { + // We handle thumb2 mode by target independent selector + // or SelectionDAG ISel. + if (isThumb2) + return false; + + // Only handle i32 now. + EVT DestVT = TLI.getValueType(I->getType(), true); + if (DestVT != MVT::i32) + return false; + + unsigned Opc = ARM::MOVsr; + unsigned ShiftImm; + Value *Src2Value = I->getOperand(1); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) { + ShiftImm = CI->getZExtValue(); + + // Fall back to selection DAG isel if the shift amount + // is zero or greater than the width of the value type. + if (ShiftImm == 0 || ShiftImm >=32) + return false; + + Opc = ARM::MOVsi; + } + + Value *Src1Value = I->getOperand(0); + unsigned Reg1 = getRegForValue(Src1Value); + if (Reg1 == 0) return false; + + unsigned Reg2; + if (Opc == ARM::MOVsr) { + Reg2 = getRegForValue(Src2Value); + if (Reg2 == 0) return false; + } + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + if(ResultReg == 0) return false; + + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg) + .addReg(Reg1); + + if (Opc == ARM::MOVsi) + MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm)); + else if (Opc == ARM::MOVsr) { + MIB.addReg(Reg2); + MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0)); + } + + AddOptionalDefs(MIB); + UpdateValueMap(I, ResultReg); + return true; +} + // TODO: SoftFP support. bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { @@ -2668,6 +2735,12 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { case Instruction::ZExt: case Instruction::SExt: return SelectIntExt(I); + case Instruction::Shl: + return SelectShift(I, ARM_AM::lsl); + case Instruction::LShr: + return SelectShift(I, ARM_AM::lsr); + case Instruction::AShr: + return SelectShift(I, ARM_AM::asr); default: break; } return false; @@ -2720,14 +2793,15 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, } namespace llvm { - FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { + FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) { // Completely untested on non-iOS. const TargetMachine &TM = funcInfo.MF->getTarget(); // Darwin and thumb1 only for now. 
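// (Aside, not from the patch: in the SelectShift hunk above,
//  ARM_AM::getSORegOpc packs the shift kind and amount into a single shifter
//  operand, so the two MIB.addImm calls build, illustratively:
//    unsigned ImmOp = ARM_AM::getSORegOpc(ARM_AM::lsl, 3); // "lsl #3", MOVsi
//    unsigned RegOp = ARM_AM::getSORegOpc(ARM_AM::lsr, 0); // amount in Rs, MOVsr
//  MOVsr carries the shift amount in a register, so its packed amount is 0.)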
const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>(); if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only()) - return new ARMFastISel(funcInfo); + return new ARMFastISel(funcInfo, libInfo); return 0; } } diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 2629496..aee72d2 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -15,6 +15,8 @@ #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" +#include "llvm/CallingConv.h" +#include "llvm/Function.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -151,6 +153,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { int FramePtrSpillFI = 0; int D8SpillFI = 0; + // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue. + if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + return; + // Allocate the vararg register save area. This is not counted in NumBytes. if (VARegSaveSize) emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize, @@ -354,6 +360,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); + // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue. + if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + return; + if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 1953192..c6f9d15 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -47,11 +47,6 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden, cl::desc("Check fp vmla / vmls hazard at isel time"), cl::init(true)); -static cl::opt<bool> -DisableARMIntABS("disable-arm-int-abs", cl::Hidden, - cl::desc("Enable / disable ARM integer abs transform"), - cl::init(false)); - //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine /// instructions for SelectionDAG operations. @@ -244,7 +239,6 @@ private: /// SelectCMOVOp - Select CMOV instructions for ARM. 
SDNode *SelectCMOVOp(SDNode *N); - SDNode *SelectConditionalOp(SDNode *N); SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); @@ -2368,115 +2362,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } -SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) { - SDValue FalseVal = N->getOperand(0); - SDValue TrueVal = N->getOperand(1); - ARMCC::CondCodes CCVal = - (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); - SDValue CCR = N->getOperand(3); - assert(CCR.getOpcode() == ISD::Register); - SDValue InFlag = N->getOperand(4); - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - - if (Subtarget->isThumb()) { - SDValue CPTmp0; - SDValue CPTmp1; - if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::t2ANDCCrs; break; - case ARMISD::COR: Opc = ARM::t2ORRCCrs; break; - case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break; - } - SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7); - } - - ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); - if (T) { - unsigned TrueImm = T->getZExtValue(); - if (is_t2_so_imm(TrueImm)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::t2ANDCCri; break; - case ARMISD::COR: Opc = ARM::t2ORRCCri; break; - case ARMISD::CXOR: Opc = ARM::t2EORCCri; break; - } - SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); - SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); - } - } - - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::t2ANDCCrr; break; - case ARMISD::COR: Opc = ARM::t2ORRCCrr; break; - case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break; - } - SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); - } - - SDValue CPTmp0; - SDValue CPTmp1; - SDValue CPTmp2; - if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::ANDCCrsi; break; - case ARMISD::COR: Opc = ARM::ORRCCrsi; break; - case ARMISD::CXOR: Opc = ARM::EORCCrsi; break; - } - SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7); - } - - if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::ANDCCrsr; break; - case ARMISD::COR: Opc = ARM::ORRCCrsr; break; - case ARMISD::CXOR: Opc = ARM::EORCCrsr; break; - } - SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8); - } - - ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); - if (T) { - unsigned TrueImm = T->getZExtValue(); - if (is_so_imm(TrueImm)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::ANDCCri; break; - case ARMISD::COR: Opc = ARM::ORRCCri; break; - case ARMISD::CXOR: Opc = ARM::EORCCri; 
break; - } - SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); - SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); - } - } - - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::ANDCCrr; break; - case ARMISD::COR: Opc = ARM::ORRCCrr; break; - case ARMISD::CXOR: Opc = ARM::EORCCrr; break; - } - SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); -} - /// Target-specific DAG combining for ISD::XOR. /// Target-independent combining lowers SELECT_CC nodes of the form /// select_cc setg[ge] X, 0, X, -X @@ -2492,14 +2377,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ SDValue XORSrc1 = N->getOperand(1); EVT VT = N->getValueType(0); - if (DisableARMIntABS) - return NULL; - if (Subtarget->isThumb1Only()) return NULL; - if (XORSrc0.getOpcode() != ISD::ADD || - XORSrc1.getOpcode() != ISD::SRA) + if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) return NULL; SDValue ADDSrc0 = XORSrc0.getOperand(0); @@ -2510,16 +2391,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ EVT XType = SRASrc0.getValueType(); unsigned Size = XType.getSizeInBits() - 1; - if (ADDSrc1 == XORSrc1 && - ADDSrc0 == SRASrc0 && - XType.isInteger() && - SRAConstant != NULL && + if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && + XType.isInteger() && SRAConstant != NULL && Size == SRAConstant->getZExtValue()) { - - unsigned Opcode = ARM::ABS; - if (Subtarget->isThumb2()) - Opcode = ARM::t2ABS; - + unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); } @@ -2814,10 +2689,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::CMOV: return SelectCMOVOp(N); - case ARMISD::CAND: - case ARMISD::COR: - case ARMISD::CXOR: - return SelectConditionalOp(N); case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 04370c0..df4039b 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -90,75 +90,70 @@ static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; -void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, - EVT PromotedBitwiseVT) { +void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, + MVT PromotedBitwiseVT) { if (VT != PromotedLdStVT) { - setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::LOAD, VT.getSimpleVT(), - PromotedLdStVT.getSimpleVT()); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); - setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::STORE, VT.getSimpleVT(), - PromotedLdStVT.getSimpleVT()); + setOperationAction(ISD::STORE, VT, Promote); + AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); } - EVT ElemTy = VT.getVectorElementType(); + MVT ElemTy = VT.getVectorElementType(); if (ElemTy != MVT::i64 && ElemTy != MVT::f64) - setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); if (ElemTy == MVT::i32) { - 
setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom); - setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom); - setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SINT_TO_FP, VT, Custom); + setOperationAction(ISD::UINT_TO_FP, VT, Custom); + setOperationAction(ISD::FP_TO_SINT, VT, Custom); + setOperationAction(ISD::FP_TO_UINT, VT, Custom); } else { - setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); - setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand); - } - setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal); - setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SINT_TO_FP, VT, Expand); + setOperationAction(ISD::UINT_TO_FP, VT, Expand); + setOperationAction(ISD::FP_TO_SINT, VT, Expand); + setOperationAction(ISD::FP_TO_UINT, VT, Expand); + } + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); + setOperationAction(ISD::SELECT, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (VT.isInteger()) { - setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); } // Promote all bit-wise operations. if (VT.isInteger() && VT != PromotedBitwiseVT) { - setOperationAction(ISD::AND, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::AND, VT.getSimpleVT(), - PromotedBitwiseVT.getSimpleVT()); - setOperationAction(ISD::OR, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::OR, VT.getSimpleVT(), - PromotedBitwiseVT.getSimpleVT()); - setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::XOR, VT.getSimpleVT(), - PromotedBitwiseVT.getSimpleVT()); + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); } // Neon does not support vector divide/remainder operations. 
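// (Aside, not from the patch: MVT is the enum of simple machine value types,
//  while EVT can also describe extended types. Once the parameters are MVT,
//  each VT.getSimpleVT() call above is a no-op and drops out; sketch:
//    EVT Ext = ...;                  // possibly an extended type
//    MVT Simple = Ext.getSimpleVT(); // asserts Ext.isSimple()
//  Taking MVT up front moves that assertion to the call sites.)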
- setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand); - setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); } -void ARMTargetLowering::addDRTypeForNEON(EVT VT) { +void ARMTargetLowering::addDRTypeForNEON(MVT VT) { addRegisterClass(VT, &ARM::DPRRegClass); addTypeForNEON(VT, MVT::f64, MVT::v2i32); } -void ARMTargetLowering::addQRTypeForNEON(EVT VT) { +void ARMTargetLowering::addQRTypeForNEON(MVT VT) { addRegisterClass(VT, &ARM::QPRRegClass); addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } @@ -903,9 +898,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; case ARMISD::CMOV: return "ARMISD::CMOV"; - case ARMISD::CAND: return "ARMISD::CAND"; - case ARMISD::COR: return "ARMISD::COR"; - case ARMISD::CXOR: return "ARMISD::CXOR"; case ARMISD::RBIT: return "ARMISD::RBIT"; @@ -1041,8 +1033,9 @@ const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { // Create a fast isel object. FastISel * -ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { - return ARM::createFastISel(funcInfo); +ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const { + return ARM::createFastISel(funcInfo, libInfo); } /// getMaximalGlobalOffset - Returns the maximal possible offset which can @@ -1171,6 +1164,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + case CallingConv::GHC: + return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); } } @@ -4271,6 +4266,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // Record this extraction against the appropriate vector if possible... SDValue SourceVec = V.getOperand(0); + // If the element number isn't a constant, we can't effectively + // analyze what's going on. + if (!isa<ConstantSDNode>(V.getOperand(1))) + return SDValue(); unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); bool FoundSource = false; for (unsigned j = 0; j < SourceVecs.size(); ++j) { @@ -6152,13 +6151,12 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { } // Add the jump table entries as successors to the MBB. - MachineBasicBlock *PrevMBB = 0; + SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs; for (std::vector<MachineBasicBlock*>::iterator I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { MachineBasicBlock *CurMBB = *I; - if (PrevMBB != CurMBB) + if (SeenMBBs.insert(CurMBB)) DispContBB->addSuccessor(CurMBB); - PrevMBB = CurMBB; } // N.B. the order the invoke BBs are processed in doesn't matter here. @@ -6971,62 +6969,137 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // ARM Optimization Hooks //===----------------------------------------------------------------------===// +// Helper function that checks if N is a null or all ones constant. 
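// (Aside, not from the patch: a worked instance of the combine these helpers
//  feed, restating the transform comments that follow with concrete values:
//    (add (select cc, 0, 4), x)  -->  (select cc, x, (add x, 4))
//  When the select yields the identity constant 0, the add folds into the
//  other arm, and if-conversion can later lower the select to a predicated
//  add, e.g. an addne-style instruction.)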
+static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); + if (!C) + return false; + return AllOnes ? C->isAllOnesValue() : C->isNullValue(); +} + +// Return true if N is conditionally 0 or all ones. +// Detects these expressions where cc is an i1 value: +// +// (select cc 0, y) [AllOnes=0] +// (select cc y, 0) [AllOnes=0] +// (zext cc) [AllOnes=0] +// (sext cc) [AllOnes=0/1] +// (select cc -1, y) [AllOnes=1] +// (select cc y, -1) [AllOnes=1] +// +// Invert is set when N is the null/all ones constant when CC is false. +// OtherOp is set to the alternative value of N. +static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, + SDValue &CC, bool &Invert, + SDValue &OtherOp, + SelectionDAG &DAG) { + switch (N->getOpcode()) { + default: return false; + case ISD::SELECT: { + CC = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + if (isZeroOrAllOnes(N1, AllOnes)) { + Invert = false; + OtherOp = N2; + return true; + } + if (isZeroOrAllOnes(N2, AllOnes)) { + Invert = true; + OtherOp = N1; + return true; + } + return false; + } + case ISD::ZERO_EXTEND: + // (zext cc) can never be the all ones value. + if (AllOnes) + return false; + // Fall through. + case ISD::SIGN_EXTEND: { + EVT VT = N->getValueType(0); + CC = N->getOperand(0); + if (CC.getValueType() != MVT::i1) + return false; + Invert = !AllOnes; + if (AllOnes) + // When looking for an AllOnes constant, N is an sext, and the 'other' + // value is 0. + OtherOp = DAG.getConstant(0, VT); + else if (N->getOpcode() == ISD::ZERO_EXTEND) + // When looking for a 0 constant, N can be zext or sext. + OtherOp = DAG.getConstant(1, VT); + else + OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + return true; + } + } +} + +// Combine a constant select operand into its use: +// +// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) +// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) +// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1] +// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) +// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) +// +// The transform is rejected if the select doesn't have a constant operand that +// is null, or all ones when AllOnes is set. +// +// Also recognize sext/zext from i1: +// +// (add (zext cc), x) -> (select cc (add x, 1), x) +// (add (sext cc), x) -> (select cc (add x, -1), x) +// +// These transformations eventually create predicated instructions. +// +// @param N The node to transform. +// @param Slct The N operand that is a select. +// @param OtherOp The other N operand (x above). +// @param DCI Context. +// @param AllOnes Require the select constant to be all ones instead of null. +// @returns The new node, or SDValue() on failure. static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + bool AllOnes = false) { SelectionDAG &DAG = DCI.DAG; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = N->getValueType(0); - unsigned Opc = N->getOpcode(); - bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; - SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); - SDValue RHS = isSlctCC ? 
Slct.getOperand(3) : Slct.getOperand(2);
-  ISD::CondCode CC = ISD::SETCC_INVALID;
-
-  if (isSlctCC) {
-    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
-  } else {
-    SDValue CCOp = Slct.getOperand(0);
-    if (CCOp.getOpcode() == ISD::SETCC)
-      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
-  }
-
-  bool DoXform = false;
-  bool InvCC = false;
-  assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
-          "Bad input!");
-
-  if (LHS.getOpcode() == ISD::Constant &&
-      cast<ConstantSDNode>(LHS)->isNullValue()) {
-    DoXform = true;
-  } else if (CC != ISD::SETCC_INVALID &&
-             RHS.getOpcode() == ISD::Constant &&
-             cast<ConstantSDNode>(RHS)->isNullValue()) {
-    std::swap(LHS, RHS);
-    SDValue Op0 = Slct.getOperand(0);
-    EVT OpVT = isSlctCC ? Op0.getValueType() :
-                          Op0.getOperand(0).getValueType();
-    bool isInt = OpVT.isInteger();
-    CC = ISD::getSetCCInverse(CC, isInt);
-
-    if (!TLI.isCondCodeLegal(CC, OpVT))
-      return SDValue();         // Inverse operator isn't legal.
-
-    DoXform = true;
-    InvCC = true;
-  }
-
-  if (DoXform) {
-    SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
-    if (isSlctCC)
-      return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
-                             Slct.getOperand(0), Slct.getOperand(1), CC);
-    SDValue CCOp = Slct.getOperand(0);
-    if (InvCC)
-      CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
-                          CCOp.getOperand(0), CCOp.getOperand(1), CC);
-    return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
-                       CCOp, OtherOp, Result);
+  SDValue NonConstantVal;
+  SDValue CCOp;
+  bool SwapSelectOps;
+  if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
+                                  NonConstantVal, DAG))
+    return SDValue();
+
+  // Slct is now known to be the desired identity constant when CC is true.
+  SDValue TrueVal = OtherOp;
+  SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+                                 OtherOp, NonConstantVal);
+  // Unless SwapSelectOps says CC should be false.
+  if (SwapSelectOps)
+    std::swap(TrueVal, FalseVal);
+
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+                     CCOp, TrueVal, FalseVal);
+}
+
+// Attempt combineSelectAndUse on each operand of a commutative operator N.
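// (Aside, not from the patch: the call shape used by the ADD/SUB/AND/OR/XOR
//  combines later in this file, illustratively:
//    SDValue Result = combineSelectAndUseCommutative(N, /*AllOnes=*/false, DCI);
//    if (Result.getNode())
//      return Result;
//  The AND combine passes AllOnes=true, since -1 rather than 0 is the
//  identity for AND.)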
+static +SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, + TargetLowering::DAGCombinerInfo &DCI) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (N0.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes); + if (Result.getNode()) + return Result; + } + if (N1.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes); + if (Result.getNode()) + return Result; } return SDValue(); } @@ -7134,7 +7207,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, return Result; // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) - if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { + if (N0.getNode()->hasOneUse()) { SDValue Result = combineSelectAndUse(N, N0, N1, DCI); if (Result.getNode()) return Result; } @@ -7166,7 +7239,7 @@ static SDValue PerformSUBCombine(SDNode *N, SDValue N1 = N->getOperand(1); // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) - if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { + if (N1.getNode()->hasOneUse()) { SDValue Result = combineSelectAndUse(N, N1, N0, DCI); if (Result.getNode()) return Result; } @@ -7294,49 +7367,6 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); } -static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) { - if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse()) - return false; - - SDValue FalseVal = N.getOperand(0); - ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal); - if (!C) - return false; - if (AllOnes) - return C->isAllOnesValue(); - return C->isNullValue(); -} - -/// formConditionalOp - Combine an operation with a conditional move operand -/// to form a conditional op. e.g. (or x, (cmov 0, y, cond)) => (or.cond x, y) -/// (and x, (cmov -1, y, cond)) => (and.cond, x, y) -static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG, - bool Commutable) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - - bool isAND = N->getOpcode() == ISD::AND; - bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND); - if (!isCand && Commutable) { - isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND); - if (isCand) - std::swap(N0, N1); - } - if (!isCand) - return SDValue(); - - unsigned Opc = 0; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ISD::AND: Opc = ARMISD::CAND; break; - case ISD::OR: Opc = ARMISD::COR; break; - case ISD::XOR: Opc = ARMISD::CXOR; break; - } - return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0, - N1.getOperand(1), N1.getOperand(2), N1.getOperand(3), - N1.getOperand(4)); -} - static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -7371,10 +7401,10 @@ static SDValue PerformANDCombine(SDNode *N, } if (!Subtarget->isThumb1Only()) { - // (and x, (cmov -1, y, cond)) => (and.cond x, y) - SDValue CAND = formConditionalOp(N, DAG, true); - if (CAND.getNode()) - return CAND; + // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) + SDValue Result = combineSelectAndUseCommutative(N, true, DCI); + if (Result.getNode()) + return Result; } return SDValue(); @@ -7414,14 +7444,17 @@ static SDValue PerformORCombine(SDNode *N, } if (!Subtarget->isThumb1Only()) { - // (or x, (cmov 0, y, cond)) => (or.cond x, y) - SDValue COR = formConditionalOp(N, DAG, true); - if (COR.getNode()) - return COR; + // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) + SDValue Result = 
combineSelectAndUseCommutative(N, false, DCI); + if (Result.getNode()) + return Result; } + // The code below optimizes (or (and X, Y), Z). + // The AND operand needs to have a single user to make these optimizations + // profitable. SDValue N0 = N->getOperand(0); - if (N0.getOpcode() != ISD::AND) + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) return SDValue(); SDValue N1 = N->getOperand(1); @@ -7578,10 +7611,10 @@ static SDValue PerformXORCombine(SDNode *N, return SDValue(); if (!Subtarget->isThumb1Only()) { - // (xor x, (cmov 0, y, cond)) => (xor.cond x, y) - SDValue CXOR = formConditionalOp(N, DAG, true); - if (CXOR.getNode()) - return CXOR; + // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) + SDValue Result = combineSelectAndUseCommutative(N, false, DCI); + if (Result.getNode()) + return Result; } return SDValue(); @@ -8802,6 +8835,8 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { case MVT::i16: case MVT::i32: return true; + case MVT::f64: + return Subtarget->hasNEON(); // FIXME: VLD1 etc with standard alignment is legal. } } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 7ad48b9..13b83de 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -63,9 +63,6 @@ namespace llvm { FMSTAT, // ARM fmstat instruction. CMOV, // ARM conditional move instructions. - CAND, // ARM conditional and instructions. - COR, // ARM conditional or instructions. - CXOR, // ARM conditional xor instructions. BCC_i64, @@ -361,7 +358,8 @@ namespace llvm { /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. - virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const; + virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const; Sched::Preference getSchedulingPreference(SDNode *N) const; @@ -393,9 +391,9 @@ namespace llvm { /// unsigned ARMPCLabelIndex; - void addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT); - void addDRTypeForNEON(EVT VT); - void addQRTypeForNEON(EVT VT); + void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT); + void addDRTypeForNEON(MVT VT); + void addQRTypeForNEON(MVT VT); typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector; void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, @@ -544,7 +542,8 @@ namespace llvm { namespace ARM { - FastISel *createFastISel(FunctionLoweringInfo &funcInfo); + FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo); } } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 1b8fc3f..992aba5 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -242,6 +242,9 @@ def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " "Subtarget->isTargetDarwin()">; +def IsLE : Predicate<"TLI.isLittleEndian()">; +def IsBE : Predicate<"TLI.isBigEndian()">; + //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -416,8 +419,11 @@ def pclabel : Operand<i32> { } // ADR instruction labels. 
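// (Aside, not from the patch: ADR forms a PC-relative address from a label.
//  The matching parser predicate added in ARMAsmParser.cpp at the end of this
//  patch accepts either form, abridged from that hunk:
//    if (isImm() && !isa<MCConstantExpr>(getImm())) return true;  // label
//    return isARMSOImm() || isARMSOImmNeg();                      // constant
//  A symbolic label is kept as an expression so a fixup can be attached.)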
+def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; } def adrlabel : Operand<i32> { let EncoderMethod = "getAdrLabelOpValue"; + let ParserMatchClass = AdrLabelAsmOperand; + let PrintMethod = "printAdrLabelOperand"; } def neon_vcvt_imm32 : Operand<i32> { @@ -968,7 +974,7 @@ include "ARMInstrFormats.td" let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AsI1_bin_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc, bit Commutable = 0> { + PatFrag opnode, bit Commutable = 0> { // The register-immediate version is re-materializable. This is useful // in particular for taking the address of a local. let isReMaterializable = 1 in { @@ -1037,7 +1043,7 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AsI1_rbin_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc, bit Commutable = 0> { + PatFrag opnode, bit Commutable = 0> { // The register-immediate version is re-materializable. This is useful // in particular for taking the address of a local. let isReMaterializable = 1 in { @@ -1285,7 +1291,7 @@ class AI_exta_rrot_np<bits<8> opcod, string opc> /// AI1_adde_sube_irs - Define instructions and patterns for adde and sube. let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, - string baseOpc, bit Commutable = 0> { + bit Commutable = 0> { let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm", @@ -1351,8 +1357,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, /// AI1_rsc_irs - Define instructions and patterns for rsc let TwoOperandAliasConstraint = "$Rn = $Rd" in -multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode, - string baseOpc> { +multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> { let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm", @@ -2816,9 +2821,6 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, let Inst{15-12} = Rd; } -def : ARMInstAlias<"movs${p} $Rd, $Rm", - (MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>; - // A version for the smaller set of tail call registers. let neverHasSideEffects = 1 in def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, @@ -3029,10 +3031,10 @@ def UBFX : I<(outs GPR:$Rd), defm ADD : AsI1_bin_irs<0b0100, "add", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(add node:$LHS, node:$RHS)>, "ADD", 1>; + BinOpFrag<(add node:$LHS, node:$RHS)>, 1>; defm SUB : AsI1_bin_irs<0b0010, "sub", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">; + BinOpFrag<(sub node:$LHS, node:$RHS)>>; // ADD and SUB with 's' bit set. 
// @@ -3050,15 +3052,13 @@ defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; defm ADC : AI1_adde_sube_irs<0b0101, "adc", - BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, - "ADC", 1>; + BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>; defm SBC : AI1_adde_sube_irs<0b0110, "sbc", - BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>, - "SBC">; + BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>; -defm RSB : AsI1_rbin_irs <0b0011, "rsb", - IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(sub node:$LHS, node:$RHS)>, "RSB">; +defm RSB : AsI1_rbin_irs<0b0011, "rsb", + IIC_iALUi, IIC_iALUr, IIC_iALUsr, + BinOpFrag<(sub node:$LHS, node:$RHS)>>; // FIXME: Eliminate them if we can write def : Pat patterns which defines // CPSR and the implicit def of CPSR is not needed. @@ -3066,8 +3066,7 @@ defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr, BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; defm RSC : AI1_rsc_irs<0b0111, "rsc", - BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>, - "RSC">; + BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>; // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. // The assume-no-carry-in form uses the negation of the input since add/sub @@ -3276,16 +3275,16 @@ def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos), defm AND : AsI1_bin_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>; + BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; defm ORR : AsI1_bin_irs<0b1100, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(or node:$LHS, node:$RHS)>, "ORR", 1>; + BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; defm EOR : AsI1_bin_irs<0b0001, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(xor node:$LHS, node:$RHS)>, "EOR", 1>; + BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; defm BIC : AsI1_bin_irs<0b1110, "bic", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">; + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; // FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just // like in the actual instruction encoding. The complexity of mapping the mask @@ -3940,7 +3939,7 @@ def BCCZi64 : PseudoInst<(outs), // a two-value operand where a dag node expects two operands. 
:( let neverHasSideEffects = 1 in { -let isCommutable = 1 in +let isCommutable = 1, isSelect = 1 in def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p), 4, IIC_iCMOVr, [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, @@ -3993,25 +3992,29 @@ multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { def ri : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s), + (ins GPR:$Rfalse, GPR:$Rn, so_imm:$imm, + pred:$p, cc_out:$s), 4, iii, [], (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; def rr : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rfalse, GPR:$Rn, GPR:$Rm, + pred:$p, cc_out:$s), 4, iir, [], (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; def rsi : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s), + (ins GPR:$Rfalse, GPR:$Rn, so_reg_imm:$shift, + pred:$p, cc_out:$s), 4, iis, [], (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s), + (ins GPRnopc:$Rfalse, GPRnopc:$Rn, so_reg_reg:$shift, + pred:$p, cc_out:$s), 4, iis, [], (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; } defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr, @@ -4020,6 +4023,10 @@ defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr, IIC_iBITi, IIC_iBITr, IIC_iBITsr>; defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr, IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm ADDCC : AsI1_bincc_irs<ADDri, ADDrr, ADDrsi, ADDrsr, + IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm SUBCC : AsI1_bincc_irs<SUBri, SUBrr, SUBrsi, SUBrsr, + IIC_iBITi, IIC_iBITr, IIC_iBITsr>; } // neverHasSideEffects @@ -4068,11 +4075,8 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, // Pseudo instruction that combines movs + predicated rsbmi // to implement integer ABS -let usesCustomInserter = 1, Defs = [CPSR] in { -def ABS : ARMPseudoInst< - (outs GPR:$dst), (ins GPR:$src), - 8, NoItinerary, []>; -} +let usesCustomInserter = 1, Defs = [CPSR] in +def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>; let usesCustomInserter = 1 in { let Defs = [CPSR] in { diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 3134088..048d340 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -398,6 +398,27 @@ def VecListFourQWordIndexed : Operand<i32> { let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } +def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() == 2; +}]>; +def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() == 2; +}]>; +def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() == 1; +}]>; +def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() == 1; +}]>; +def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + 
return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; +def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() < 4; +}]>; //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. @@ -2238,6 +2259,19 @@ def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>; } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 +// Use vld1/vst1 for unaligned f64 load / store +def : Pat<(f64 (hword_alignedload addrmode6:$addr)), + (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; +def : Pat<(f64 (byte_alignedload addrmode6:$addr)), + (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; +def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), + (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>; +def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>; //===----------------------------------------------------------------------===// // NEON pattern fragments diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index d83530a..8ecf009 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -172,6 +172,7 @@ def t2ldr_pcrel_imm12 : Operand<i32> { // ADR instruction labels. def t2adrlabel : Operand<i32> { let EncoderMethod = "getT2AdrLabelOpValue"; + let PrintMethod = "printAdrLabelOperand"; } @@ -529,7 +530,7 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4, /// changed to modify CPSR. multiclass T2I_bin_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc, bit Commutable = 0, + PatFrag opnode, bit Commutable = 0, string wide = ""> { // shifted imm def ri : T2sTwoRegImm< @@ -565,15 +566,15 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, // Assembly aliases for optional destination operand when it's the same // as the source operand. def : t2InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, + (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"), - (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, + (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn, + (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$shift, pred:$p, cc_out:$s)>; } @@ -582,36 +583,30 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, // the ".w" suffix to indicate that they are wide. multiclass T2I_bin_w_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc, bit Commutable = 0> : - T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> { + PatFrag opnode, bit Commutable = 0> : + T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w"> { // Assembler aliases w/ the ".w" suffix. 
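// (Aside, not from the patch: in these alias rewrites NAME is TableGen's
//  implicit name of the enclosing defm instantiation, so within
//    defm t2AND : T2I_bin_w_irs<0b0000, "and", ...>;  // NAME == "t2AND"
//  !cast<Instruction>(NAME#"ri") resolves to t2ANDri directly, removing the
//  need to thread a baseOpc string parameter through the multiclass.)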
def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn, - t2_so_imm:$imm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, + cc_out:$s)>; // Assembler aliases w/o the ".w" suffix. def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"), - (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, + cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rd, rGPR:$Rn, - t2_so_reg:$shift, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rs") rGPR:$Rd, rGPR:$Rn, t2_so_reg:$shift, + pred:$p, cc_out:$s)>; // and with the optional destination operand, too. def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, - t2_so_imm:$imm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, + pred:$p, cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"), - (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, + cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn, - t2_so_reg:$shift, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$shift, + pred:$p, cc_out:$s)>; } /// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are @@ -762,6 +757,33 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{24} = 1; let Inst{23-21} = op23_21; } + + // Predicated versions. + def CCri : t2PseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_imm:$imm, + pred:$p, cc_out:$s), 4, IIC_iALUi, [], + (!cast<Instruction>(NAME#ri) GPRnopc:$Rd, + GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rfalse = $Rd">; + def CCri12 : t2PseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rfalse, GPR:$Rn, imm0_4095:$imm, + pred:$p), + 4, IIC_iALUi, [], + (!cast<Instruction>(NAME#ri12) GPRnopc:$Rd, + GPR:$Rn, imm0_4095:$imm, pred:$p)>, + RegConstraint<"$Rfalse = $Rd">; + def CCrr : t2PseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rfalse, GPRnopc:$Rn, rGPR:$Rm, + pred:$p, cc_out:$s), 4, IIC_iALUr, [], + (!cast<Instruction>(NAME#rr) GPRnopc:$Rd, + GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rfalse = $Rd">; + def CCrs : t2PseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_reg:$Rm, + pred:$p, cc_out:$s), 4, IIC_iALUsi, [], + (!cast<Instruction>(NAME#rs) GPRnopc:$Rd, + GPRnopc:$Rn, t2_so_reg:$Rm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rfalse = $Rd">; } /// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns @@ -808,8 +830,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, /// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift / // rotate operation that produces a value. 
-multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
-                     string baseOpc> {
+multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> {
   // 5-bit imm
   def ri : T2sTwoRegShiftImm<
                 (outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi,
@@ -834,33 +855,27 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
   // Optional destination register
   def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $imm"),
-     (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
-                                                    ty:$imm, pred:$p,
-                                                    cc_out:$s)>;
+     (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p,
+                                    cc_out:$s)>;
   def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $Rm"),
-     (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
-                                                    rGPR:$Rm, pred:$p,
-                                                    cc_out:$s)>;
+     (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+                                    cc_out:$s)>;
   // Assembler aliases w/o the ".w" suffix.
   def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $imm"),
-     (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn,
-                                                    ty:$imm, pred:$p,
-                                                    cc_out:$s)>;
+     (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, ty:$imm, pred:$p,
+                                    cc_out:$s)>;
   def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
-     (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
-                                                    rGPR:$Rm, pred:$p,
-                                                    cc_out:$s)>;
+     (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p,
+                                    cc_out:$s)>;
   // and with the optional destination operand, too.
   def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $imm"),
-     (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
-                                                    ty:$imm, pred:$p,
-                                                    cc_out:$s)>;
+     (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p,
+                                    cc_out:$s)>;
   def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
-     (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
-                                                    rGPR:$Rm, pred:$p,
-                                                    cc_out:$s)>;
+     (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+                                    cc_out:$s)>;
 }
 
 /// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
@@ -868,7 +883,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
 /// an explicit result, only implicitly set CPSR.
 multiclass T2I_cmp_irs<bits<4> opcod, string opc,
                        InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
-                       PatFrag opnode, string baseOpc> {
+                       PatFrag opnode> {
 let isCompare = 1, Defs = [CPSR] in {
    // shifted imm
    def ri : T2OneRegCmpImm<
@@ -913,12 +928,9 @@ let isCompare = 1, Defs = [CPSR] in {
   // No alias here for 'rr' version as not all instantiations of this
   // multiclass want one (CMP in particular, does not).
   def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $imm"),
-         (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPRnopc:$Rn,
-                                                        t2_so_imm:$imm, pred:$p)>;
+         (!cast<Instruction>(NAME#"ri") GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>;
   def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $shift"),
-         (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPRnopc:$Rn,
-                                                        t2_so_reg:$shift,
-                                                        pred:$p)>;
+         (!cast<Instruction>(NAME#"rs") GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>;
 }
 
 /// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
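// (Aside, not from the patch: a recurring change in the conditional pseudos
//  of this patch is the new $Rfalse input tied to $Rd, replacing the old
//  "$Rn = $Rd" constraint. The intended semantics, as a sketch:
//    Rd = pred ? (Rn op operand2) : Rfalse;
//  so RegConstraint<"$Rfalse = $Rd"> tells the register allocator that the
//  false value must already live in the destination register.)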
@@ -2152,13 +2164,13 @@ def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>; // defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31, - BinOpFrag<(shl node:$LHS, node:$RHS)>, "t2LSL">; + BinOpFrag<(shl node:$LHS, node:$RHS)>>; defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr, - BinOpFrag<(srl node:$LHS, node:$RHS)>, "t2LSR">; + BinOpFrag<(srl node:$LHS, node:$RHS)>>; defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr, - BinOpFrag<(sra node:$LHS, node:$RHS)>, "t2ASR">; + BinOpFrag<(sra node:$LHS, node:$RHS)>>; defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, - BinOpFrag<(rotr node:$LHS, node:$RHS)>, "t2ROR">; + BinOpFrag<(rotr node:$LHS, node:$RHS)>>; // (rotr x, (and y, 0x...1f)) ==> (ROR x, y) def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), @@ -2214,18 +2226,17 @@ def t2MOVsra_flag : T2TwoRegShiftImm< defm t2AND : T2I_bin_w_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(and node:$LHS, node:$RHS)>, "t2AND", 1>; + BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; defm t2ORR : T2I_bin_w_irs<0b0010, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(or node:$LHS, node:$RHS)>, "t2ORR", 1>; + BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; defm t2EOR : T2I_bin_w_irs<0b0100, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(xor node:$LHS, node:$RHS)>, "t2EOR", 1>; + BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; defm t2BIC : T2I_bin_w_irs<0b0001, "bic", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(and node:$LHS, (not node:$RHS))>, - "t2BIC">; + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; class T2BitFI<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -2305,8 +2316,7 @@ let Constraints = "$src = $Rd" in { defm t2ORN : T2I_bin_irs<0b0011, "orn", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(or node:$LHS, (not node:$RHS))>, - "t2ORN", 0, "">; + BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">; /// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a /// unary operation that produces a value. These are predicable and can be @@ -2878,7 +2888,7 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), // defm t2CMP : T2I_cmp_irs<0b1101, "cmp", IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, - BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>, "t2CMP">; + BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm), (t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>; @@ -2932,13 +2942,10 @@ let isCompare = 1, Defs = [CPSR] in { // Assembler aliases w/o the ".w" suffix. // No alias here for 'rr' version as not all instantiations of this multiclass // want one (CMP in particular, does not). 
-def : t2InstAlias<!strconcat("cmn", "${p}", " $Rn, $imm"), - (!cast<Instruction>(!strconcat("t2CMN", "ri")) GPRnopc:$Rn, - t2_so_imm:$imm, pred:$p)>; -def : t2InstAlias<!strconcat("cmn", "${p}", " $Rn, $shift"), - (!cast<Instruction>(!strconcat("t2CMNz", "rs")) GPRnopc:$Rn, - t2_so_reg:$shift, - pred:$p)>; +def : t2InstAlias<"cmn${p} $Rn, $imm", + (t2CMNri GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>; +def : t2InstAlias<"cmn${p} $Rn, $shift", + (t2CMNzrs GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>; def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm), (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>; @@ -2948,19 +2955,17 @@ def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm), defm t2TST : T2I_cmp_irs<0b0000, "tst", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, - BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, - "t2TST">; + BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>; defm t2TEQ : T2I_cmp_irs<0b0100, "teq", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, - BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, - "t2TEQ">; + BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>; // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( let neverHasSideEffects = 1 in { -let isCommutable = 1 in +let isCommutable = 1, isSelect = 1 in def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm, pred:$p), 4, IIC_iCMOVr, @@ -3048,22 +3053,25 @@ multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { // shifted imm def ri : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s), + (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_imm:$imm, + pred:$p, cc_out:$s), 4, iii, [], (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; // register def rr : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s), + (ins rGPR:$Rfalse, rGPR:$Rn, rGPR:$Rm, + pred:$p, cc_out:$s), 4, iir, [], (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; // shifted register def rs : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s), + (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_reg:$ShiftedRm, + pred:$p, cc_out:$s), 4, iis, [], (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; } // T2I_bincc_irs defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs, diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 23c132e..7d6692f 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -61,6 +61,15 @@ def vfp_f64imm : Operand<f64>, let ParserMatchClass = FPImmOperand; } +def alignedload32 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; + +def alignedstore32 : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; + // The VCVT to/from fixed-point instructions encode the 'fbits' operand // (the number of fixed bits) differently than it appears in the assembly // source. 
It's encoded as "Size - fbits" where Size is the size of the @@ -86,7 +95,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in { def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr), IIC_fpLoad64, "vldr", "\t$Dd, $addr", - [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>; + [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>; def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), IIC_fpLoad32, "vldr", "\t$Sd, $addr", @@ -100,7 +109,7 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr), IIC_fpStore64, "vstr", "\t$Dd, $addr", - [(store (f64 DPR:$Dd), addrmode5:$addr)]>; + [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>; def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr), IIC_fpStore32, "vstr", "\t$Sd, $addr", @@ -433,25 +442,25 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, // Between half-precision and single-precision. For disassembly only. // FIXME: Verify encoding after integrated assembler is working. -def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), +def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def : ARMPat<(f32_to_f16 SPR:$a), - (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; - -def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), +def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def : ARMPat<(f16_to_f32 GPR:$a), - (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; +def : Pat<(f32_to_f16 SPR:$a), + (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; + +def : Pat<(f16_to_f32 GPR:$a), + (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; -def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), +def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), +def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index c5db211..357fc3f 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -291,9 +291,9 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry) ResultPtr = ResultPtr >> 2; *((intptr_t*)RelocPos) |= ResultPtr; - // Set register Rn to PC. - *((intptr_t*)RelocPos) |= - getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + // Set register Rn to PC (which is register 15 on all architectures). + // FIXME: This avoids the need for register info in the JIT class. 
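// (Aside, not from the patch: ARM::PC has hardware encoding 15 on every ARM
//  variant, which is why the constant is safe here. With register info in
//  hand one could equivalently write, as a sketch:
//    *((intptr_t*)RelocPos) |= TRI->getEncodingValue(ARM::PC) << ARMII::RegRnShift;
//  The hard-coded 15 is what lets the JIT drop that dependency.)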
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index c5db211..357fc3f 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -291,9 +291,9 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
     if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry)
       ResultPtr = ResultPtr >> 2;
     *((intptr_t*)RelocPos) |= ResultPtr;
-    // Set register Rn to PC.
-    *((intptr_t*)RelocPos) |=
-      getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+    // Set register Rn to PC (which is register 15 on all architectures).
+    // FIXME: This avoids the need for register info in the JIT class.
+    *((intptr_t*)RelocPos) |= 15 << ARMII::RegRnShift;
     break;
   }
   case ARM::reloc_arm_so_imm_cp_entry: {
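The relocation now hardcodes the PC's encoding instead of going through the register info layer, since r15 is the PC in every ARM encoding. A self-contained sketch of the patch-up, with the Rn field position (bits 19:16, i.e. shift 16) assumed here rather than taken from the real ARMII::RegRnShift constant:

#include <cstdint>

void setRnToPC(uint32_t &InsnWord) {
  const unsigned RegRnShift = 16; // assumed value of ARMII::RegRnShift
  InsnWord |= 15u << RegRnShift;  // r15 == PC on all ARM variants
}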
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index cb1b2a2..897ceb6 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -456,8 +456,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
   DebugLoc dl = Loc->getDebugLoc();
   const MachineOperand &PMO = Loc->getOperand(0);
   unsigned PReg = PMO.getReg();
-  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
-                                   : getARMRegisterNumbering(PReg);
+  unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
   unsigned Count = 1;
   unsigned Limit = ~0U;

@@ -483,8 +482,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
     int NewOffset = MemOps[i].Offset;
     const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
     unsigned Reg = MO.getReg();
-    unsigned RegNum = MO.isUndef() ? UINT_MAX
-                                   : getARMRegisterNumbering(Reg);
+    unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
     // Register numbers must be in ascending order. For VFP / NEON load and
     // store multiples, the registers must also be consecutive and within the
     // limit on the number of registers per instruction.
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 3857647..6f974fd 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -12,16 +12,16 @@
 //===----------------------------------------------------------------------===//

 // Registers are identified with 4-bit ID numbers.
-class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
-  field bits<4> Num;
+class ARMReg<bits<16> Enc, string n, list<Register> subregs = []> : Register<n> {
+  let HWEncoding = Enc;
   let Namespace = "ARM";
   let SubRegs = subregs;
   // All bits of ARM registers with sub-registers are covered by sub-registers.
   let CoveredBySubRegs = 1;
 }

-class ARMFReg<bits<6> num, string n> : Register<n> {
-  field bits<6> Num;
+class ARMFReg<bits<16> Enc, string n> : Register<n> {
+  let HWEncoding = Enc;
   let Namespace = "ARM";
 }

diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 56197d4..2c63825 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -1069,6 +1069,7 @@ def CortexA8Model : SchedMachineModel {
   let LoadLatency = 2; // Optimistic load latency assuming bypass.
                        // This is overriden by OperandCycles if the
                        // Itineraries are queried instead.
+  let MispredictPenalty = 13; // Based on estimate of pipeline depth.

   let Itineraries = CortexA8Itineraries;
 }
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 738974e..7bc590f 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1886,6 +1886,7 @@ def CortexA9Model : SchedMachineModel {
   let LoadLatency = 2; // Optimistic load latency assuming bypass.
                        // This is overriden by OperandCycles if the
                        // Itineraries are queried instead.
+  let MispredictPenalty = 8; // Based on estimate of pipeline depth.

   let Itineraries = CortexA9Itineraries;
 }
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index e067a9f..89e29ad 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -97,6 +97,9 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
   if (!HasV6T2Ops && hasThumb2())
     HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true;

+  // Keep a pointer to static instruction cost data for the specified CPU.
+  SchedModel = getSchedModelForCPU(CPUString);
+
   // Initialize scheduling itinerary for the specified CPU.
   InstrItins = getInstrItineraryForCPU(CPUString);

@@ -179,15 +182,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
 }

 unsigned ARMSubtarget::getMispredictionPenalty() const {
-  // If we have a reasonable estimate of the pipeline depth, then we can
-  // estimate the penalty of a misprediction based on that.
-  if (isCortexA8())
-    return 13;
-  else if (isCortexA9())
-    return 8;
-
-  // Otherwise, just return a sensible default.
-  return 10;
+  return SchedModel->MispredictPenalty;
 }

 bool ARMSubtarget::enablePostRAScheduler(
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index e72b06f..b394061 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -74,7 +74,7 @@ protected:
   /// HasThumb2 - True if Thumb2 instructions are supported.
   bool HasThumb2;

-  /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
+  /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs -
   /// v6m, v7m for example.
   bool IsMClass;

@@ -155,6 +155,9 @@ protected:
   /// TargetTriple - What processor and OS we're targeting.
   Triple TargetTriple;

+  /// SchedModel - Processor specific instruction costs.
+  const MCSchedModel *SchedModel;
+
   /// Selected instruction itineraries (one entry per itinerary class.)
   InstrItineraryData InstrItins;
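With MispredictPenalty moved into the Cortex-A8/A9 SchedMachineModel definitions, the subtarget no longer needs per-CPU if/else chains: it resolves the CPU's static MCSchedModel once in the constructor and reads fields from it. A rough model of that lookup pattern, with simplified stand-ins for the LLVM types:

struct SchedModelData {          // stand-in for llvm::MCSchedModel
  unsigned MispredictPenalty;    // 13 for Cortex-A8, 8 for Cortex-A9 above
};

struct Subtarget {
  const SchedModelData *SchedModel; // cached from getSchedModelForCPU(CPU)
  unsigned getMispredictionPenalty() const {
    return SchedModel->MispredictPenalty; // no CPU checks needed anymore
  }
};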
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 4497720..3a5957b 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -796,6 +796,13 @@ public:
     int64_t Value = CE->getValue();
     return Value > 0 && Value <= 32;
   }
+  bool isAdrLabel() const {
+    // If we have an immediate that's not a constant, treat it as a label
+    // reference needing a fixup. If it is a constant, but it can't fit
+    // into shift immediate encoding, we reject it.
+    if (isImm() && !isa<MCConstantExpr>(getImm())) return true;
+    else return (isARMSOImm() || isARMSOImmNeg());
+  }
   bool isARMSOImm() const {
     if (!isImm()) return false;
     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -1033,7 +1040,8 @@ public:
     // Immediate offset a multiple of 4 in range [-1020, 1020].
     if (!Memory.OffsetImm) return true;
     int64_t Val = Memory.OffsetImm->getValue();
-    return Val >= -1020 && Val <= 1020 && (Val & 3) == 0;
+    // Special case, #-0 is INT32_MIN.
+    return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) || Val == INT32_MIN;
   }
   bool isMemImm0_1020s4Offset() const {
     if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
@@ -1644,6 +1652,22 @@ public:
     Inst.addOperand(MCOperand::CreateImm(Imm));
   }

+  void addAdrLabelOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    assert(isImm() && "Not an immediate!");
+
+    // If we have an immediate that's not a constant, treat it as a label
+    // reference needing a fixup.
+    if (!isa<MCConstantExpr>(getImm())) {
+      Inst.addOperand(MCOperand::CreateExpr(getImm()));
+      return;
+    }
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    int Val = CE->getValue();
+    Inst.addOperand(MCOperand::CreateImm(Val));
+  }
+
   void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const {
     assert(N == 2 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
@@ -2884,7 +2908,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
     if (!RC->contains(EndReg))
       return Error(EndLoc, "invalid register in register list");
     // Ranges must go from low to high.
-    if (getARMRegisterNumbering(Reg) > getARMRegisterNumbering(EndReg))
+    if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg))
       return Error(EndLoc, "bad range in register list");

     // Add all the registers in the range to the register list.
@@ -2911,13 +2935,13 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
     if (!RC->contains(Reg))
       return Error(RegLoc, "invalid register in register list");
     // List must be monotonically increasing.
-    if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) {
+    if (MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) {
       if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
         Warning(RegLoc, "register list not in ascending order");
       else
         return Error(RegLoc, "register list not in ascending order");
     }
-    if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) {
+    if (MRI->getEncodingValue(Reg) == MRI->getEncodingValue(OldReg)) {
       Warning(RegLoc, "duplicated register (" + RegTok.getString() +
               ") in register list");
       continue;
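The parser hunks above replace the removed getARMRegisterNumbering() with MRI->getEncodingValue(), which reads the HWEncoding field that the ARMRegisterInfo.td change now populates; for the core registers the encoding is simply 0 through 15 for r0 through r15. A toy version of the ascending-order check applied to a register list:

#include <vector>

// Encodings as getEncodingValue() would return them for r0..r15.
bool listInAscendingOrder(const std::vector<unsigned> &Encs) {
  for (size_t i = 1; i < Encs.size(); ++i)
    if (Encs[i] < Encs[i - 1])
      return false; // e.g. {r1, r0}: 0 < 1, rejected (or warned for GPRs)
  return true;
}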
@@ -3256,29 +3280,59 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
 parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   SMLoc S = Parser.getTok().getLoc();
   const AsmToken &Tok = Parser.getTok();
-  if (!Tok.is(AsmToken::Identifier))
-    return MatchOperand_NoMatch;
-  StringRef OptStr = Tok.getString();
-
-  unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
-    .Case("sy", ARM_MB::SY)
-    .Case("st", ARM_MB::ST)
-    .Case("sh", ARM_MB::ISH)
-    .Case("ish", ARM_MB::ISH)
-    .Case("shst", ARM_MB::ISHST)
-    .Case("ishst", ARM_MB::ISHST)
-    .Case("nsh", ARM_MB::NSH)
-    .Case("un", ARM_MB::NSH)
-    .Case("nshst", ARM_MB::NSHST)
-    .Case("unst", ARM_MB::NSHST)
-    .Case("osh", ARM_MB::OSH)
-    .Case("oshst", ARM_MB::OSHST)
-    .Default(~0U);
+  unsigned Opt;
+
+  if (Tok.is(AsmToken::Identifier)) {
+    StringRef OptStr = Tok.getString();
+
+    Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
+      .Case("sy", ARM_MB::SY)
+      .Case("st", ARM_MB::ST)
+      .Case("sh", ARM_MB::ISH)
+      .Case("ish", ARM_MB::ISH)
+      .Case("shst", ARM_MB::ISHST)
+      .Case("ishst", ARM_MB::ISHST)
+      .Case("nsh", ARM_MB::NSH)
+      .Case("un", ARM_MB::NSH)
+      .Case("nshst", ARM_MB::NSHST)
+      .Case("unst", ARM_MB::NSHST)
+      .Case("osh", ARM_MB::OSH)
+      .Case("oshst", ARM_MB::OSHST)
+      .Default(~0U);

-  if (Opt == ~0U)
-    return MatchOperand_NoMatch;
+    if (Opt == ~0U)
+      return MatchOperand_NoMatch;
+
+    Parser.Lex(); // Eat identifier token.
+  } else if (Tok.is(AsmToken::Hash) ||
+             Tok.is(AsmToken::Dollar) ||
+             Tok.is(AsmToken::Integer)) {
+    if (Parser.getTok().isNot(AsmToken::Integer))
+      Parser.Lex(); // Eat the '#'.
+    SMLoc Loc = Parser.getTok().getLoc();
+
+    const MCExpr *MemBarrierID;
+    if (getParser().ParseExpression(MemBarrierID)) {
+      Error(Loc, "illegal expression");
+      return MatchOperand_ParseFail;
+    }
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(MemBarrierID);
+    if (!CE) {
+      Error(Loc, "constant expression expected");
+      return MatchOperand_ParseFail;
+    }
+
+    int Val = CE->getValue();
+    if (Val & ~0xf) {
+      Error(Loc, "immediate value out of range");
+      return MatchOperand_ParseFail;
+    }
+
+    Opt = ARM_MB::RESERVED_0 + Val;
+  } else
+    return MatchOperand_ParseFail;

-  Parser.Lex(); // Eat identifier token.
   Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S));
   return MatchOperand_Success;
 }
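Besides the named options, dmb/dsb operands can now be written as a raw 4-bit immediate, mapped onto the same encoding space starting at ARM_MB::RESERVED_0. The named options occupy eight of the sixteen values; an illustrative decode of the numeric form back to a name, using the encodings from the disassembler's old switch (SY=0xF, ST=0xE, ISH=0xB, ISHST=0xA, NSH=0x7, NSHST=0x6, OSH=0x3, OSHST=0x2):

#include <string>

std::string barrierOptionName(unsigned Val) {
  switch (Val & 0xF) {
  case 0xF: return "sy";    case 0xE: return "st";
  case 0xB: return "ish";   case 0xA: return "ishst";
  case 0x7: return "nsh";   case 0x6: return "nshst";
  case 0x3: return "osh";   case 0x2: return "oshst";
  default:  return "#" + std::to_string(Val & 0xF); // reserved value
  }
}

So "dmb #11" and "dmb ish" should assemble to the same encoding.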
@@ -5250,8 +5304,8 @@ validateInstruction(MCInst &Inst,
   case ARM::LDRD_POST:
   case ARM::LDREXD: {
     // Rt2 must be Rt + 1.
-    unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
-    unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+    unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+    unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
     if (Rt2 != Rt + 1)
       return Error(Operands[3]->getStartLoc(),
                    "destination operands must be sequential");
@@ -5259,8 +5313,8 @@ validateInstruction(MCInst &Inst,
   }
   case ARM::STRD: {
     // Rt2 must be Rt + 1.
-    unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg());
-    unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg());
+    unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
+    unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg());
     if (Rt2 != Rt + 1)
       return Error(Operands[3]->getStartLoc(),
                    "source operands must be sequential");
@@ -5270,8 +5324,8 @@ validateInstruction(MCInst &Inst,
   case ARM::STRD_POST:
   case ARM::STREXD: {
     // Rt2 must be Rt + 1.
-    unsigned Rt = getARMRegisterNumbering(Inst.getOperand(1).getReg());
-    unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(2).getReg());
+    unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg());
+    unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(2).getReg());
     if (Rt2 != Rt + 1)
       return Error(Operands[3]->getStartLoc(),
                    "source operands must be sequential");
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 47cca2a..c90751d 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -18,10 +18,12 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MemoryObject.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
 #include <vector>
@@ -383,7 +385,6 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
 static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
                                 uint64_t Address, const void *Decoder);
 #include "ARMGenDisassemblerTables.inc"
-#include "ARMGenInstrInfo.inc"
 #include "ARMGenEDInfo.inc"

 static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) {
@@ -427,7 +428,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                   (bytes[0] << 0);

   // Calling the auto-generated decoder function.
-  DecodeStatus result = decodeARMInstruction32(MI, insn, Address, this, STI);
+  DecodeStatus result = decodeInstruction(DecoderTableARM32, MI, insn,
+                                          Address, this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     return result;
@@ -436,14 +438,15 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   // VFP and NEON instructions, similarly, are shared between ARM
   // and Thumb modes.
   MI.clear();
-  result = decodeVFPInstruction32(MI, insn, Address, this, STI);
+  result = decodeInstruction(DecoderTableVFP32, MI, insn, Address, this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     return result;
   }

   MI.clear();
-  result = decodeNEONDataInstruction32(MI, insn, Address, this, STI);
+  result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address,
+                             this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     // Add a fake predicate operand, because we share these instruction
@@ -454,7 +457,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   }

   MI.clear();
-  result = decodeNEONLoadStoreInstruction32(MI, insn, Address, this, STI);
+  result = decodeInstruction(DecoderTableNEONLoadStore32, MI, insn, Address,
+                             this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     // Add a fake predicate operand, because we share these instruction
@@ -465,7 +469,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   }

   MI.clear();
-  result = decodeNEONDupInstruction32(MI, insn, Address, this, STI);
+  result = decodeInstruction(DecoderTableNEONDup32, MI, insn, Address,
+                             this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     // Add a fake predicate operand, because we share these instruction
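All of the per-class entry points (decodeARMInstruction32, decodeVFPInstruction32, and so on) collapse into one generated decodeInstruction() parameterized by a decoder table, so getInstruction() becomes a fixed-order cascade of table lookups. Schematically, with simplified signatures and a stub in place of the TableGen-generated decoder:

#include <cstdint>

enum Status { Fail, Success };

// Stub standing in for the generated decodeInstruction().
Status decodeInstruction(const void *Table, uint32_t Insn) { return Fail; }

Status decodeARMWord(uint32_t Insn, const void *Tables[], int N) {
  for (int i = 0; i < N; ++i)        // ARM32, VFP32, NEONData32, ...
    if (decodeInstruction(Tables[i], Insn) != Fail)
      return Success;
  return Fail;                       // no table matched: invalid encoding
}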
@@ -765,7 +770,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   }

   uint16_t insn16 = (bytes[1] << 8) | bytes[0];
-  DecodeStatus result = decodeThumbInstruction16(MI, insn16, Address, this, STI);
+  DecodeStatus result = decodeInstruction(DecoderTableThumb16, MI, insn16,
+                                          Address, this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 2;
     Check(result, AddThumbPredicate(MI));
@@ -773,7 +779,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   }

   MI.clear();
-  result = decodeThumbSBitInstruction16(MI, insn16, Address, this, STI);
+  result = decodeInstruction(DecoderTableThumbSBit16, MI, insn16,
+                             Address, this, STI);
   if (result) {
     Size = 2;
     bool InITBlock = ITBlock.instrInITBlock();
@@ -783,7 +790,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   }

   MI.clear();
-  result = decodeThumb2Instruction16(MI, insn16, Address, this, STI);
+  result = decodeInstruction(DecoderTableThumb216, MI, insn16,
+                             Address, this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 2;

@@ -818,7 +826,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                   (bytes[1] << 24) |
                   (bytes[0] << 16);
   MI.clear();
-  result = decodeThumbInstruction32(MI, insn32, Address, this, STI);
+  result = decodeInstruction(DecoderTableThumb32, MI, insn32, Address,
+                             this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     bool InITBlock = ITBlock.instrInITBlock();
@@ -828,7 +837,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   }

   MI.clear();
-  result = decodeThumb2Instruction32(MI, insn32, Address, this, STI);
+  result = decodeInstruction(DecoderTableThumb232, MI, insn32, Address,
+                             this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     Check(result, AddThumbPredicate(MI));
@@ -836,7 +846,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   }

   MI.clear();
-  result = decodeVFPInstruction32(MI, insn32, Address, this, STI);
+  result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     UpdateThumbVFPPredicate(MI);
@@ -844,19 +854,21 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
   }

   MI.clear();
-  result = decodeNEONDupInstruction32(MI, insn32, Address, this, STI);
+  result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address,
+                             this, STI);
   if (result != MCDisassembler::Fail) {
     Size = 4;
     Check(result, AddThumbPredicate(MI));
     return result;
   }

-  if (fieldFromInstruction32(insn32, 24, 8) == 0xF9) {
+  if (fieldFromInstruction(insn32, 24, 8) == 0xF9) {
     MI.clear();
     uint32_t NEONLdStInsn = insn32;
     NEONLdStInsn &= 0xF0FFFFFF;
     NEONLdStInsn |= 0x04000000;
-    result = decodeNEONLoadStoreInstruction32(MI, NEONLdStInsn, Address, this, STI);
+    result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn,
+                               Address, this, STI);
     if (result != MCDisassembler::Fail) {
       Size = 4;
       Check(result, AddThumbPredicate(MI));
@@ -864,13 +876,14 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
     }
   }

-  if (fieldFromInstruction32(insn32, 24, 4) == 0xF) {
+  if (fieldFromInstruction(insn32, 24, 4) == 0xF) {
     MI.clear();
     uint32_t NEONDataInsn = insn32;
     NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24
     NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
     NEONDataInsn |= 0x12000000; // Set bits 28 and 25
-    result = decodeNEONDataInstruction32(MI, NEONDataInsn, Address, this, STI);
+    result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn,
+                               Address, this, STI);
     if (result != MCDisassembler::Fail) {
       Size = 4;
       Check(result, AddThumbPredicate(MI));
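The width-suffixed helpers fieldFromInstruction16/fieldFromInstruction32 are likewise folded into a single fieldFromInstruction used throughout the decode callbacks below. Its plausible shape, reconstructed from the call sites rather than copied from LLVM, is a bit-extraction template:

#include <cstdint>

// Extract numBits bits of Insn starting at startBit. Assumes numBits
// is smaller than the bit-width of InsnType so the shift is defined.
template <typename InsnType>
InsnType fieldFromInstruction(InsnType Insn, unsigned startBit,
                              unsigned numBits) {
  return (Insn >> startBit) & ((InsnType(1) << numBits) - 1);
}

// Example: bits 19:16 of an ARM word hold Rn, so
// fieldFromInstruction(0x0012A000u, 16, 4) == 0x2.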
@@ -1117,9 +1130,9 @@ static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rm = fieldFromInstruction32(Val, 0, 4);
-  unsigned type = fieldFromInstruction32(Val, 5, 2);
-  unsigned imm = fieldFromInstruction32(Val, 7, 5);
+  unsigned Rm = fieldFromInstruction(Val, 0, 4);
+  unsigned type = fieldFromInstruction(Val, 5, 2);
+  unsigned imm = fieldFromInstruction(Val, 7, 5);

   // Register-immediate
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
@@ -1154,9 +1167,9 @@ static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rm = fieldFromInstruction32(Val, 0, 4);
-  unsigned type = fieldFromInstruction32(Val, 5, 2);
-  unsigned Rs = fieldFromInstruction32(Val, 8, 4);
+  unsigned Rm = fieldFromInstruction(Val, 0, 4);
+  unsigned type = fieldFromInstruction(Val, 5, 2);
+  unsigned Rs = fieldFromInstruction(Val, 8, 4);

   // Register-register
   if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
@@ -1224,8 +1237,8 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Vd = fieldFromInstruction32(Val, 8, 5);
-  unsigned regs = fieldFromInstruction32(Val, 0, 8);
+  unsigned Vd = fieldFromInstruction(Val, 8, 5);
+  unsigned regs = fieldFromInstruction(Val, 0, 8);

   if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -1241,8 +1254,8 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Vd = fieldFromInstruction32(Val, 8, 5);
-  unsigned regs = fieldFromInstruction32(Val, 0, 8);
+  unsigned Vd = fieldFromInstruction(Val, 8, 5);
+  unsigned regs = fieldFromInstruction(Val, 0, 8);

   regs = regs >> 1;

@@ -1263,8 +1276,8 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
   // the mask of all bits LSB-and-lower, and then xor them to create
   // the mask of that's all ones on [msb, lsb]. Finally we not it to
   // create the final mask.
-  unsigned msb = fieldFromInstruction32(Val, 5, 5);
-  unsigned lsb = fieldFromInstruction32(Val, 0, 5);
+  unsigned msb = fieldFromInstruction(Val, 5, 5);
+  unsigned lsb = fieldFromInstruction(Val, 0, 5);

   DecodeStatus S = MCDisassembler::Success;
   if (lsb > msb) Check(S, MCDisassembler::SoftFail);
@@ -1281,12 +1294,12 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned CRd = fieldFromInstruction32(Insn, 12, 4);
-  unsigned coproc = fieldFromInstruction32(Insn, 8, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 8);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned U = fieldFromInstruction32(Insn, 23, 1);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned CRd = fieldFromInstruction(Insn, 12, 4);
+  unsigned coproc = fieldFromInstruction(Insn, 8, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 8);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned U = fieldFromInstruction(Insn, 23, 1);

   switch (Inst.getOpcode()) {
     case ARM::LDC_OFFSET:
@@ -1426,14 +1439,14 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
                               uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 12);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned reg = fieldFromInstruction32(Insn, 25, 1);
-  unsigned P = fieldFromInstruction32(Insn, 24, 1);
-  unsigned W = fieldFromInstruction32(Insn, 21, 1);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 12);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned reg = fieldFromInstruction(Insn, 25, 1);
+  unsigned P = fieldFromInstruction(Insn, 24, 1);
+  unsigned W = fieldFromInstruction(Insn, 21, 1);

   // On stores, the writeback operand precedes Rt.
   switch (Inst.getOpcode()) {
@@ -1476,7 +1489,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
     return MCDisassembler::Fail;

   ARM_AM::AddrOpc Op = ARM_AM::add;
-  if (!fieldFromInstruction32(Insn, 23, 1))
+  if (!fieldFromInstruction(Insn, 23, 1))
     Op = ARM_AM::sub;

   bool writeback = (P == 0) || (W == 1);
@@ -1493,7 +1506,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
     if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
       return MCDisassembler::Fail;
     ARM_AM::ShiftOpc Opc = ARM_AM::lsl;
-    switch( fieldFromInstruction32(Insn, 5, 2)) {
+    switch( fieldFromInstruction(Insn, 5, 2)) {
       case 0:
         Opc = ARM_AM::lsl;
         break;
@@ -1509,7 +1522,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
       default:
         return MCDisassembler::Fail;
     }
-    unsigned amt = fieldFromInstruction32(Insn, 7, 5);
+    unsigned amt = fieldFromInstruction(Insn, 7, 5);
     unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode);

     Inst.addOperand(MCOperand::CreateImm(imm));
@@ -1529,11 +1542,11 @@ static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
                                 uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rn = fieldFromInstruction32(Val, 13, 4);
-  unsigned Rm = fieldFromInstruction32(Val, 0, 4);
-  unsigned type = fieldFromInstruction32(Val, 5, 2);
-  unsigned imm = fieldFromInstruction32(Val, 7, 5);
-  unsigned U = fieldFromInstruction32(Val, 12, 1);
+  unsigned Rn = fieldFromInstruction(Val, 13, 4);
+  unsigned Rm = fieldFromInstruction(Val, 0, 4);
+  unsigned type = fieldFromInstruction(Val, 5, 2);
+  unsigned imm = fieldFromInstruction(Val, 7, 5);
+  unsigned U = fieldFromInstruction(Val, 12, 1);

   ARM_AM::ShiftOpc ShOp = ARM_AM::lsl;
   switch (type) {
@@ -1570,15 +1583,15 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned type = fieldFromInstruction32(Insn, 22, 1);
-  unsigned imm = fieldFromInstruction32(Insn, 8, 4);
-  unsigned U = ((~fieldFromInstruction32(Insn, 23, 1)) & 1) << 8;
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned W = fieldFromInstruction32(Insn, 21, 1);
-  unsigned P = fieldFromInstruction32(Insn, 24, 1);
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned type = fieldFromInstruction(Insn, 22, 1);
+  unsigned imm = fieldFromInstruction(Insn, 8, 4);
+  unsigned U = ((~fieldFromInstruction(Insn, 23, 1)) & 1) << 8;
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned W = fieldFromInstruction(Insn, 21, 1);
+  unsigned P = fieldFromInstruction(Insn, 24, 1);
   unsigned Rt2 = Rt + 1;

   bool writeback = (W == 1) | (P == 0);
@@ -1609,7 +1622,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
       S = MCDisassembler::SoftFail;
     if (Rt2 == 15)
       S = MCDisassembler::SoftFail;
-    if (!type && fieldFromInstruction32(Insn, 8, 4))
+    if (!type && fieldFromInstruction(Insn, 8, 4))
       S = MCDisassembler::SoftFail;
     break;
   case ARM::STRH:
@@ -1761,8 +1774,8 @@ static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned mode = fieldFromInstruction32(Insn, 23, 2);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned mode = fieldFromInstruction(Insn, 23, 2);

   switch (mode) {
     case 0:
@@ -1791,9 +1804,9 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
                                   uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned reglist = fieldFromInstruction32(Insn, 0, 16);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned reglist = fieldFromInstruction(Insn, 0, 16);

   if (pred == 0xF) {
     switch (Inst.getOpcode()) {
@@ -1850,9 +1863,9 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
     }

     // For stores (which become SRS's, the only operand is the mode.
-    if (fieldFromInstruction32(Insn, 20, 1) == 0) {
+    if (fieldFromInstruction(Insn, 20, 1) == 0) {
       Inst.addOperand(
-          MCOperand::CreateImm(fieldFromInstruction32(Insn, 0, 4)));
+          MCOperand::CreateImm(fieldFromInstruction(Insn, 0, 4)));
       return S;
     }

@@ -1873,10 +1886,10 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,

 static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
-  unsigned imod = fieldFromInstruction32(Insn, 18, 2);
-  unsigned M = fieldFromInstruction32(Insn, 17, 1);
-  unsigned iflags = fieldFromInstruction32(Insn, 6, 3);
-  unsigned mode = fieldFromInstruction32(Insn, 0, 5);
+  unsigned imod = fieldFromInstruction(Insn, 18, 2);
+  unsigned M = fieldFromInstruction(Insn, 17, 1);
+  unsigned iflags = fieldFromInstruction(Insn, 6, 3);
+  unsigned mode = fieldFromInstruction(Insn, 0, 5);

   DecodeStatus S = MCDisassembler::Success;

@@ -1913,10 +1926,10 @@ static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,

 static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
-  unsigned imod = fieldFromInstruction32(Insn, 9, 2);
-  unsigned M = fieldFromInstruction32(Insn, 8, 1);
-  unsigned iflags = fieldFromInstruction32(Insn, 5, 3);
-  unsigned mode = fieldFromInstruction32(Insn, 0, 5);
+  unsigned imod = fieldFromInstruction(Insn, 9, 2);
+  unsigned M = fieldFromInstruction(Insn, 8, 1);
+  unsigned iflags = fieldFromInstruction(Insn, 5, 3);
+  unsigned mode = fieldFromInstruction(Insn, 0, 5);

   DecodeStatus S = MCDisassembler::Success;

@@ -1955,13 +1968,13 @@ static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rd = fieldFromInstruction32(Insn, 8, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 8, 4);
   unsigned imm = 0;

-  imm |= (fieldFromInstruction32(Insn, 0, 8) << 0);
-  imm |= (fieldFromInstruction32(Insn, 12, 3) << 8);
-  imm |= (fieldFromInstruction32(Insn, 16, 4) << 12);
-  imm |= (fieldFromInstruction32(Insn, 26, 1) << 11);
+  imm |= (fieldFromInstruction(Insn, 0, 8) << 0);
+  imm |= (fieldFromInstruction(Insn, 12, 3) << 8);
+  imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
+  imm |= (fieldFromInstruction(Insn, 26, 1) << 11);

   if (Inst.getOpcode() == ARM::t2MOVTi16)
     if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -1979,12 +1992,12 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
   unsigned imm = 0;

-  imm |= (fieldFromInstruction32(Insn, 0, 12) << 0);
-  imm |= (fieldFromInstruction32(Insn, 16, 4) << 12);
+  imm |= (fieldFromInstruction(Insn, 0, 12) << 0);
+  imm |= (fieldFromInstruction(Insn, 16, 4) << 12);

   if (Inst.getOpcode() == ARM::MOVTi16)
     if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -2005,11 +2018,11 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rd = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 0, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 8, 4);
-  unsigned Ra = fieldFromInstruction32(Insn, 12, 4);
-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 0, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 8, 4);
+  unsigned Ra = fieldFromInstruction(Insn, 12, 4);
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);

   if (pred == 0xF)
     return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
@@ -2033,9 +2046,9 @@ static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned add = fieldFromInstruction32(Val, 12, 1);
-  unsigned imm = fieldFromInstruction32(Val, 0, 12);
-  unsigned Rn = fieldFromInstruction32(Val, 13, 4);
+  unsigned add = fieldFromInstruction(Val, 12, 1);
+  unsigned imm = fieldFromInstruction(Val, 0, 12);
+  unsigned Rn = fieldFromInstruction(Val, 13, 4);

   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2053,9 +2066,9 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rn = fieldFromInstruction32(Val, 9, 4);
-  unsigned U = fieldFromInstruction32(Val, 8, 1);
-  unsigned imm = fieldFromInstruction32(Val, 0, 8);
+  unsigned Rn = fieldFromInstruction(Val, 9, 4);
+  unsigned U = fieldFromInstruction(Val, 8, 1);
+  unsigned imm = fieldFromInstruction(Val, 0, 8);

   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2077,11 +2090,11 @@ static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned imm = (fieldFromInstruction32(Insn, 0, 11) << 0) |
-                 (fieldFromInstruction32(Insn, 11, 1) << 18) |
-                 (fieldFromInstruction32(Insn, 13, 1) << 17) |
-                 (fieldFromInstruction32(Insn, 16, 6) << 11) |
-                 (fieldFromInstruction32(Insn, 26, 1) << 19);
+  unsigned imm = (fieldFromInstruction(Insn, 0, 11) << 0) |
+                 (fieldFromInstruction(Insn, 11, 1) << 18) |
+                 (fieldFromInstruction(Insn, 13, 1) << 17) |
+                 (fieldFromInstruction(Insn, 16, 6) << 11) |
+                 (fieldFromInstruction(Insn, 26, 1) << 19);
   if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4,
                                 true, 4, Inst, Decoder))
     Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1)));
@@ -2093,12 +2106,12 @@ DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned pred = fieldFromInstruction32(Insn, 28, 4);
-  unsigned imm = fieldFromInstruction32(Insn, 0, 24) << 2;
+  unsigned pred = fieldFromInstruction(Insn, 28, 4);
+  unsigned imm = fieldFromInstruction(Insn, 0, 24) << 2;

   if (pred == 0xF) {
     Inst.setOpcode(ARM::BLXi);
-    imm |= fieldFromInstruction32(Insn, 24, 1) << 1;
+    imm |= fieldFromInstruction(Insn, 24, 1) << 1;
     if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
                                   true, 4, Inst, Decoder))
       Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
@@ -2119,8 +2132,8 @@ static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rm = fieldFromInstruction32(Val, 0, 4);
-  unsigned align = fieldFromInstruction32(Val, 4, 2);
+  unsigned Rm = fieldFromInstruction(Val, 0, 4);
+  unsigned align = fieldFromInstruction(Val, 4, 2);

   if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2136,12 +2149,12 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned wb = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  Rn |= fieldFromInstruction32(Insn, 4, 2) << 4;
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned wb = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  Rn |= fieldFromInstruction(Insn, 4, 2) << 4;
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);

   // First output register
   switch (Inst.getOpcode()) {
@@ -2410,12 +2423,12 @@ static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned wb = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  Rn |= fieldFromInstruction32(Insn, 4, 2) << 4;
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned wb = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  Rn |= fieldFromInstruction(Insn, 4, 2) << 4;
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);

   // Writeback Operand
   switch (Inst.getOpcode()) {
@@ -2681,12 +2694,12 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned align = fieldFromInstruction32(Insn, 4, 1);
-  unsigned size = fieldFromInstruction32(Insn, 6, 2);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned align = fieldFromInstruction(Insn, 4, 1);
+  unsigned size = fieldFromInstruction(Insn, 6, 2);

   align *= (1 << size);
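In the VLDn-dup decoders the alignment operand is rebuilt from the a bit and the element-size field, giving a value in bytes. Worked through for the VLD1DUP case above: with size == 2 the elements are 32-bit, so a set a bit yields align = 1 * (1 << 2) = 4; with the bit clear, align stays 0 (no alignment restriction). As a tiny standalone sketch:

// The a bit scaled by the element size in bytes, as in
// DecodeVLD1DupInstruction above.
unsigned vld1DupAlign(unsigned aBit, unsigned sizeField) {
  return aBit * (1u << sizeField); // sizeField 2 -> 4-byte alignment
}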
@@ -2726,12 +2739,12 @@ static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned align = fieldFromInstruction32(Insn, 4, 1);
-  unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned align = fieldFromInstruction(Insn, 4, 1);
+  unsigned size = 1 << fieldFromInstruction(Insn, 6, 2);
   align *= 2*size;

   switch (Inst.getOpcode()) {
@@ -2774,11 +2787,11 @@ static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned inc = fieldFromInstruction(Insn, 5, 1) + 1;

   if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2809,13 +2822,13 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned size = fieldFromInstruction32(Insn, 6, 2);
-  unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
-  unsigned align = fieldFromInstruction32(Insn, 4, 1);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned size = fieldFromInstruction(Insn, 6, 2);
+  unsigned inc = fieldFromInstruction(Insn, 5, 1) + 1;
+  unsigned align = fieldFromInstruction(Insn, 4, 1);

   if (size == 0x3) {
     size = 4;
@@ -2862,14 +2875,14 @@ DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn,
                             uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned imm = fieldFromInstruction32(Insn, 0, 4);
-  imm |= fieldFromInstruction32(Insn, 16, 3) << 4;
-  imm |= fieldFromInstruction32(Insn, 24, 1) << 7;
-  imm |= fieldFromInstruction32(Insn, 8, 4) << 8;
-  imm |= fieldFromInstruction32(Insn, 5, 1) << 12;
-  unsigned Q = fieldFromInstruction32(Insn, 6, 1);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned imm = fieldFromInstruction(Insn, 0, 4);
+  imm |= fieldFromInstruction(Insn, 16, 3) << 4;
+  imm |= fieldFromInstruction(Insn, 24, 1) << 7;
+  imm |= fieldFromInstruction(Insn, 8, 4) << 8;
+  imm |= fieldFromInstruction(Insn, 5, 1) << 12;
+  unsigned Q = fieldFromInstruction(Insn, 6, 1);

   if (Q) {
     if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
@@ -2907,11 +2920,11 @@ static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
-  unsigned size = fieldFromInstruction32(Insn, 18, 2);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  Rm |= fieldFromInstruction(Insn, 5, 1) << 4;
+  unsigned size = fieldFromInstruction(Insn, 18, 2);

   if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2950,13 +2963,13 @@ static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rd = fieldFromInstruction32(Insn, 12, 4);
-  Rd |= fieldFromInstruction32(Insn, 22, 1) << 4;
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  Rn |= fieldFromInstruction32(Insn, 7, 1) << 4;
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
-  unsigned op = fieldFromInstruction32(Insn, 6, 1);
+  unsigned Rd = fieldFromInstruction(Insn, 12, 4);
+  Rd |= fieldFromInstruction(Insn, 22, 1) << 4;
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  Rn |= fieldFromInstruction(Insn, 7, 1) << 4;
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  Rm |= fieldFromInstruction(Insn, 5, 1) << 4;
+  unsigned op = fieldFromInstruction(Insn, 6, 1);

   if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -2986,8 +2999,8 @@ static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned dst = fieldFromInstruction16(Insn, 8, 3);
-  unsigned imm = fieldFromInstruction16(Insn, 0, 8);
+  unsigned dst = fieldFromInstruction(Insn, 8, 3);
+  unsigned imm = fieldFromInstruction(Insn, 0, 8);

   if (!Check(S, DecodetGPRRegisterClass(Inst, dst, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3034,8 +3047,8 @@ static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rn = fieldFromInstruction32(Val, 0, 3);
-  unsigned Rm = fieldFromInstruction32(Val, 3, 3);
+  unsigned Rn = fieldFromInstruction(Val, 0, 3);
+  unsigned Rm = fieldFromInstruction(Val, 3, 3);

   if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3049,8 +3062,8 @@ static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rn = fieldFromInstruction32(Val, 0, 3);
-  unsigned imm = fieldFromInstruction32(Val, 3, 5);
+  unsigned Rn = fieldFromInstruction(Val, 0, 3);
+  unsigned imm = fieldFromInstruction(Val, 3, 5);

   if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3081,9 +3094,9 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rn = fieldFromInstruction32(Val, 6, 4);
-  unsigned Rm = fieldFromInstruction32(Val, 2, 4);
-  unsigned imm = fieldFromInstruction32(Val, 0, 2);
+  unsigned Rn = fieldFromInstruction(Val, 6, 4);
+  unsigned Rm = fieldFromInstruction(Val, 2, 4);
+  unsigned imm = fieldFromInstruction(Val, 0, 2);

   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3104,13 +3117,13 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
     case ARM::t2PLIs:
       break;
     default: {
-      unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+      unsigned Rt = fieldFromInstruction(Insn, 12, 4);
       if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
         return MCDisassembler::Fail;
     }
   }

-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
   if (Rn == 0xF) {
     switch (Inst.getOpcode()) {
       case ARM::t2LDRBs:
@@ -3133,16 +3146,16 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
         return MCDisassembler::Fail;
     }

-    int imm = fieldFromInstruction32(Insn, 0, 12);
-    if (!fieldFromInstruction32(Insn, 23, 1)) imm *= -1;
+    int imm = fieldFromInstruction(Insn, 0, 12);
+    if (!fieldFromInstruction(Insn, 23, 1)) imm *= -1;
     Inst.addOperand(MCOperand::CreateImm(imm));

     return S;
   }

-  unsigned addrmode = fieldFromInstruction32(Insn, 4, 2);
-  addrmode |= fieldFromInstruction32(Insn, 0, 4) << 2;
-  addrmode |= fieldFromInstruction32(Insn, 16, 4) << 6;
+  unsigned addrmode = fieldFromInstruction(Insn, 4, 2);
+  addrmode |= fieldFromInstruction(Insn, 0, 4) << 2;
+  addrmode |= fieldFromInstruction(Insn, 16, 4) << 6;
   if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder)))
     return MCDisassembler::Fail;

@@ -3151,9 +3164,14 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,

 static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
                            uint64_t Address, const void *Decoder) {
-  int imm = Val & 0xFF;
-  if (!(Val & 0x100)) imm *= -1;
-  Inst.addOperand(MCOperand::CreateImm(imm << 2));
+  if (Val == 0)
+    Inst.addOperand(MCOperand::CreateImm(INT32_MIN));
+  else {
+    int imm = Val & 0xFF;
+
+    if (!(Val & 0x100)) imm *= -1;
+    Inst.addOperand(MCOperand::CreateImm(imm << 2));
+  }

   return MCDisassembler::Success;
 }
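A zero value in the nine-bit U:imm8 field means "#-0" (subtract zero), which is written differently from "#0" in assembly but has no distinct integer value, so the decoder flags it with INT32_MIN; the isMemImm8s4Offset() change earlier in this patch accepts the same sentinel on the parsing side. The decode logic, restated as a standalone function:

#include <climits>
#include <cstdint>

int32_t decodeT2Imm8S4(uint32_t Val) {   // Val = U:imm8, 9 bits
  if (Val == 0)
    return INT32_MIN;                    // sentinel for "#-0"
  int32_t imm = Val & 0xFF;
  if (!(Val & 0x100))                    // U bit clear: negative offset
    imm = -imm;
  return imm * 4;                        // offsets are multiples of 4
}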
@@ -3162,8 +3180,8 @@ static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rn = fieldFromInstruction32(Val, 9, 4);
-  unsigned imm = fieldFromInstruction32(Val, 0, 9);
+  unsigned Rn = fieldFromInstruction(Val, 9, 4);
+  unsigned imm = fieldFromInstruction(Val, 0, 9);

   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3177,8 +3195,8 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rn = fieldFromInstruction32(Val, 8, 4);
-  unsigned imm = fieldFromInstruction32(Val, 0, 8);
+  unsigned Rn = fieldFromInstruction(Val, 8, 4);
+  unsigned imm = fieldFromInstruction(Val, 0, 8);

   if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3205,8 +3223,8 @@ static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rn = fieldFromInstruction32(Val, 9, 4);
-  unsigned imm = fieldFromInstruction32(Val, 0, 9);
+  unsigned Rn = fieldFromInstruction(Val, 9, 4);
+  unsigned imm = fieldFromInstruction(Val, 0, 9);

   // Some instructions always use an additive offset.
   switch (Inst.getOpcode()) {
@@ -3236,12 +3254,12 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
                             uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned addr = fieldFromInstruction32(Insn, 0, 8);
-  addr |= fieldFromInstruction32(Insn, 9, 1) << 8;
+  unsigned Rt = fieldFromInstruction(Insn, 12, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned addr = fieldFromInstruction(Insn, 0, 8);
+  addr |= fieldFromInstruction(Insn, 9, 1) << 8;
   addr |= Rn << 9;
-  unsigned load = fieldFromInstruction32(Insn, 20, 1);
+  unsigned load = fieldFromInstruction(Insn, 20, 1);

   if (!load) {
     if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
@@ -3266,8 +3284,8 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rn = fieldFromInstruction32(Val, 13, 4);
-  unsigned imm = fieldFromInstruction32(Val, 0, 12);
+  unsigned Rn = fieldFromInstruction(Val, 13, 4);
+  unsigned imm = fieldFromInstruction(Val, 0, 12);

   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3279,7 +3297,7 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,

 static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn,
                                 uint64_t Address, const void *Decoder) {
-  unsigned imm = fieldFromInstruction16(Insn, 0, 7);
+  unsigned imm = fieldFromInstruction(Insn, 0, 7);

   Inst.addOperand(MCOperand::CreateReg(ARM::SP));
   Inst.addOperand(MCOperand::CreateReg(ARM::SP));
@@ -3293,8 +3311,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
   DecodeStatus S = MCDisassembler::Success;

   if (Inst.getOpcode() == ARM::tADDrSP) {
-    unsigned Rdm = fieldFromInstruction16(Insn, 0, 3);
-    Rdm |= fieldFromInstruction16(Insn, 7, 1) << 3;
+    unsigned Rdm = fieldFromInstruction(Insn, 0, 3);
+    Rdm |= fieldFromInstruction(Insn, 7, 1) << 3;

     if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
       return MCDisassembler::Fail;
@@ -3302,7 +3320,7 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
     if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder)))
       return MCDisassembler::Fail;
   } else if (Inst.getOpcode() == ARM::tADDspr) {
-    unsigned Rm = fieldFromInstruction16(Insn, 3, 4);
+    unsigned Rm = fieldFromInstruction(Insn, 3, 4);

     Inst.addOperand(MCOperand::CreateReg(ARM::SP));
     Inst.addOperand(MCOperand::CreateReg(ARM::SP));
@@ -3315,8 +3333,8 @@ static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
                              uint64_t Address, const void *Decoder) {
-  unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2;
-  unsigned flags = fieldFromInstruction16(Insn, 0, 3);
+  unsigned imod = fieldFromInstruction(Insn, 4, 1) | 0x2;
+  unsigned flags = fieldFromInstruction(Insn, 0, 3);

   Inst.addOperand(MCOperand::CreateImm(imod));
   Inst.addOperand(MCOperand::CreateImm(flags));
@@ -3327,8 +3345,8 @@ static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
-  unsigned add = fieldFromInstruction32(Insn, 4, 1);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+  unsigned add = fieldFromInstruction(Insn, 4, 1);

   if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3375,8 +3393,8 @@ DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
                        uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
-  unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
+  unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+  unsigned Rm = fieldFromInstruction(Insn, 0, 4);

   if (Rn == ARM::SP) S = MCDisassembler::SoftFail;
   if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
@@ -3391,9 +3409,9 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
                            uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;

-  unsigned pred = fieldFromInstruction32(Insn, 22, 4);
+  unsigned pred = fieldFromInstruction(Insn, 22, 4);
   if (pred == 0xE || pred == 0xF) {
-    unsigned opc = fieldFromInstruction32(Insn, 4, 28);
+    unsigned opc = fieldFromInstruction(Insn, 4, 28);
     switch (opc) {
       default:
         return MCDisassembler::Fail;
@@ -3408,15 +3426,15 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
         break;
     }

-    unsigned imm = fieldFromInstruction32(Insn, 0, 4);
+    unsigned imm = fieldFromInstruction(Insn, 0, 4);
     return DecodeMemBarrierOption(Inst, imm, Address, Decoder);
   }

-  unsigned brtarget = fieldFromInstruction32(Insn, 0, 11) << 1;
-  brtarget |= fieldFromInstruction32(Insn, 11, 1) << 19;
-  brtarget |= fieldFromInstruction32(Insn, 13, 1) << 18;
-  brtarget |= fieldFromInstruction32(Insn, 16, 6) << 12;
-  brtarget |= fieldFromInstruction32(Insn, 26, 1) << 20;
+  unsigned brtarget = fieldFromInstruction(Insn, 0, 11) << 1;
+  brtarget |= fieldFromInstruction(Insn, 11, 1) << 19;
+  brtarget |= fieldFromInstruction(Insn, 13, 1) << 18;
+  brtarget |= fieldFromInstruction(Insn, 16, 6) << 12;
+  brtarget |= fieldFromInstruction(Insn, 26, 1) << 20;

   if (!Check(S, DecodeT2BROperand(Inst, brtarget, Address, Decoder)))
     return MCDisassembler::Fail;
@@ -3431,10 +3449,10 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
 // a splat operation or a rotation.
 static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder) {
-  unsigned ctrl = fieldFromInstruction32(Val, 10, 2);
+  unsigned ctrl = fieldFromInstruction(Val, 10, 2);
   if (ctrl == 0) {
-    unsigned byte = fieldFromInstruction32(Val, 8, 2);
-    unsigned imm = fieldFromInstruction32(Val, 0, 8);
+    unsigned byte = fieldFromInstruction(Val, 8, 2);
+    unsigned imm = fieldFromInstruction(Val, 0, 8);
     switch (byte) {
       case 0:
         Inst.addOperand(MCOperand::CreateImm(imm));
@@ -3451,8 +3469,8 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
       break;
     }
   } else {
-    unsigned unrot = fieldFromInstruction32(Val, 0, 7) | 0x80;
-    unsigned rot = fieldFromInstruction32(Val, 7, 5);
+    unsigned unrot = fieldFromInstruction(Val, 0, 7) | 0x80;
+    unsigned rot = fieldFromInstruction(Val, 7, 5);
     unsigned imm = (unrot >> rot) | (unrot << ((32-rot)&31));
     Inst.addOperand(MCOperand::CreateImm(imm));
   }
@@ -3494,19 +3512,8 @@ static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,

 static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
                                    uint64_t Address, const void *Decoder) {
-  switch (Val) {
-  default:
+  if (Val & ~0xf)
     return MCDisassembler::Fail;
-  case 0xF: // SY
-  case 0xE: // ST
-  case 0xB: // ISH
-  case 0xA: // ISHST
-  case 0x7: // NSH
-  case 0x6: // NSHST
-  case 0x3: // OSH
-  case 0x2: // OSHST
-    break;
-  }

   Inst.addOperand(MCOperand::CreateImm(Val));
   return MCDisassembler::Success;
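DecodeMemBarrierOption is the disassembler side of the numeric-barrier change in the assembly parser: instead of rejecting everything but the eight named options, any 4-bit value is now a valid operand (reserved values presumably printing back in the numeric #n form), so validity reduces to a range check:

bool isValidBarrierOption(unsigned Val) {
  return (Val & ~0xFu) == 0; // all sixteen 4-bit encodings accepted
}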
MCDisassembler::SoftFail; @@ -3598,13 +3605,13 @@ static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned imm = fieldFromInstruction32(Insn, 0, 12); - imm |= fieldFromInstruction32(Insn, 16, 4) << 13; - imm |= fieldFromInstruction32(Insn, 23, 1) << 12; - unsigned pred = fieldFromInstruction32(Insn, 28, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 12); + imm |= fieldFromInstruction(Insn, 16, 4) << 13; + imm |= fieldFromInstruction(Insn, 23, 1) << 12; + unsigned pred = fieldFromInstruction(Insn, 28, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail; if (Rm == 0xF) S = MCDisassembler::SoftFail; @@ -3626,12 +3633,12 @@ static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned imm = fieldFromInstruction32(Insn, 0, 12); - imm |= fieldFromInstruction32(Insn, 16, 4) << 13; - imm |= fieldFromInstruction32(Insn, 23, 1) << 12; - unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 12); + imm |= fieldFromInstruction(Insn, 16, 4) << 13; + imm |= fieldFromInstruction(Insn, 23, 1) << 12; + unsigned pred = fieldFromInstruction(Insn, 28, 4); if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail; @@ -3651,12 +3658,12 @@ static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned imm = fieldFromInstruction32(Insn, 0, 12); - imm |= fieldFromInstruction32(Insn, 16, 4) << 13; - imm |= fieldFromInstruction32(Insn, 23, 1) << 12; - unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 12); + imm |= fieldFromInstruction(Insn, 16, 4) << 13; + imm |= fieldFromInstruction(Insn, 23, 1) << 12; + unsigned pred = fieldFromInstruction(Insn, 28, 4); if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail; @@ -3676,11 +3683,11 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 10, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 10, 2); unsigned align = 0; unsigned index = 0; @@ -3688,22 +3695,22 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, 
default: return MCDisassembler::Fail; case 0: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 5, 3); + index = fieldFromInstruction(Insn, 5, 3); break; case 1: - if (fieldFromInstruction32(Insn, 5, 1)) + if (fieldFromInstruction(Insn, 5, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 6, 2); - if (fieldFromInstruction32(Insn, 4, 1)) + index = fieldFromInstruction(Insn, 6, 2); + if (fieldFromInstruction(Insn, 4, 1)) align = 2; break; case 2: - if (fieldFromInstruction32(Insn, 6, 1)) + if (fieldFromInstruction(Insn, 6, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 7, 1); - if (fieldFromInstruction32(Insn, 4, 2) != 0) + index = fieldFromInstruction(Insn, 7, 1); + if (fieldFromInstruction(Insn, 4, 2) != 0) align = 4; } @@ -3735,11 +3742,11 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 10, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 10, 2); unsigned align = 0; unsigned index = 0; @@ -3747,22 +3754,22 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, default: return MCDisassembler::Fail; case 0: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 5, 3); + index = fieldFromInstruction(Insn, 5, 3); break; case 1: - if (fieldFromInstruction32(Insn, 5, 1)) + if (fieldFromInstruction(Insn, 5, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 6, 2); - if (fieldFromInstruction32(Insn, 4, 1)) + index = fieldFromInstruction(Insn, 6, 2); + if (fieldFromInstruction(Insn, 4, 1)) align = 2; break; case 2: - if (fieldFromInstruction32(Insn, 6, 1)) + if (fieldFromInstruction(Insn, 6, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 7, 1); - if (fieldFromInstruction32(Insn, 4, 2) != 0) + index = fieldFromInstruction(Insn, 7, 1); + if (fieldFromInstruction(Insn, 4, 2) != 0) align = 4; } @@ -3793,11 +3800,11 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 10, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 10, 2); unsigned align = 0; unsigned index = 0; @@ -3806,24 +3813,24 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, default: return MCDisassembler::Fail; case 0: - index = fieldFromInstruction32(Insn, 5, 3); - if 
(fieldFromInstruction32(Insn, 4, 1)) + index = fieldFromInstruction(Insn, 5, 3); + if (fieldFromInstruction(Insn, 4, 1)) align = 2; break; case 1: - index = fieldFromInstruction32(Insn, 6, 2); - if (fieldFromInstruction32(Insn, 4, 1)) + index = fieldFromInstruction(Insn, 6, 2); + if (fieldFromInstruction(Insn, 4, 1)) align = 4; - if (fieldFromInstruction32(Insn, 5, 1)) + if (fieldFromInstruction(Insn, 5, 1)) inc = 2; break; case 2: - if (fieldFromInstruction32(Insn, 5, 1)) + if (fieldFromInstruction(Insn, 5, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 7, 1); - if (fieldFromInstruction32(Insn, 4, 1) != 0) + index = fieldFromInstruction(Insn, 7, 1); + if (fieldFromInstruction(Insn, 4, 1) != 0) align = 8; - if (fieldFromInstruction32(Insn, 6, 1)) + if (fieldFromInstruction(Insn, 6, 1)) inc = 2; break; } @@ -3860,11 +3867,11 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 10, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 10, 2); unsigned align = 0; unsigned index = 0; @@ -3873,24 +3880,24 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, default: return MCDisassembler::Fail; case 0: - index = fieldFromInstruction32(Insn, 5, 3); - if (fieldFromInstruction32(Insn, 4, 1)) + index = fieldFromInstruction(Insn, 5, 3); + if (fieldFromInstruction(Insn, 4, 1)) align = 2; break; case 1: - index = fieldFromInstruction32(Insn, 6, 2); - if (fieldFromInstruction32(Insn, 4, 1)) + index = fieldFromInstruction(Insn, 6, 2); + if (fieldFromInstruction(Insn, 4, 1)) align = 4; - if (fieldFromInstruction32(Insn, 5, 1)) + if (fieldFromInstruction(Insn, 5, 1)) inc = 2; break; case 2: - if (fieldFromInstruction32(Insn, 5, 1)) + if (fieldFromInstruction(Insn, 5, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 7, 1); - if (fieldFromInstruction32(Insn, 4, 1) != 0) + index = fieldFromInstruction(Insn, 7, 1); + if (fieldFromInstruction(Insn, 4, 1) != 0) align = 8; - if (fieldFromInstruction32(Insn, 6, 1)) + if (fieldFromInstruction(Insn, 6, 1)) inc = 2; break; } @@ -3924,11 +3931,11 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 10, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 10, 2); unsigned align = 0; unsigned index = 0; @@ -3937,22 +3944,22 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, default: return MCDisassembler::Fail; case 0: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) 
return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 5, 3); + index = fieldFromInstruction(Insn, 5, 3); break; case 1: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 6, 2); - if (fieldFromInstruction32(Insn, 5, 1)) + index = fieldFromInstruction(Insn, 6, 2); + if (fieldFromInstruction(Insn, 5, 1)) inc = 2; break; case 2: - if (fieldFromInstruction32(Insn, 4, 2)) + if (fieldFromInstruction(Insn, 4, 2)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 7, 1); - if (fieldFromInstruction32(Insn, 6, 1)) + index = fieldFromInstruction(Insn, 7, 1); + if (fieldFromInstruction(Insn, 6, 1)) inc = 2; break; } @@ -3994,11 +4001,11 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 10, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 10, 2); unsigned align = 0; unsigned index = 0; @@ -4007,22 +4014,22 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, default: return MCDisassembler::Fail; case 0: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 5, 3); + index = fieldFromInstruction(Insn, 5, 3); break; case 1: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 6, 2); - if (fieldFromInstruction32(Insn, 5, 1)) + index = fieldFromInstruction(Insn, 6, 2); + if (fieldFromInstruction(Insn, 5, 1)) inc = 2; break; case 2: - if (fieldFromInstruction32(Insn, 4, 2)) + if (fieldFromInstruction(Insn, 4, 2)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 7, 1); - if (fieldFromInstruction32(Insn, 6, 1)) + index = fieldFromInstruction(Insn, 7, 1); + if (fieldFromInstruction(Insn, 6, 1)) inc = 2; break; } @@ -4058,11 +4065,11 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 10, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 10, 2); unsigned align = 0; unsigned index = 0; @@ -4071,22 +4078,22 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, default: return MCDisassembler::Fail; case 0: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) align = 4; - index = fieldFromInstruction32(Insn, 5, 3); + index = fieldFromInstruction(Insn, 5, 3); break; case 
1: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) align = 8; - index = fieldFromInstruction32(Insn, 6, 2); - if (fieldFromInstruction32(Insn, 5, 1)) + index = fieldFromInstruction(Insn, 6, 2); + if (fieldFromInstruction(Insn, 5, 1)) inc = 2; break; case 2: - if (fieldFromInstruction32(Insn, 4, 2)) - align = 4 << fieldFromInstruction32(Insn, 4, 2); - index = fieldFromInstruction32(Insn, 7, 1); - if (fieldFromInstruction32(Insn, 6, 1)) + if (fieldFromInstruction(Insn, 4, 2)) + align = 4 << fieldFromInstruction(Insn, 4, 2); + index = fieldFromInstruction(Insn, 7, 1); + if (fieldFromInstruction(Insn, 6, 1)) inc = 2; break; } @@ -4132,11 +4139,11 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 10, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 10, 2); unsigned align = 0; unsigned index = 0; @@ -4145,22 +4152,22 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, default: return MCDisassembler::Fail; case 0: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) align = 4; - index = fieldFromInstruction32(Insn, 5, 3); + index = fieldFromInstruction(Insn, 5, 3); break; case 1: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) align = 8; - index = fieldFromInstruction32(Insn, 6, 2); - if (fieldFromInstruction32(Insn, 5, 1)) + index = fieldFromInstruction(Insn, 6, 2); + if (fieldFromInstruction(Insn, 5, 1)) inc = 2; break; case 2: - if (fieldFromInstruction32(Insn, 4, 2)) - align = 4 << fieldFromInstruction32(Insn, 4, 2); - index = fieldFromInstruction32(Insn, 7, 1); - if (fieldFromInstruction32(Insn, 6, 1)) + if (fieldFromInstruction(Insn, 4, 2)) + align = 4 << fieldFromInstruction(Insn, 4, 2); + index = fieldFromInstruction(Insn, 7, 1); + if (fieldFromInstruction(Insn, 6, 1)) inc = 2; break; } @@ -4196,11 +4203,11 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 5, 1); - unsigned pred = fieldFromInstruction32(Insn, 28, 4); - Rm |= fieldFromInstruction32(Insn, 0, 4) << 1; + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 5, 1); + unsigned pred = fieldFromInstruction(Insn, 28, 4); + Rm |= fieldFromInstruction(Insn, 0, 4) << 1; if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) S = MCDisassembler::SoftFail; @@ -4222,11 +4229,11 @@ static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm 
= fieldFromInstruction32(Insn, 5, 1); - unsigned pred = fieldFromInstruction32(Insn, 28, 4); - Rm |= fieldFromInstruction32(Insn, 0, 4) << 1; + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 5, 1); + unsigned pred = fieldFromInstruction(Insn, 28, 4); + Rm |= fieldFromInstruction(Insn, 0, 4) << 1; if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) S = MCDisassembler::SoftFail; @@ -4248,8 +4255,8 @@ static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned pred = fieldFromInstruction16(Insn, 4, 4); - unsigned mask = fieldFromInstruction16(Insn, 0, 4); + unsigned pred = fieldFromInstruction(Insn, 4, 4); + unsigned mask = fieldFromInstruction(Insn, 0, 4); if (pred == 0xF) { pred = 0xE; @@ -4271,13 +4278,13 @@ DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4); - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned addr = fieldFromInstruction32(Insn, 0, 8); - unsigned W = fieldFromInstruction32(Insn, 21, 1); - unsigned U = fieldFromInstruction32(Insn, 23, 1); - unsigned P = fieldFromInstruction32(Insn, 24, 1); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction(Insn, 8, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned addr = fieldFromInstruction(Insn, 0, 8); + unsigned W = fieldFromInstruction(Insn, 21, 1); + unsigned U = fieldFromInstruction(Insn, 23, 1); + unsigned P = fieldFromInstruction(Insn, 24, 1); bool writeback = (W == 1) | (P == 0); addr |= (U << 8) | (Rn << 9); @@ -4308,13 +4315,13 @@ DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4); - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned addr = fieldFromInstruction32(Insn, 0, 8); - unsigned W = fieldFromInstruction32(Insn, 21, 1); - unsigned U = fieldFromInstruction32(Insn, 23, 1); - unsigned P = fieldFromInstruction32(Insn, 24, 1); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction(Insn, 8, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned addr = fieldFromInstruction(Insn, 0, 8); + unsigned W = fieldFromInstruction(Insn, 21, 1); + unsigned U = fieldFromInstruction(Insn, 23, 1); + unsigned P = fieldFromInstruction(Insn, 24, 1); bool writeback = (W == 1) | (P == 0); addr |= (U << 8) | (Rn << 9); @@ -4340,13 +4347,13 @@ DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, uint64_t Address, const void *Decoder) { - unsigned sign1 = fieldFromInstruction32(Insn, 21, 1); - unsigned sign2 = fieldFromInstruction32(Insn, 23, 1); + unsigned sign1 = fieldFromInstruction(Insn, 21, 1); + unsigned sign2 = fieldFromInstruction(Insn, 23, 1); if (sign1 != sign2) return MCDisassembler::Fail; - unsigned Val = fieldFromInstruction32(Insn, 0, 8); - Val |= fieldFromInstruction32(Insn, 12, 3) << 8; - Val |= fieldFromInstruction32(Insn, 26, 1) << 11; + unsigned Val = fieldFromInstruction(Insn, 0, 8); + Val |= fieldFromInstruction(Insn, 12, 3) << 8; 
+ Val |= fieldFromInstruction(Insn, 26, 1) << 11; Val |= sign1 << 12; Inst.addOperand(MCOperand::CreateImm(SignExtend32<13>(Val))); @@ -4366,10 +4373,10 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val, static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned Rt2 = fieldFromInstruction32(Insn, 0, 4); - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction(Insn, 0, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned pred = fieldFromInstruction(Insn, 28, 4); if (pred == 0xF) return DecodeCPSInstruction(Inst, Insn, Address, Decoder); @@ -4393,12 +4400,12 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { - unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0); - Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4); - unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0); - Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4); - unsigned imm = fieldFromInstruction32(Insn, 16, 6); - unsigned cmode = fieldFromInstruction32(Insn, 8, 4); + unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0); + Vd |= (fieldFromInstruction(Insn, 22, 1) << 4); + unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0); + Vm |= (fieldFromInstruction(Insn, 5, 1) << 4); + unsigned imm = fieldFromInstruction(Insn, 16, 6); + unsigned cmode = fieldFromInstruction(Insn, 8, 4); DecodeStatus S = MCDisassembler::Success; @@ -4421,12 +4428,12 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { - unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0); - Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4); - unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0); - Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4); - unsigned imm = fieldFromInstruction32(Insn, 16, 6); - unsigned cmode = fieldFromInstruction32(Insn, 8, 4); + unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0); + Vd |= (fieldFromInstruction(Insn, 22, 1) << 4); + unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0); + Vm |= (fieldFromInstruction(Insn, 5, 1) << 4); + unsigned imm = fieldFromInstruction(Insn, 16, 6); + unsigned cmode = fieldFromInstruction(Insn, 8, 4); DecodeStatus S = MCDisassembler::Success; @@ -4451,13 +4458,13 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Val, 16, 4); - unsigned Rt = fieldFromInstruction32(Val, 12, 4); - unsigned Rm = fieldFromInstruction32(Val, 0, 4); - Rm |= (fieldFromInstruction32(Val, 23, 1) << 4); - unsigned Cond = fieldFromInstruction32(Val, 28, 4); + unsigned Rn = fieldFromInstruction(Val, 16, 4); + unsigned Rt = fieldFromInstruction(Val, 12, 4); + unsigned Rm = fieldFromInstruction(Val, 0, 4); + Rm |= (fieldFromInstruction(Val, 23, 1) << 4); + unsigned Cond = fieldFromInstruction(Val, 28, 4); - if (fieldFromInstruction32(Val, 8, 4) != 0 || Rn == Rt) + if (fieldFromInstruction(Val, 8, 4) != 0 || Rn == Rt) S = MCDisassembler::SoftFail; if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) @@ -4479,11 +4486,11 @@ static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, 
unsigned Val, DecodeStatus S = MCDisassembler::Success; - unsigned CRm = fieldFromInstruction32(Val, 0, 4); - unsigned opc1 = fieldFromInstruction32(Val, 4, 4); - unsigned cop = fieldFromInstruction32(Val, 8, 4); - unsigned Rt = fieldFromInstruction32(Val, 12, 4); - unsigned Rt2 = fieldFromInstruction32(Val, 16, 4); + unsigned CRm = fieldFromInstruction(Val, 0, 4); + unsigned opc1 = fieldFromInstruction(Val, 4, 4); + unsigned cop = fieldFromInstruction(Val, 8, 4); + unsigned Rt = fieldFromInstruction(Val, 12, 4); + unsigned Rt2 = fieldFromInstruction(Val, 16, 4); if ((cop & ~0x1) == 0xa) return MCDisassembler::Fail; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 2f6b1b0..8b9109e 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -792,6 +792,25 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum, llvm_unreachable("Unhandled PC-relative pseudo-instruction!"); } +void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + if (MO.isExpr()) { + O << *MO.getExpr(); + return; + } + + int32_t OffImm = (int32_t)MO.getImm(); + + if (OffImm == INT32_MIN) + O << "#-0"; + else if (OffImm < 0) + O << "#-" << -OffImm; + else + O << "#" << OffImm; +} + void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { O << "#" << MI->getOperand(OpNum).getImm() * 4; @@ -953,12 +972,17 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, O << "[" << getRegisterName(MO1.getReg()); - int32_t OffImm = (int32_t)MO2.getImm() / 4; + int32_t OffImm = (int32_t)MO2.getImm(); + + assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); + // Don't print +0. - if (OffImm < 0) - O << ", #-" << -OffImm * 4; + if (OffImm == INT32_MIN) + O << ", #-0"; + else if (OffImm < 0) + O << ", #-" << -OffImm; else if (OffImm > 0) - O << ", #" << OffImm * 4; + O << ", #" << OffImm; O << "]"; } @@ -990,15 +1014,17 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - int32_t OffImm = (int32_t)MO1.getImm() / 4; + int32_t OffImm = (int32_t)MO1.getImm(); + + assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); + // Don't print +0. 
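// (The operand now carries the raw byte offset, so the old /4 and *4
// scaling pairs are gone and the alignment assert above guards it.
// INT32_MIN serves as a sentinel for "minus zero": it must print as
// #-0 so the subtract form with a zero offset stays distinguishable.)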
- if (OffImm != 0) { - O << ", "; - if (OffImm < 0) - O << "#-" << -OffImm * 4; - else if (OffImm > 0) - O << "#" << OffImm * 4; - } + if (OffImm == INT32_MIN) + O << ", #-0"; + else if (OffImm < 0) + O << ", #-" << -OffImm; + else if (OffImm > 0) + O << ", #" << OffImm; } void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 8acb7ee..73d7bfd 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -73,6 +73,7 @@ public: void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index ae11be8..de48a0e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -120,14 +120,22 @@ namespace ARM_MB { // The Memory Barrier Option constants map directly to the 4-bit encoding of // the option field for memory barrier operations. enum MemBOpt { - SY = 15, - ST = 14, - ISH = 11, - ISHST = 10, - NSH = 7, - NSHST = 6, + RESERVED_0 = 0, + RESERVED_1 = 1, + OSHST = 2, OSH = 3, - OSHST = 2 + RESERVED_4 = 4, + RESERVED_5 = 5, + NSHST = 6, + NSH = 7, + RESERVED_8 = 8, + RESERVED_9 = 9, + ISHST = 10, + ISH = 11, + RESERVED_12 = 12, + RESERVED_13 = 13, + ST = 14, + SY = 15 }; inline static const char *MemBOptToString(unsigned val) { @@ -135,92 +143,24 @@ namespace ARM_MB { default: llvm_unreachable("Unknown memory operation"); case SY: return "sy"; case ST: return "st"; + case RESERVED_13: return "#0xd"; + case RESERVED_12: return "#0xc"; case ISH: return "ish"; case ISHST: return "ishst"; + case RESERVED_9: return "#0x9"; + case RESERVED_8: return "#0x8"; case NSH: return "nsh"; case NSHST: return "nshst"; + case RESERVED_5: return "#0x5"; + case RESERVED_4: return "#0x4"; case OSH: return "osh"; case OSHST: return "oshst"; + case RESERVED_1: return "#0x1"; + case RESERVED_0: return "#0x0"; } } } // namespace ARM_MB -/// getARMRegisterNumbering - Given the enum value for some register, e.g. -/// ARM::LR, return the number that it corresponds to (e.g. 14). 
-inline static unsigned getARMRegisterNumbering(unsigned Reg) { - using namespace ARM; - switch (Reg) { - default: - llvm_unreachable("Unknown ARM register!"); - case R0: case S0: case D0: case Q0: return 0; - case R1: case S1: case D1: case Q1: return 1; - case R2: case S2: case D2: case Q2: return 2; - case R3: case S3: case D3: case Q3: return 3; - case R4: case S4: case D4: case Q4: return 4; - case R5: case S5: case D5: case Q5: return 5; - case R6: case S6: case D6: case Q6: return 6; - case R7: case S7: case D7: case Q7: return 7; - case R8: case S8: case D8: case Q8: return 8; - case R9: case S9: case D9: case Q9: return 9; - case R10: case S10: case D10: case Q10: return 10; - case R11: case S11: case D11: case Q11: return 11; - case R12: case S12: case D12: case Q12: return 12; - case SP: case S13: case D13: case Q13: return 13; - case LR: case S14: case D14: case Q14: return 14; - case PC: case S15: case D15: case Q15: return 15; - - case S16: case D16: return 16; - case S17: case D17: return 17; - case S18: case D18: return 18; - case S19: case D19: return 19; - case S20: case D20: return 20; - case S21: case D21: return 21; - case S22: case D22: return 22; - case S23: case D23: return 23; - case S24: case D24: return 24; - case S25: case D25: return 25; - case S26: case D26: return 26; - case S27: case D27: return 27; - case S28: case D28: return 28; - case S29: case D29: return 29; - case S30: case D30: return 30; - case S31: case D31: return 31; - - // Composite registers use the regnum of the first register in the list. - /* Q0 */ case D0_D2: return 0; - case D1_D2: case D1_D3: return 1; - /* Q1 */ case D2_D4: return 2; - case D3_D4: case D3_D5: return 3; - /* Q2 */ case D4_D6: return 4; - case D5_D6: case D5_D7: return 5; - /* Q3 */ case D6_D8: return 6; - case D7_D8: case D7_D9: return 7; - /* Q4 */ case D8_D10: return 8; - case D9_D10: case D9_D11: return 9; - /* Q5 */ case D10_D12: return 10; - case D11_D12: case D11_D13: return 11; - /* Q6 */ case D12_D14: return 12; - case D13_D14: case D13_D15: return 13; - /* Q7 */ case D14_D16: return 14; - case D15_D16: case D15_D17: return 15; - /* Q8 */ case D16_D18: return 16; - case D17_D18: case D17_D19: return 17; - /* Q9 */ case D18_D20: return 18; - case D19_D20: case D19_D21: return 19; - /* Q10 */ case D20_D22: return 20; - case D21_D22: case D21_D23: return 21; - /* Q11 */ case D22_D24: return 22; - case D23_D24: case D23_D25: return 23; - /* Q12 */ case D24_D26: return 24; - case D25_D26: case D25_D27: return 25; - /* Q13 */ case D26_D28: return 26; - case D27_D28: case D27_D29: return 27; - /* Q14 */ case D28_D30: return 28; - case D29_D30: case D29_D31: return 29; - /* Q15 */ - } -} - /// isARMLowRegister - Returns true if the register is a low register (r0-r7). 
/// static inline bool isARMLowRegister(unsigned Reg) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 1964bcd..94f1082 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -18,6 +18,7 @@ #include "MCTargetDesc/ARMMCExpr.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" @@ -38,11 +39,12 @@ class ARMMCCodeEmitter : public MCCodeEmitter { void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT const MCInstrInfo &MCII; const MCSubtargetInfo &STI; + const MCContext &CTX; public: ARMMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, MCContext &ctx) - : MCII(mcii), STI(sti) { + : MCII(mcii), STI(sti), CTX(ctx) { } ~ARMMCCodeEmitter() {} @@ -405,7 +407,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups) const { if (MO.isReg()) { unsigned Reg = MO.getReg(); - unsigned RegNo = getARMRegisterNumbering(Reg); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg); // Q registers are encoded as 2x their register number. switch (Reg) { @@ -434,7 +436,7 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - Reg = getARMRegisterNumbering(MO.getReg()); + Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); int32_t SImm = MO1.getImm(); bool isAdd = true; @@ -641,8 +643,8 @@ getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, return Val; } -/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label -/// target. +/// getAdrLabelOpValue - Return encoding info for 12-bit shifted-immediate +/// ADR label target. uint32_t ARMMCCodeEmitter:: getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { @@ -652,15 +654,23 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, Fixups); int32_t offset = MO.getImm(); uint32_t Val = 0x2000; - if (offset < 0) { + + if (offset == INT32_MIN) { + Val = 0x1000; + offset = 0; + } else if (offset < 0) { Val = 0x1000; offset *= -1; } - Val |= offset; + + int SoImmVal = ARM_AM::getSOImmVal(offset); + assert(SoImmVal != -1 && "Not a valid so_imm value!"); + + Val |= SoImmVal; return Val; } -/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label +/// getT2AdrLabelOpValue - Return encoding info for 12-bit immediate ADR label /// target. uint32_t ARMMCCodeEmitter:: getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx, @@ -670,14 +680,16 @@ getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx, return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12, Fixups); int32_t Val = MO.getImm(); - if (Val < 0) { + if (Val == INT32_MIN) + Val = 0x1000; + else if (Val < 0) { Val *= -1; Val |= 0x1000; } return Val; } -/// getAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label +/// getThumbAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label /// target. 
uint32_t ARMMCCodeEmitter:: getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, @@ -699,8 +711,8 @@ getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx, // {2-0} = Rn const MCOperand &MO1 = MI.getOperand(OpIdx); const MCOperand &MO2 = MI.getOperand(OpIdx + 1); - unsigned Rn = getARMRegisterNumbering(MO1.getReg()); - unsigned Rm = getARMRegisterNumbering(MO2.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); + unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO2.getReg()); return (Rm << 3) | Rn; } @@ -716,7 +728,7 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. Imm12 = 0; isAdd = false ; // 'U' bit is set as part of the fixup. @@ -796,7 +808,7 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. Imm8 = 0; isAdd = false ; // 'U' bit is set as part of the fixup. @@ -832,7 +844,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, // {7-0} = imm8 const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - unsigned Reg = getARMRegisterNumbering(MO.getReg()); + unsigned Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); unsigned Imm8 = MO1.getImm(); return (Reg << 8) | Imm8; } @@ -915,8 +927,8 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); const MCOperand &MO2 = MI.getOperand(OpIdx+2); - unsigned Rn = getARMRegisterNumbering(MO.getReg()); - unsigned Rm = getARMRegisterNumbering(MO1.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()); bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add; ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm()); @@ -946,7 +958,7 @@ getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx, // {12} isAdd // {11-0} imm12/Rm const MCOperand &MO = MI.getOperand(OpIdx); - unsigned Rn = getARMRegisterNumbering(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups); Binary |= Rn << 14; return Binary; @@ -969,7 +981,7 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm); Binary <<= 7; // Shift amount is bits [11:7] Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5] - Binary |= getARMRegisterNumbering(MO.getReg()); // Rm is bits [3:0] + Binary |= CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Rm is bits [3:0] } return Binary | (isAdd << 12) | (isReg << 13); } @@ -982,7 +994,7 @@ getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); bool isAdd = MO1.getImm() != 0; - return getARMRegisterNumbering(MO.getReg()) | (isAdd << 4); + return CTX.getRegisterInfo().getEncodingValue(MO.getReg()) | (isAdd << 4); } uint32_t 
ARMMCCodeEmitter:: @@ -1000,7 +1012,7 @@ getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t Imm8 = ARM_AM::getAM3Offset(Imm); // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8 if (!isImm) - Imm8 = getARMRegisterNumbering(MO.getReg()); + Imm8 = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); return Imm8 | (isAdd << 8) | (isImm << 9); } @@ -1018,7 +1030,7 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. if (!MO.isReg()) { - unsigned Rn = getARMRegisterNumbering(ARM::PC); // Rn is PC. + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. assert(MO.isExpr() && "Unexpected machine operand type!"); const MCExpr *Expr = MO.getExpr(); @@ -1028,14 +1040,14 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, ++MCNumCPRelocations; return (Rn << 9) | (1 << 13); } - unsigned Rn = getARMRegisterNumbering(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); unsigned Imm = MO2.getImm(); bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add; bool isImm = MO1.getReg() == 0; uint32_t Imm8 = ARM_AM::getAM3Offset(Imm); // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8 if (!isImm) - Imm8 = getARMRegisterNumbering(MO1.getReg()); + Imm8 = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13); } @@ -1063,7 +1075,7 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx, // {2-0} = Rn const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - unsigned Rn = getARMRegisterNumbering(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); unsigned Imm5 = MO1.getImm(); return ((Imm5 & 0x1f) << 3) | Rn; } @@ -1090,7 +1102,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. Imm8 = 0; isAdd = false; // 'U' bit is handled as part of the fixup. @@ -1136,7 +1148,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm()); // Encode Rm. - unsigned Binary = getARMRegisterNumbering(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1161,7 +1173,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, // Encode the shift operation Rs. // Encode Rs bit[11:8]. assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift); + return Binary | (CTX.getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift); } unsigned ARMMCCodeEmitter:: @@ -1180,7 +1192,7 @@ getSORegImmOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm()); // Encode Rm. - unsigned Binary = getARMRegisterNumbering(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1219,9 +1231,9 @@ getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum, // Encoded as [Rn, Rm, imm]. // FIXME: Needs fixup support. 
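// The two registers and the 2-bit immediate pack as {9-6} = Rn,
// {5-2} = Rm, {1-0} = imm, built up by the shifts below; only the
// register-number lookup changes, not the layout.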
- unsigned Value = getARMRegisterNumbering(MO1.getReg()); + unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); Value <<= 4; - Value |= getARMRegisterNumbering(MO2.getReg()); + Value |= CTX.getRegisterInfo().getEncodingValue(MO2.getReg()); Value <<= 2; Value |= MO3.getImm(); @@ -1235,7 +1247,7 @@ getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum, const MCOperand &MO2 = MI.getOperand(OpNum+1); // FIXME: Needs fixup support. - unsigned Value = getARMRegisterNumbering(MO1.getReg()); + unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); // Even though the immediate is 8 bits long, we need 9 bits in order // to represent the (inverse of the) sign bit. @@ -1297,7 +1309,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm()); // Encode Rm. - unsigned Binary = getARMRegisterNumbering(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1353,7 +1365,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, if (SPRRegs || DPRRegs) { // VLDM/VSTM - unsigned RegNo = getARMRegisterNumbering(Reg); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg); unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff; Binary |= (RegNo & 0x1f) << 8; if (SPRRegs) @@ -1362,7 +1374,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, Binary |= NumRegs * 2; } else { for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) { - unsigned RegNo = getARMRegisterNumbering(MI.getOperand(I).getReg()); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(MI.getOperand(I).getReg()); Binary |= 1 << RegNo; } } @@ -1378,7 +1390,7 @@ getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = getARMRegisterNumbering(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1401,7 +1413,7 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = getARMRegisterNumbering(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1427,7 +1439,7 @@ getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = getARMRegisterNumbering(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1446,7 +1458,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(Op); if (MO.getReg() == 0) return 0x0D; - return getARMRegisterNumbering(MO.getReg()); + return CTX.getRegisterInfo().getEncodingValue(MO.getReg()); } unsigned ARMMCCodeEmitter:: diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 78faf59..a51e0fa 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -408,15 +408,22 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // Even when it's not a scattered relocation, movw/movt always uses // a PAIR relocation. 
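// For a movw fixup the instruction itself encodes the low half of the
// symbol value, so the PAIR entry must carry the high 16 bits
// (FixedValue >> 16); for movt it is the reverse and the pair carries
// the low 16 bits (FixedValue & 0xffff).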
if (Type == macho::RIT_ARM_Half) { - // The other-half value only gets populated for the movt relocation. + // The other-half value only gets populated for the movt and movw + // relocation entries. uint32_t Value = 0;; switch ((unsigned)Fixup.getKind()) { default: break; + case ARM::fixup_arm_movw_lo16: + case ARM::fixup_arm_movw_lo16_pcrel: + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movw_lo16_pcrel: + Value = (FixedValue >> 16) & 0xffff; + break; case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movt_hi16_pcrel: - Value = FixedValue; + Value = FixedValue & 0xffff; break; } macho::RelocationEntry MREPair; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 2097bb9..e9e20dd 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -563,48 +563,6 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return Offset == 0; } -/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the -/// two-addrss instruction inserted by two-address pass. -void -Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, - MachineInstr *UseMI, - const TargetRegisterInfo &TRI) const { - if (SrcMI->getOpcode() != ARM::tMOVr || SrcMI->getOperand(1).isKill()) - return; - - unsigned PredReg = 0; - ARMCC::CondCodes CC = getInstrPredicate(UseMI, PredReg); - if (CC == ARMCC::AL || PredReg != ARM::CPSR) - return; - - // Schedule the copy so it doesn't come between previous instructions - // and UseMI which can form an IT block. - unsigned SrcReg = SrcMI->getOperand(1).getReg(); - ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC); - MachineBasicBlock *MBB = UseMI->getParent(); - MachineBasicBlock::iterator MBBI = SrcMI; - unsigned NumInsts = 0; - while (--MBBI != MBB->begin()) { - if (MBBI->isDebugValue()) - continue; - - MachineInstr *NMI = &*MBBI; - ARMCC::CondCodes NCC = getInstrPredicate(NMI, PredReg); - if (!(NCC == CC || NCC == OCC) || - NMI->modifiesRegister(SrcReg, &TRI) || - NMI->modifiesRegister(ARM::CPSR, &TRI)) - break; - if (++NumInsts == 4) - // Too many in a row! - return; - } - - if (NumInsts) { - MBB->remove(SrcMI); - MBB->insert(++MBBI, SrcMI); - } -} - ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { unsigned Opc = MI->getOpcode(); diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 0911f8a..2cdcd06 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -57,11 +57,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the - /// two-addrss instruction inserted by two-address pass. - void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI, - const TargetRegisterInfo &TRI) const; - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). 
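The ARM hunks above all make the same substitution: the hand-maintained getARMRegisterNumbering switch is deleted and every encoder queries the TableGen-generated encoding table through its MCContext instead. A minimal sketch of the pattern follows; the class and member names here are illustrative, not the exact ones from the patch.

    #include "llvm/MC/MCCodeEmitter.h"
    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCRegisterInfo.h"
    using namespace llvm;

    class ExampleCodeEmitter : public MCCodeEmitter {
      const MCContext &CTX;  // stashed at construction time, as the patch does
    public:
      explicit ExampleCodeEmitter(MCContext &ctx) : CTX(ctx) {}

      // Registers no longer go through a giant switch; the HWEncoding
      // values emitted by TableGen are looked up at runtime instead.
      unsigned encodeReg(const MCOperand &MO) const {
        return CTX.getRegisterInfo().getEncodingValue(MO.getReg());
      }
    };

This keeps the encoding data in one place, the .td register definitions, rather than duplicating it in target C++ code.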
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index c8e757b..4ddcd38 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -285,14 +285,14 @@ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) { Out << "GlobalValue::LinkerPrivateLinkage"; break; case GlobalValue::LinkerPrivateWeakLinkage: Out << "GlobalValue::LinkerPrivateWeakLinkage"; break; - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: - Out << "GlobalValue::LinkerPrivateWeakDefAutoLinkage"; break; case GlobalValue::AvailableExternallyLinkage: Out << "GlobalValue::AvailableExternallyLinkage "; break; case GlobalValue::LinkOnceAnyLinkage: Out << "GlobalValue::LinkOnceAnyLinkage "; break; case GlobalValue::LinkOnceODRLinkage: Out << "GlobalValue::LinkOnceODRLinkage "; break; + case GlobalValue::LinkOnceODRAutoHideLinkage: + Out << "GlobalValue::LinkOnceODRAutoHideLinkage"; break; case GlobalValue::WeakAnyLinkage: Out << "GlobalValue::WeakAnyLinkage"; break; case GlobalValue::WeakODRLinkage: diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 1357cc5..d756aec 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -328,7 +328,10 @@ CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const { // can get a useful trip count. The trip count can // be either a register or an immediate. The location // of the value depends upon the type (reg or imm). - while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) { + for (MachineRegisterInfo::reg_iterator + RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); + RI != RE; ++RI) { + IV_Opnd = &RI.getOperand(); const MachineInstr *MI = IV_Opnd->getParent(); if (L->contains(MI) && isCompareEqualsImm(MI)) { const MachineOperand &MO = MI->getOperand(2); diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index c7be5ce..c0c0df6 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -2580,22 +2580,16 @@ let isCall = 1, neverHasSideEffects = 1, } // Tail Calls. 
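// (The TCRETURN* pseudos below drop their explicit Defs clobber lists,
// keeping only the isCall/isReturn/isBarrier/isTerminator flags,
// presumably deferring clobber information to the usual calling
// convention machinery.)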
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, - R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { +let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { def TCRETURNtg : JInst<(outs), (ins calltarget:$dst), "jump $dst // TAILCALL", []>; } -let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, - R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { +let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { def TCRETURNtext : JInst<(outs), (ins calltarget:$dst), "jump $dst // TAILCALL", []>; } -let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, - R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { +let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { def TCRETURNR : JInst<(outs), (ins IntRegs:$dst), "jumpr $dst // TAILCALL", []>; } diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 5d087db..4bacb8f 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -40,28 +40,27 @@ EnableIEEERndNear( HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS): HexagonGenSubtargetInfo(TT, CPU, FS), - HexagonArchVersion(V2), CPUString(CPU.str()) { - ParseSubtargetFeatures(CPU, FS); - switch(HexagonArchVersion) { - case HexagonSubtarget::V2: - break; - case HexagonSubtarget::V3: - EnableV3 = true; - break; - case HexagonSubtarget::V4: - break; - case HexagonSubtarget::V5: - break; - default: - // If the programmer has not specified a Hexagon version, default - // to -mv4. + // If the programmer has not specified a Hexagon version, default to -mv4. + if (CPUString.empty()) CPUString = "hexagonv4"; - HexagonArchVersion = HexagonSubtarget::V4; - break; + + if (CPUString == "hexagonv2") { + HexagonArchVersion = V2; + } else if (CPUString == "hexagonv3") { + EnableV3 = true; + HexagonArchVersion = V3; + } else if (CPUString == "hexagonv4") { + HexagonArchVersion = V4; + } else if (CPUString == "hexagonv5") { + HexagonArchVersion = V5; + } else { + llvm_unreachable("Unrecognized Hexagon processor version"); } + ParseSubtargetFeatures(CPUString, FS); + // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index 786a0c5..05f6fa6 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -183,8 +183,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName, ManglerPrefixTy PrefixTy = Mangler::Default; if (GV->hasPrivateLinkage() || isImplicitlyPrivate) PrefixTy = Mangler::Private; - else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage() || - GV->hasLinkerPrivateWeakDefAutoLinkage()) + else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage()) PrefixTy = Mangler::LinkerPrivate; // If this global has a name, handle it simply. diff --git a/lib/Target/Mips/AsmParser/CMakeLists.txt b/lib/Target/Mips/AsmParser/CMakeLists.txt index 6c7343b..28f5219 100644 --- a/lib/Target/Mips/AsmParser/CMakeLists.txt +++ b/lib/Target/Mips/AsmParser/CMakeLists.txt @@ -1,3 +1,4 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) add_llvm_library(LLVMMipsAsmParser MipsAsmParser.cpp ) diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 58b5590..43bd345 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -11,11 +11,20 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/MathExtras.h" using namespace llvm; namespace { class MipsAsmParser : public MCTargetAsmParser { + +#define GET_ASSEMBLER_HEADER +#include "MipsGenAsmMatcher.inc" + bool MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); @@ -23,10 +32,11 @@ class MipsAsmParser : public MCTargetAsmParser { bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); bool ParseInstruction(StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands); + SmallVectorImpl<MCParsedAsmOperand*> &Operands); bool ParseDirective(AsmToken DirectiveID); + OperandMatchResultTy parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&); public: MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) : MCTargetAsmParser() { @@ -35,6 +45,57 @@ public: }; } +namespace { + +/// MipsOperand - Instances of this class represent a parsed Mips machine +/// instruction. +class MipsOperand : public MCParsedAsmOperand { + enum KindTy { + k_CondCode, + k_CoprocNum, + k_Immediate, + k_Memory, + k_PostIndexRegister, + k_Register, + k_Token + } Kind; + + MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} +public: + void addRegOperands(MCInst &Inst, unsigned N) const { + llvm_unreachable("unimplemented!"); + } + void addExpr(MCInst &Inst, const MCExpr *Expr) const{ + llvm_unreachable("unimplemented!"); + } + void addImmOperands(MCInst &Inst, unsigned N) const { + llvm_unreachable("unimplemented!"); + } + void addMemOperands(MCInst &Inst, unsigned N) const { + llvm_unreachable("unimplemented!"); + } + + bool isReg() const { return Kind == k_Register; } + bool isImm() const { return Kind == k_Immediate; } + bool isToken() const { return Kind == k_Token; } + bool isMem() const { return Kind == k_Memory; } + + StringRef getToken() const { + assert(Kind == k_Token && "Invalid access!"); + return ""; + } + + unsigned getReg() const { + assert((Kind == k_Register) && "Invalid access!"); + return 0; + } + + virtual void print(raw_ostream &OS) const { + llvm_unreachable("unimplemented!"); + } +}; +} + bool MipsAsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, @@ -58,6 +119,11 @@ ParseDirective(AsmToken DirectiveID) { return true; } +MipsAsmParser::OperandMatchResultTy MipsAsmParser:: + parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&) { + return MatchOperand_ParseFail; +} + extern "C" void LLVMInitializeMipsAsmParser() { RegisterMCAsmParser<MipsAsmParser> X(TheMipsTarget); RegisterMCAsmParser<MipsAsmParser> Y(TheMipselTarget); diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index e9a228c..f535c50 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -10,13 +10,18 @@ tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel) tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv) tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM MipsGenEDInfo.inc 
-gen-enhanced-disassembly-info) +tablegen(LLVM MipsGenAsmMatcher.inc -gen-asm-matcher) add_public_tablegen_target(MipsCommonTableGen) add_llvm_target(MipsCodeGen + Mips16FrameLowering.cpp + Mips16InstrInfo.cpp + Mips16RegisterInfo.cpp MipsAnalyzeImmediate.cpp MipsAsmPrinter.cpp MipsCodeEmitter.cpp MipsDelaySlotFiller.cpp + MipsELFWriterInfo.cpp MipsJITInfo.cpp MipsInstrInfo.cpp MipsISelDAGToDAG.cpp @@ -26,6 +31,9 @@ add_llvm_target(MipsCodeGen MipsMCInstLower.cpp MipsMachineFunction.cpp MipsRegisterInfo.cpp + MipsSEFrameLowering.cpp + MipsSEInstrInfo.cpp + MipsSERegisterInfo.cpp MipsSubtarget.cpp MipsTargetMachine.cpp MipsTargetObjectFile.cpp diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 042b456..aa57472 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -16,6 +16,7 @@ #include "MipsRegisterInfo.h" #include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -274,7 +275,8 @@ MipsDisassembler::getInstruction(MCInst &instr, return MCDisassembler::Fail; // Calling the auto-generated decoder function. - Result = decodeMipsInstruction32(instr, Insn, Address, this, STI); + Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address, + this, STI); if (Result != MCDisassembler::Fail) { Size = 4; return Result; @@ -298,13 +300,15 @@ Mips64Disassembler::getInstruction(MCInst &instr, return MCDisassembler::Fail; // Calling the auto-generated decoder function. - Result = decodeMips64Instruction32(instr, Insn, Address, this, STI); + Result = decodeInstruction(DecoderTableMips6432, instr, Insn, Address, + this, STI); if (Result != MCDisassembler::Fail) { Size = 4; return Result; } // If we fail to decode in Mips64 decoder space we can try in Mips32 - Result = decodeMipsInstruction32(instr, Insn, Address, this, STI); + Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address, + this, STI); if (Result != MCDisassembler::Fail) { Size = 4; return Result; @@ -379,8 +383,8 @@ static DecodeStatus DecodeMem(MCInst &Inst, uint64_t Address, const void *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); - unsigned Reg = fieldFromInstruction32(Insn, 16, 5); - unsigned Base = fieldFromInstruction32(Insn, 21, 5); + unsigned Reg = fieldFromInstruction(Insn, 16, 5); + unsigned Base = fieldFromInstruction(Insn, 21, 5); Reg = getReg(Decoder, Mips::CPURegsRegClassID, Reg); Base = getReg(Decoder, Mips::CPURegsRegClassID, Base); @@ -401,8 +405,8 @@ static DecodeStatus DecodeFMem(MCInst &Inst, uint64_t Address, const void *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); - unsigned Reg = fieldFromInstruction32(Insn, 16, 5); - unsigned Base = fieldFromInstruction32(Insn, 21, 5); + unsigned Reg = fieldFromInstruction(Insn, 16, 5); + unsigned Base = fieldFromInstruction(Insn, 21, 5); Reg = getReg(Decoder, Mips::FGR64RegClassID, Reg); Base = getReg(Decoder, Mips::CPURegsRegClassID, Base); @@ -484,7 +488,7 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst, uint64_t Address, const void *Decoder) { - unsigned JumpOffset = fieldFromInstruction32(Insn, 0, 26) << 2; + unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2; Inst.addOperand(MCOperand::CreateImm(JumpOffset)); return MCDisassembler::Success; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp 
b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 6fe0c11..18961fd 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -35,6 +35,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { return 0; case FK_GPRel_4: case FK_Data_4: + case FK_Data_8: case Mips::fixup_Mips_LO16: case Mips::fixup_Mips_GPOFF_HI: case Mips::fixup_Mips_GPOFF_LO: @@ -59,9 +60,17 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { break; case Mips::fixup_Mips_HI16: case Mips::fixup_Mips_GOT_Local: - // Get the higher 16-bits. Also add 1 if bit 15 is 1. + // Get the 2nd 16-bits. Also add 1 if bit 15 is 1. Value = ((Value + 0x8000) >> 16) & 0xffff; break; + case Mips::fixup_Mips_HIGHER: + // Get the 3rd 16-bits. + Value = ((Value + 0x80008000LL) >> 32) & 0xffff; + break; + case Mips::fixup_Mips_HIGHEST: + // Get the 4th 16-bits. + Value = ((Value + 0x800080008000LL) >> 48) & 0xffff; + break; } return Value; @@ -168,7 +177,9 @@ public: { "fixup_Mips_GPOFF_LO", 0, 16, 0 }, { "fixup_Mips_GOT_PAGE", 0, 16, 0 }, { "fixup_Mips_GOT_OFST", 0, 16, 0 }, - { "fixup_Mips_GOT_DISP", 0, 16, 0 } + { "fixup_Mips_GOT_DISP", 0, 16, 0 }, + { "fixup_Mips_HIGHER", 0, 16, 0 }, + { "fixup_Mips_HIGHEST", 0, 16, 0 } }; if (Kind < FirstTargetFixupKind) diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 77c1524..b8489ca 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -34,7 +34,8 @@ namespace { class MipsELFObjectWriter : public MCELFObjectTargetWriter { public: - MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64); + MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, + bool _isN64, bool IsLittleEndian); virtual ~MipsELFObjectWriter(); @@ -53,7 +54,7 @@ namespace { } MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, - bool _isN64) + bool _isN64, bool IsLittleEndian) : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS, /*HasRelocationAddend*/ false, /*IsN64*/ _isN64) {} @@ -103,6 +104,9 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, case FK_Data_4: Type = ELF::R_MIPS_32; break; + case FK_Data_8: + Type = ELF::R_MIPS_64; + break; case FK_GPRel_4: Type = ELF::R_MIPS_GPREL32; break; @@ -169,6 +173,12 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type); Type = setRType3((unsigned)ELF::R_MIPS_LO16, Type); break; + case Mips::fixup_Mips_HIGHER: + Type = ELF::R_MIPS_HIGHER; + break; + case Mips::fixup_Mips_HIGHEST: + Type = ELF::R_MIPS_HIGHEST; + break; } return Type; } @@ -265,6 +275,7 @@ MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, bool IsLittleEndian, bool Is64Bit) { MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI, - (Is64Bit) ? true : false); + (Is64Bit) ? 
true : false, + IsLittleEndian); return createELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index f5cbbd5..77faec5 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -110,6 +110,12 @@ namespace Mips { // resulting in - R_MIPS_GOT_DISP fixup_Mips_GOT_DISP, + // resulting in - R_MIPS_GOT_HIGHER + fixup_Mips_HIGHER, + + // resulting in - R_MIPS_HIGHEST + fixup_Mips_HIGHEST, + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index ff3b3a7..8dab62d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -255,6 +255,12 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, case MCSymbolRefExpr::VK_Mips_TPREL_LO: FixupKind = Mips::fixup_Mips_TPREL_LO; break; + case MCSymbolRefExpr::VK_Mips_HIGHER: + FixupKind = Mips::fixup_Mips_HIGHER; + break; + case MCSymbolRefExpr::VK_Mips_HIGHEST: + FixupKind = Mips::fixup_Mips_HIGHEST; + break; } // switch Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind))); diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index 596f071..93de517 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -16,7 +16,9 @@ BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \ MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \ - MipsGenEDInfo.inc MipsGenDisassemblerTables.inc + MipsGenEDInfo.inc MipsGenDisassemblerTables.inc \ + MipsGenAsmMatcher.inc + DIRS = InstPrinter Disassembler AsmParser TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 8548ae0..7cec531 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -44,6 +44,8 @@ def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64", "Enable n64 ABI">; def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI", "Enable eabi ABI">; +def FeatureAndroid : SubtargetFeature<"android", "IsAndroid", "true", + "Target is android">; def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU", "true", "Enable vector FPU instructions.">; def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true", @@ -93,9 +95,20 @@ def MipsAsmWriter : AsmWriter { bit isMCAsmWriter = 1; } +def MipsAsmParser : AsmParser { + let ShouldEmitMatchRegisterName = 0; +} + +def MipsAsmParserVariant : AsmParserVariant { + int Variant = 0; + + // Recognize hard coded registers. + string RegisterPrefix = "$"; +} + def Mips : Target { let InstructionSet = MipsInstrInfo; - + let AssemblyParsers = [MipsAsmParser]; let AssemblyWriters = [MipsAsmWriter]; + let AssemblyParserVariants = [MipsAsmParserVariant]; } - diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp new file mode 100644 index 0000000..030042f --- /dev/null +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -0,0 +1,87 @@ +//===-- Mips16FrameLowering.cpp - Mips16 Frame Information ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
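
The fixup_Mips_HIGHER/fixup_Mips_HIGHEST cases added to adjustFixupValue() in the MipsAsmBackend.cpp hunk above each extract one 16-bit slice of a 64-bit address, pre-biased by half the range of the slices below so that the sign extension applied when the address is rebuilt (lui/daddiu/dsll chains) cancels out. A minimal standalone sketch of that arithmetic; the helper names are mine, not part of the patch:

  #include <cassert>
  #include <cstdint>

  // Mirror the carry-rounded slice extraction from adjustFixupValue().
  static uint64_t hi16(uint64_t V)      { return ((V + 0x8000) >> 16) & 0xffff; }
  static uint64_t higher16(uint64_t V)  { return ((V + 0x80008000ULL) >> 32) & 0xffff; }
  static uint64_t highest16(uint64_t V) { return ((V + 0x800080008000ULL) >> 48) & 0xffff; }

  // Portable "treat the low 16 bits as signed", as daddiu does.
  static int64_t sext16(uint64_t V) {
    int64_t X = (int64_t)(V & 0xffff);
    return X >= 0x8000 ? X - 0x10000 : X;
  }

  int main() {
    const uint64_t Addr = 0x0000000180008000ULL;
    int64_t R = (int64_t)highest16(Addr);   // lui
    R = (R << 16) + sext16(higher16(Addr)); // daddiu + dsll
    R = (R << 16) + sext16(hi16(Addr));     // daddiu + dsll
    R = (R << 16) + sext16(Addr);           // final daddiu with %lo
    assert((uint64_t)R == Addr);            // slices reassemble exactly
    return 0;
  }
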
+// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips16 implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "Mips16FrameLowering.h" +#include "MipsInstrInfo.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MipsInstrInfo &TII = + *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + uint64_t StackSize = MFI->getStackSize(); + + // No need to allocate space on the stack. + if (StackSize == 0 && !MFI->adjustsStack()) return; + + // Adjust stack. + if (isInt<16>(-StackSize)) + BuildMI(MBB, MBBI, dl, TII.get(Mips::SaveRaF16)).addImm(StackSize); +} + +void Mips16FrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MipsInstrInfo &TII = + *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); + DebugLoc dl = MBBI->getDebugLoc(); + uint64_t StackSize = MFI->getStackSize(); + + if (!StackSize) + return; + + // Adjust stack. + if (isInt<16>(StackSize)) + // assumes stacksize multiple of 8 + BuildMI(MBB, MBBI, dl, TII.get(Mips::RestoreRaF16)).addImm(StackSize); +} + +bool Mips16FrameLowering:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + // FIXME: implement. + return true; +} + +bool +Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + // FIXME: implement. + return true; +} + +void Mips16FrameLowering:: +processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { +} + +const MipsFrameLowering * +llvm::createMips16FrameLowering(const MipsSubtarget &ST) { + return new Mips16FrameLowering(ST); +} diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h new file mode 100644 index 0000000..25cc37b --- /dev/null +++ b/lib/Target/Mips/Mips16FrameLowering.h @@ -0,0 +1,43 @@ +//===-- Mips16FrameLowering.h - Mips16 frame lowering ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
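
The Mips16 emitPrologue()/emitEpilogue() above only emit SaveRaF16/RestoreRaF16 when the frame size fits a signed 16-bit immediate; there is no else branch yet, so larger frames are left unhandled for now. A sketch of that guard, with isInt<16> re-implemented locally so the example is self-contained:

  #include <cstdint>
  #include <initializer_list>
  #include <iostream>

  // Local stand-in for llvm::isInt<16>(): does V fit a signed 16-bit field?
  template <unsigned N> static bool isIntN(int64_t V) {
    return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
  }

  int main() {
    for (int64_t StackSize : {0, 32760, 40000}) {
      // emitPrologue() guards "save $ra, StackSize" with isInt<16>(-StackSize);
      // emitEpilogue() guards the restore with isInt<16>(StackSize).
      bool Fits = isIntN<16>(-StackSize) && isIntN<16>(StackSize);
      std::cout << StackSize
                << (Fits ? ": single save/restore\n"
                         : ": would need a larger-immediate expansion\n");
    }
    return 0;
  }
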
+// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef MIPS16_FRAMEINFO_H +#define MIPS16_FRAMEINFO_H + +#include "MipsFrameLowering.h" + +namespace llvm { +class Mips16FrameLowering : public MipsFrameLowering { +public: + explicit Mips16FrameLowering(const MipsSubtarget &STI) + : MipsFrameLowering(STI) {} + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + bool hasReservedCallFrame(const MachineFunction &MF) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp new file mode 100644 index 0000000..2bc286b --- /dev/null +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -0,0 +1,132 @@ +//===-- Mips16InstrInfo.cpp - Mips16 Instruction Information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips16 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "Mips16InstrInfo.h" +#include "MipsTargetMachine.h" +#include "MipsMachineFunction.h" +#include "InstPrinter/MipsInstPrinter.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" + +using namespace llvm; + +Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm) + : MipsInstrInfo(tm, /* FIXME: set mips16 unconditional br */ 0), + RI(*tm.getSubtargetImpl(), *this) {} + +const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const { + return RI; +} + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned Mips16InstrInfo:: +isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const +{ + return 0; +} + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. 
+unsigned Mips16InstrInfo:: +isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const +{ + return 0; +} + +void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc = 0, ZeroReg = 0; + + if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg. + if (Mips::CPURegsRegClass.contains(SrcReg)) + Opc = Mips::Mov32R16; + } + + assert(Opc && "Cannot copy registers"); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); + + if (DestReg) + MIB.addReg(DestReg, RegState::Define); + + if (ZeroReg) + MIB.addReg(ZeroReg); + + if (SrcReg) + MIB.addReg(SrcReg, getKillRegState(KillSrc)); +} + +void Mips16InstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + assert(false && "Implement this function."); +} + +void Mips16InstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + assert(false && "Implement this function."); +} + +bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + MachineBasicBlock &MBB = *MI->getParent(); + + switch(MI->getDesc().getOpcode()) { + default: + return false; + case Mips::RetRA16: + ExpandRetRA16(MBB, MI, Mips::JrRa16); + break; + } + + MBB.erase(MI); + return true; +} + +/// GetOppositeBranchOpc - Return the inverse of the specified +/// opcode, e.g. turning BEQ to BNE. +unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const { + assert(false && "Implement this function."); + return 0; +} + +unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { + return 0; +} + +void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned Opc) const { + BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); +} + +const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) { + return new Mips16InstrInfo(TM); +} diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h new file mode 100644 index 0000000..260c5b6 --- /dev/null +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -0,0 +1,76 @@ +//===-- Mips16InstrInfo.h - Mips16 Instruction Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips16 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPS16INSTRUCTIONINFO_H +#define MIPS16INSTRUCTIONINFO_H + +#include "MipsInstrInfo.h" +#include "Mips16RegisterInfo.h" + +namespace llvm { + +class Mips16InstrInfo : public MipsInstrInfo { + const Mips16RegisterInfo RI; + +public: + explicit Mips16InstrInfo(MipsTargetMachine &TM); + + virtual const MipsRegisterInfo &getRegisterInfo() const; + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. 
This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + + virtual unsigned GetOppositeBranchOpc(unsigned Opc) const; + +private: + virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; + + void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned Opc) const; +}; + +} + +#endif diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index c852042..94cf984 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -11,10 +11,6 @@ // //===----------------------------------------------------------------------===// -def uimm5 : Operand<i8> { - let DecoderMethod= "DecodeSimm16"; -} - // // RRR-type instruction format // @@ -46,9 +42,32 @@ class FEXT_RI16_ins<bits<5> _op, string asmstr, class FEXT_RI16_PC_ins<bits<5> _op, string asmstr, InstrItinClass itin>: FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>; + +class FEXT_2RI16_ins<bits<5> _op, string asmstr, + InstrItinClass itin>: + FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm), + !strconcat(asmstr, "\t$rx, $imm"), [], itin> { + let Constraints = "$rx_ = $rx"; +} + + // // RR-type instruction format // + +class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> : + FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry), + !strconcat(asmstr, "\t$rx, $ry"), [], itin> { +} + +class FRxRxRy16_ins<bits<5> f, string asmstr, + InstrItinClass itin> : + FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry), + !strconcat(asmstr, "\t$rz, $ry"), + [], itin> { + let Constraints = "$rx = $rz"; +} + let rx=0 in class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_, string asmstr, InstrItinClass itin>: @@ -64,11 +83,16 @@ class FEXT_RRI16_mem_ins<bits<5> op, string asmstr, Operand MemOpnd, FEXT_RRI16<op, (outs CPU16Regs:$ry), (ins MemOpnd:$addr), !strconcat(asmstr, "\t$ry, $addr"), [], itin>; +class FEXT_RRI16_mem2_ins<bits<5> op, string asmstr, Operand MemOpnd, + InstrItinClass itin>: + FEXT_RRI16<op, (outs ), (ins CPU16Regs:$ry, MemOpnd:$addr), + !strconcat(asmstr, "\t$ry, $addr"), [], itin>; + // // EXT-SHIFT instruction format // class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>: - FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, uimm5:$sa), + 
FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, shamt:$sa),
!strconcat(asmstr, "\t$rx, $ry, $sa"), [], itin>;
//
@@ -80,20 +104,49 @@ def mem16 : Operand<i32> {
}
//
+// Some general instruction class info
+//
+//
+
+class ArithLogic16Defs<bit isCom=0> {
+  bits<5> shamt = 0;
+  bit isCommutable = isCom;
+  bit isReMaterializable = 1;
+  bit neverHasSideEffects = 1;
+}
+
+//
+
+// Format: ADDIU rx, immediate MIPS16e
+// Purpose: Add Immediate Unsigned Word (2-Operand, Extended)
+// To add a constant to a 32-bit integer.
+//
+def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>;
+
+def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>,
+  ArithLogic16Defs<0>;
+
+//
+
// Format: ADDIU rx, pc, immediate MIPS16e
// Purpose: Add Immediate Unsigned Word (3-Operand, PC-Relative, Extended)
// To add a constant to the program counter.
//
-class AddiuRxPcImmX16_base : FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
-def AddiuRxPcImmX16 : AddiuRxPcImmX16_base;
+def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
//
// Format: ADDU rz, rx, ry MIPS16e
// Purpose: Add Unsigned Word (3-Operand)
// To add 32-bit integers.
//
-class AdduRxRyRz16_base: FRRR16_ins<01, "addu", IIAlu>;
-def AdduRxRyRz16: AdduRxRyRz16_base;
+def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>;
+
+//
+// Format: AND rx, ry MIPS16e
+// Purpose: AND
+// To do a bitwise logical AND.
+
+def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
//
// Format: JR ra MIPS16e
@@ -105,6 +158,34 @@ def AdduRxRyRz16: AdduRxRyRz16_base;
def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu>;
//
+// Format: LB ry, offset(rx) MIPS16e
+// Purpose: Load Byte (Extended)
+// To load a byte from memory as a signed value.
+//
+def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IIAlu>;
+
+//
+// Format: LBU ry, offset(rx) MIPS16e
+// Purpose: Load Byte Unsigned (Extended)
+// To load a byte from memory as an unsigned value.
+//
+def LbuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IIAlu>;
+
+//
+// Format: LH ry, offset(rx) MIPS16e
+// Purpose: Load Halfword signed (Extended)
+// To load a halfword from memory as a signed value.
+//
+def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IIAlu>;
+
+//
+// Format: LHU ry, offset(rx) MIPS16e
+// Purpose: Load Halfword unsigned (Extended)
+// To load a halfword from memory as an unsigned value.
+//
+def LhuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IIAlu>;
+
+//
// Format: LI rx, immediate MIPS16e
// Purpose: Load Immediate (Extended)
// To load a constant into a GPR.
@@ -116,8 +197,7 @@ def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>;
// Purpose: Load Word (Extended)
// To load a word from memory as a signed value.
//
-class LwRxRyOffMemX16_base: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>;
-def LwRxRyOffMemX16: LwRxRyOffMemX16_base;
+def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>;
//
// Format: MOVE r32, rz MIPS16e
@@ -125,6 +205,28 @@ def LwRxRyOffMemX16: LwRxRyOffMemX16_base;
// To move the contents of a GPR to a GPR.
//
def Mov32R16: FI8_MOV32R16_ins<"move", IIAlu>;
+
+//
+// Format: NEG rx, ry MIPS16e
+// Purpose: Negate
+// To negate an integer value.
+//
+def NegRxRy16: FRR16_ins<0b11101, "neg", IIAlu>;
+
+//
+// Format: NOT rx, ry MIPS16e
+// Purpose: Not
+// To complement an integer value.
+//
+def NotRxRy16: FRR16_ins<0b01111, "not", IIAlu>;
+
+//
+// Format: OR rx, ry MIPS16e
+// Purpose: Or
+// To do a bitwise logical OR.
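
The lb/lbu and lh/lhu pairs defined above differ only in how the loaded value fills the upper register bits. A two-function C++ illustration, not part of the patch:

  #include <cassert>
  #include <cstdint>

  // lb sign-extends bit 7 of the loaded byte; lbu zero-extends it.
  static int32_t  lb (uint8_t B) { return (int8_t)B; } // two's complement, as on MIPS
  static uint32_t lbu(uint8_t B) { return B; }

  int main() {
    assert(lb (0xFF) == -1);  // 0xFF reads back as -1 via lb
    assert(lbu(0xFF) == 255); // and as 255 via lbu
    return 0;
  }
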
+// +def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>; + // // Format: RESTORE {ra,}{s0/s1/s0-1,}{framesize} // (All args are optional) MIPS16e @@ -156,6 +258,20 @@ def SaveRaF16: "save \t$$ra, $frame_size", [], IILoad >; // +// Format: SB ry, offset(rx) MIPS16e +// Purpose: Store Byte (Extended) +// To store a byte to memory. +// +def SbRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIAlu>; + +// +// Format: SH ry, offset(rx) MIPS16e +// Purpose: Store Halfword (Extended) +// To store a halfword to memory. +// +def ShRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIAlu>; + +// // Format: SLL rx, ry, sa MIPS16e // Purpose: Shift Word Left Logical (Extended) // To execute a left-shift of a word by a fixed number of bits—0 to 31 bits. @@ -163,57 +279,127 @@ def SaveRaF16: def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>; // +// Format: SLLV ry, rx MIPS16e +// Purpose: Shift Word Left Logical Variable +// To execute a left-shift of a word by a variable number of bits. +// +def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>; + + +// +// Format: SRAV ry, rx MIPS16e +// Purpose: Shift Word Right Arithmetic Variable +// To execute an arithmetic right-shift of a word by a variable +// number of bits. +// +def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>; + + +// +// Format: SRA rx, ry, sa MIPS16e +// Purpose: Shift Word Right Arithmetic (Extended) +// To execute an arithmetic right-shift of a word by a fixed +// number of bits—1 to 8 bits. +// +def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>; + + +// +// Format: SRLV ry, rx MIPS16e +// Purpose: Shift Word Right Logical Variable +// To execute a logical right-shift of a word by a variable +// number of bits. +// +def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>; + + +// +// Format: SRL rx, ry, sa MIPS16e +// Purpose: Shift Word Right Logical (Extended) +// To execute a logical right-shift of a word by a fixed +// number of bits—1 to 31 bits. +// +def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIAlu>; + +// +// Format: SUBU rz, rx, ry MIPS16e +// Purpose: Subtract Unsigned Word +// To subtract 32-bit integers +// +def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIAlu>, ArithLogic16Defs<0>; + +// // Format: SW ry, offset(rx) MIPS16e // Purpose: Store Word (Extended) // To store a word to memory. // -class SwRxRyOffMemX16_base: FEXT_RRI16_mem_ins<0b11011, "sw", mem16, IIAlu>; -def SwRxRyOffMemX16: SwRxRyOffMemX16_base; +def SwRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIAlu>; + +// +// Format: XOR rx, ry MIPS16e +// Purpose: Xor +// To do a bitwise logical XOR. 
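
The srl/sra pair above (and the variable-amount srlv/srav forms) differ in what is shifted into the vacated high bits: zeros for the logical shift, copies of the sign bit for the arithmetic one. An illustrative sketch; the arithmetic shift is spelled out by hand because ">>" on a negative signed int is implementation-defined in C++:

  #include <cassert>
  #include <cstdint>

  // srl: logical right shift, zero-fills the high bits.
  static uint32_t srl(uint32_t V, unsigned Sa) { return V >> (Sa & 31); }

  // sra: arithmetic right shift, replicates the sign bit into the high bits.
  static uint32_t sra(uint32_t V, unsigned Sa) {
    Sa &= 31;
    uint32_t Shifted = V >> Sa;
    if ((V & 0x80000000u) && Sa)      // sign bit set: fill vacated bits with 1s
      Shifted |= ~0u << (32 - Sa);
    return Shifted;
  }

  int main() {
    assert(srl(0x80000000u, 4) == 0x08000000u);
    assert(sra(0x80000000u, 4) == 0xF8000000u);
    return 0;
  }
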
+// +def XorRxRxRy16: FRxRxRy16_ins<0b01110, "xor", IIAlu>, ArithLogic16Defs<1>; class Mips16Pat<dag pattern, dag result> : Pat<pattern, result> { let Predicates = [InMips16Mode]; } -class ArithLogicR16Defs<SDNode OpNode, bit isComm = 0> { - dag OutOperandList = (outs CPU16Regs:$rz); - dag InOperandList = (ins CPU16Regs:$rx, CPU16Regs:$ry); - list<dag> Pattern = [(set CPU16Regs:$rz, - (OpNode CPU16Regs:$rx, CPU16Regs:$ry))]; -} +// Unary Arith/Logic +// +class ArithLogicU_pat<PatFrag OpNode, Instruction I> : + Mips16Pat<(OpNode CPU16Regs:$r), + (I CPU16Regs:$r)>; -multiclass ArithLogicR16_base { - def _add: AdduRxRyRz16_base, ArithLogicR16Defs<add, 1>; -} +def: ArithLogicU_pat<not, NotRxRy16>; +def: ArithLogicU_pat<ineg, NegRxRy16>; -defm ArithLogicR16_patt : ArithLogicR16_base; +class ArithLogic16_pat<SDNode OpNode, Instruction I> : + Mips16Pat<(OpNode CPU16Regs:$l, CPU16Regs:$r), + (I CPU16Regs:$l, CPU16Regs:$r)>; -class LoadM16Defs<PatFrag OpNode, Operand _MemOpnd, bit Pseudo=0> { - bit isPseudo = Pseudo; - Operand MemOpnd = _MemOpnd; - dag OutOperandList = (outs CPU16Regs:$ry); - dag InOperandList = (ins MemOpnd:$addr); - list<dag> Pattern = [(set CPU16Regs:$ry, (OpNode addr:$addr))]; -} +def: ArithLogic16_pat<add, AdduRxRyRz16>; +def: ArithLogic16_pat<and, AndRxRxRy16>; +def: ArithLogic16_pat<or, OrRxRxRy16>; +def: ArithLogic16_pat<sub, SubuRxRyRz16>; +def: ArithLogic16_pat<xor, XorRxRxRy16>; -multiclass LoadM16_base { - def _LwRxRyOffMemX16: LwRxRyOffMemX16_base, LoadM16Defs<load_a, mem16>; -} +// Arithmetic and logical instructions with 2 register operands. -defm LoadM16: LoadM16_base; +class ArithLogicI16_pat<SDNode OpNode, PatFrag imm_type, Instruction I> : + Mips16Pat<(OpNode CPU16Regs:$in, imm_type:$imm), + (I CPU16Regs:$in, imm_type:$imm)>; -class StoreM16Defs<PatFrag OpNode, Operand _MemOpnd, bit Pseudo=0> { - bit isPseudo = Pseudo; - Operand MemOpnd = _MemOpnd; - dag OutOperandList = (outs ); - dag InOperandList = (ins CPU16Regs:$ry, MemOpnd:$addr); - list<dag> Pattern = [(OpNode CPU16Regs:$ry, addr:$addr)]; -} +def: ArithLogicI16_pat<add, immSExt16, AddiuRxRxImmX16>; +def: ArithLogicI16_pat<shl, immZExt5, SllX16>; +def: ArithLogicI16_pat<srl, immZExt5, SrlX16>; +def: ArithLogicI16_pat<sra, immZExt5, SraX16>; -multiclass StoreM16_base { - def _SwRxRyOffMemX16: SwRxRyOffMemX16_base, StoreM16Defs<store_a, mem16>; -} +class shift_rotate_reg16_pat<SDNode OpNode, Instruction I> : + Mips16Pat<(OpNode CPU16Regs:$r, CPU16Regs:$ra), + (I CPU16Regs:$r, CPU16Regs:$ra)>; + +def: shift_rotate_reg16_pat<shl, SllvRxRy16>; +def: shift_rotate_reg16_pat<sra, SravRxRy16>; +def: shift_rotate_reg16_pat<srl, SrlvRxRy16>; + +class LoadM16_pat<PatFrag OpNode, Instruction I> : + Mips16Pat<(OpNode addr:$addr), (I addr:$addr)>; + +def: LoadM16_pat<sextloadi8, LbRxRyOffMemX16>; +def: LoadM16_pat<zextloadi8, LbuRxRyOffMemX16>; +def: LoadM16_pat<sextloadi16_a, LhRxRyOffMemX16>; +def: LoadM16_pat<zextloadi16_a, LhuRxRyOffMemX16>; +def: LoadM16_pat<load_a, LwRxRyOffMemX16>; + +class StoreM16_pat<PatFrag OpNode, Instruction I> : + Mips16Pat<(OpNode CPU16Regs:$r, addr:$addr), (I CPU16Regs:$r, addr:$addr)>; + +def: StoreM16_pat<truncstorei8, SbRxRyOffMemX16>; +def: StoreM16_pat<truncstorei16_a, ShRxRyOffMemX16>; +def: StoreM16_pat<store_a, SwRxRyOffMemX16>; -defm StoreM16: StoreM16_base; // Jump and Link (Call) let isCall=1, hasDelaySlot=1 in @@ -226,18 +412,8 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1, hasExtraSrcRegAllocReq = 1 in def RetRA16 : MipsPseudo16<(outs), (ins), 
"", [(MipsRet)]>; -// As stack alignment is always done with addiu, we need a 16-bit immediate -// This is basically deprecated code but needs to be there for things -// to work. -let Defs = [SP], Uses = [SP] in { -def ADJCALLSTACKDOWN16 : MipsPseudo16<(outs), (ins uimm16:$amt), - ";", - [(callseq_start timm:$amt)]>; -def ADJCALLSTACKUP16 : MipsPseudo16<(outs), (ins uimm16:$amt1, uimm16:$amt2), - ";", - [(callseq_end timm:$amt1, timm:$amt2)]>; -} - // Small immediates -def : Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>; -def : Mips16Pat<(MipsLo tglobaladdr:$in), (LiRxImmX16 tglobaladdr:$in)>; +def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>; + +def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)), + (AddiuRxRxImmX16 CPU16Regs:$hi, tglobaladdr:$lo)>; diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp new file mode 100644 index 0000000..c15d1bf --- /dev/null +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -0,0 +1,111 @@ +//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information -== ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MIPS16 implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "Mips16RegisterInfo.h" +#include "Mips.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsInstrInfo.h" +#include "MipsSubtarget.h" +#include "MipsMachineFunction.h" +#include "llvm/Constants.h" +#include "llvm/DebugInfo.h" +#include "llvm/Type.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; + +Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST, + const TargetInstrInfo &TII) + : MipsRegisterInfo(ST, TII) {} + +// This function eliminate ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void Mips16RegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. + MBB.erase(I); +} + +void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, + unsigned OpNo, int FrameIndex, + uint64_t StackSize, + int64_t SPOffset) const { + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + // The following stack frame objects are always + // referenced relative to $sp: + // 1. Outgoing arguments. + // 2. 
Pointer to dynamically allocated stack space. + // 3. Locations for callee-saved registers. + // Everything else is referenced relative to whatever register + // getFrameRegister() returns. + unsigned FrameReg; + + if (MipsFI->isOutArgFI(FrameIndex) || + (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) + FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; + else + FrameReg = getFrameRegister(MF); + + // Calculate final offset. + // - There is no need to change the offset if the frame object + // is one of the + // following: an outgoing argument, pointer to a dynamically allocated + // stack space or a $gp restore location, + // - If the frame object is any of the following, + // its offset must be adjusted + // by adding the size of the stack: + // incoming argument, callee-saved register location or local variable. + int64_t Offset; + + if (MipsFI->isOutArgFI(FrameIndex)) + Offset = SPOffset; + else + Offset = SPOffset + (int64_t)StackSize; + + Offset += MI.getOperand(OpNo + 1).getImm(); + + DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); + + MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); + MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); + + +} diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h new file mode 100644 index 0000000..3f4b3a7 --- /dev/null +++ b/lib/Target/Mips/Mips16RegisterInfo.h @@ -0,0 +1,37 @@ +//===-- Mips16RegisterInfo.h - Mips16 Register Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips16 implementation of the TargetRegisterInfo class. 
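
eliminateFI() above folds a frame index into a (FrameReg, Offset) pair: outgoing-argument objects keep their $sp-relative offset, everything else is biased by the final stack size, and the immediate already on the instruction is added last. A numeric sketch of that selection, with made-up offsets:

  #include <cassert>
  #include <cstdint>

  // Mirrors the offset computation in Mips16RegisterInfo::eliminateFI().
  static int64_t finalOffset(bool IsOutArgFI, int64_t SPOffset,
                             uint64_t StackSize, int64_t InstrImm) {
    int64_t Offset = IsOutArgFI ? SPOffset : SPOffset + (int64_t)StackSize;
    return Offset + InstrImm;
  }

  int main() {
    // Hypothetical 32-byte frame, local object at SPOffset -8, "lw $2, 4(FI)".
    assert(finalOffset(false, -8, 32, 4) == 28);
    // An outgoing-argument slot at SPOffset 16 stays $sp-relative.
    assert(finalOffset(true, 16, 32, 0) == 16);
    return 0;
  }
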
+// +//===----------------------------------------------------------------------===// + +#ifndef MIPS16REGISTERINFO_H +#define MIPS16REGISTERINFO_H + +#include "MipsRegisterInfo.h" + +namespace llvm { + +class Mips16RegisterInfo : public MipsRegisterInfo { +public: + Mips16RegisterInfo(const MipsSubtarget &Subtarget, + const TargetInstrInfo &TII); + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; +private: + virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, + int64_t SPOffset) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index cceee24..20fc178 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -208,26 +208,25 @@ def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>; def DSBH : SubwordSwap<0x24, 0x2, "dsbh", CPU64Regs>; def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>; -def LEA_ADDiu64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>; +def LEA_ADDiu64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>; } let Uses = [SP_64], DecoderNamespace = "Mips64" in -def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>, - Requires<[IsN64, HasStandardEncoding]> { - let isCodeGenOnly = 1; -} +def DynAlloc64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>, + Requires<[IsN64, HasStandardEncoding]>; let DecoderNamespace = "Mips64" in { def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>; def DEXT : ExtBase<3, "dext", CPU64Regs>; def DINS : InsBase<7, "dins", CPU64Regs>; -def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), - "dsll\t$rd, $rt, 32", [], IIAlu>; -def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), - "sll\t$rd, $rt, 0", [], IIAlu>; -let isCodeGenOnly = 1 in -def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt), - "sll\t$rd, $rt, 0", [], IIAlu>; +let isCodeGenOnly = 1, rs = 0, shamt = 0 in { + def DSLL64_32 : FR<0x00, 0x3c, (outs CPU64Regs:$rd), (ins CPURegs:$rt), + "dsll\t$rd, $rt, 32", [], IIAlu>; + def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), + "sll\t$rd, $rt, 0", [], IIAlu>; + def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt), + "sll\t$rd, $rt, 0", [], IIAlu>; +} } //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 8aadefd..19213fa 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -145,6 +145,17 @@ def RetCC_MipsEABI : CallingConv<[ ]>; //===----------------------------------------------------------------------===// +// Mips Android Calling Convention +//===----------------------------------------------------------------------===// + +def RetCC_MipsAndroid : CallingConv<[ + // f32 are returned in registers F0, F2, F1, F3 + CCIfType<[f32], CCAssignToReg<[F0, F2, F1, F3]>>, + + CCDelegateTo<RetCC_MipsO32> +]>; + +//===----------------------------------------------------------------------===// // Mips FastCC Calling Convention //===----------------------------------------------------------------------===// def CC_MipsO32_FastCC : CallingConv<[ @@ -210,6 +221,7 @@ def RetCC_Mips : CallingConv<[ CCIfSubtarget<"isABI_EABI()", 
CCDelegateTo<RetCC_MipsEABI>>, CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>, CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>, + CCIfSubtarget<"isAndroid()", CCDelegateTo<RetCC_MipsAndroid>>, CCDelegateTo<RetCC_MipsO32> ]>; diff --git a/lib/Target/Mips/MipsELFWriterInfo.cpp b/lib/Target/Mips/MipsELFWriterInfo.cpp new file mode 100644 index 0000000..ac3a547 --- /dev/null +++ b/lib/Target/Mips/MipsELFWriterInfo.cpp @@ -0,0 +1,92 @@ +//===-- MipsELFWriterInfo.cpp - ELF Writer Info for the Mips backend ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF writer information for the Mips backend. +// +//===----------------------------------------------------------------------===// + +#include "MipsELFWriterInfo.h" +#include "MipsRelocations.h" +#include "llvm/Function.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/ELF.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Implementation of the MipsELFWriterInfo class +//===----------------------------------------------------------------------===// + +MipsELFWriterInfo::MipsELFWriterInfo(bool is64Bit_, bool isLittleEndian_) + : TargetELFWriterInfo(is64Bit_, isLittleEndian_) { + EMachine = EM_MIPS; +} + +MipsELFWriterInfo::~MipsELFWriterInfo() {} + +unsigned MipsELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { + switch(MachineRelTy) { + case Mips::reloc_mips_pc16: + return ELF::R_MIPS_GOT16; + case Mips::reloc_mips_hi: + return ELF::R_MIPS_HI16; + case Mips::reloc_mips_lo: + return ELF::R_MIPS_LO16; + case Mips::reloc_mips_26: + return ELF::R_MIPS_26; + default: + llvm_unreachable("unknown Mips machine relocation type"); + } +} + +long int MipsELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, + long int Modifier) const { + switch(RelTy) { + case ELF::R_MIPS_26: return Modifier; + default: + llvm_unreachable("unknown Mips relocation type"); + } +} + +unsigned MipsELFWriterInfo::getRelocationTySize(unsigned RelTy) const { + switch(RelTy) { + case ELF::R_MIPS_GOT16: + case ELF::R_MIPS_26: + return 32; + default: + llvm_unreachable("unknown Mips relocation type"); + } +} + +bool MipsELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { + switch(RelTy) { + case ELF::R_MIPS_GOT16: + return true; + case ELF::R_MIPS_26: + return false; + default: + llvm_unreachable("unknown Mips relocation type"); + } +} + +unsigned MipsELFWriterInfo::getAbsoluteLabelMachineRelTy() const { + return Mips::reloc_mips_26; +} + +long int MipsELFWriterInfo::computeRelocation(unsigned SymOffset, + unsigned RelOffset, + unsigned RelTy) const { + + if (RelTy == ELF::R_MIPS_GOT16) + return SymOffset - (RelOffset + 4); + + llvm_unreachable("computeRelocation unknown for this relocation type"); +} diff --git a/lib/Target/Mips/MipsELFWriterInfo.h b/lib/Target/Mips/MipsELFWriterInfo.h new file mode 100644 index 0000000..23f3f03 --- /dev/null +++ b/lib/Target/Mips/MipsELFWriterInfo.h @@ -0,0 +1,59 @@ +//===-- MipsELFWriterInfo.h - ELF Writer Info for Mips ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
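
computeRelocation() above resolves its single PC-relative case, R_MIPS_GOT16, as SymOffset - (RelOffset + 4): the displacement is measured from the instruction after the one being relocated. A sketch with hypothetical offsets:

  #include <cassert>

  // Mirrors MipsELFWriterInfo::computeRelocation() for the PC-relative case.
  static long computeRel(unsigned SymOffset, unsigned RelOffset) {
    return (long)SymOffset - ((long)RelOffset + 4);
  }

  int main() {
    // A symbol 0x40 bytes into the section, referenced from offset 0x10,
    // yields a displacement of 0x40 - 0x14 = 0x2C.
    assert(computeRel(0x40, 0x10) == 0x2C);
    return 0;
  }
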
+// +//===----------------------------------------------------------------------===// +// +// This file implements ELF writer information for the Mips backend. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPS_ELF_WRITER_INFO_H +#define MIPS_ELF_WRITER_INFO_H + +#include "llvm/Target/TargetELFWriterInfo.h" + +namespace llvm { + + class MipsELFWriterInfo : public TargetELFWriterInfo { + + public: + MipsELFWriterInfo(bool is64Bit_, bool isLittleEndian_); + virtual ~MipsELFWriterInfo(); + + /// getRelocationType - Returns the target specific ELF Relocation type. + /// 'MachineRelTy' contains the object code independent relocation type + virtual unsigned getRelocationType(unsigned MachineRelTy) const; + + /// hasRelocationAddend - True if the target uses an addend in the + /// ELF relocation entry. + virtual bool hasRelocationAddend() const { return is64Bit ? true : false; } + + /// getDefaultAddendForRelTy - Gets the default addend value for a + /// relocation entry based on the target ELF relocation type. + virtual long int getDefaultAddendForRelTy(unsigned RelTy, + long int Modifier = 0) const; + + /// getRelTySize - Returns the size of relocatable field in bits + virtual unsigned getRelocationTySize(unsigned RelTy) const; + + /// isPCRelativeRel - True if the relocation type is pc relative + virtual bool isPCRelativeRel(unsigned RelTy) const; + + /// getJumpTableRelocationTy - Returns the machine relocation type used + /// to reference a jumptable. + virtual unsigned getAbsoluteLabelMachineRelTy() const; + + /// computeRelocation - Some relocatable fields could be relocated + /// directly, avoiding the relocation symbol emission, compute the + /// final relocation value for this symbol. + virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, + unsigned RelTy) const; + }; + +} // end llvm namespace + +#endif // MIPS_ELF_WRITER_INFO_H diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 6338f3c..8c0474b 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -15,6 +15,7 @@ #include "MipsAnalyzeImmediate.h" #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" +#include "MipsTargetMachine.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -81,6 +82,14 @@ using namespace llvm; // //===----------------------------------------------------------------------===// +const MipsFrameLowering *MipsFrameLowering::create(MipsTargetMachine &TM, + const MipsSubtarget &ST) { + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16FrameLowering(ST); + + return llvm::createMipsSEFrameLowering(ST); +} + // hasFP - Return true if the specified function should have a dedicated frame // pointer register. This is true if the function has variable sized allocas or // if frame pointer elimination is disabled. 
@@ -89,218 +98,3 @@ bool MipsFrameLowering::hasFP(const MachineFunction &MF) const { return MF.getTarget().Options.DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); } - -bool MipsFrameLowering::targetHandlesStackFrameRounding() const { - return true; -} - -void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - const MipsRegisterInfo *RegInfo = - static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo()); - const MipsInstrInfo &TII = - *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); - MachineBasicBlock::iterator MBBI = MBB.begin(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; - unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; - unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; - unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; - - // First, compute final stack size. - unsigned StackAlign = getStackAlignment(); - uint64_t StackSize = RoundUpToAlignment(MFI->getStackSize(), StackAlign); - - if (MipsFI->globalBaseRegSet()) - StackSize += MFI->getObjectOffset(MipsFI->getGlobalRegFI()) + StackAlign; - else - StackSize += RoundUpToAlignment(MipsFI->getMaxCallFrameSize(), StackAlign); - - // Update stack size - MFI->setStackSize(StackSize); - - // No need to allocate space on the stack. - if (StackSize == 0 && !MFI->adjustsStack()) return; - - MachineModuleInfo &MMI = MF.getMMI(); - std::vector<MachineMove> &Moves = MMI.getFrameMoves(); - MachineLocation DstML, SrcML; - - // Adjust stack. - if (isInt<16>(-StackSize)) {// addi sp, sp, (-stacksize) - if (STI.inMips16Mode()) - BuildMI(MBB, MBBI, dl, - TII.get(Mips::SaveRaF16)).addImm(StackSize); // cleanup - else - BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(-StackSize); - } - else { // Expand immediate that doesn't fit in 16-bit. - unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT; - - MF.getInfo<MipsFunctionInfo>()->setEmitNOAT(); - Mips::loadImmediate(-StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl, false, - 0); - BuildMI(MBB, MBBI, dl, TII.get(ADDu), SP).addReg(SP).addReg(ATReg); - } - - // emit ".cfi_def_cfa_offset StackSize" - MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, - TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel); - DstML = MachineLocation(MachineLocation::VirtualFP); - SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize); - Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML)); - - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - - if (CSI.size()) { - // Find the instruction past the last instruction that saves a callee-saved - // register to the stack. - for (unsigned i = 0; i < CSI.size(); ++i) - ++MBBI; - - // Iterate over list of callee-saved registers and emit .cfi_offset - // directives. 
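
The removed emitPrologue() above (now superseded by the Mips16/MipsSE variants) derived the final frame size with RoundUpToAlignment() and then reserved the rounded maximum call-frame size on top. For the power-of-two alignments used here, the rounding is the usual add-and-mask trick:

  #include <cassert>
  #include <cstdint>

  // Equivalent of llvm::RoundUpToAlignment(Value, Align) when Align is a
  // power of two: add Align-1, then clear the low bits.
  static uint64_t roundUp(uint64_t Value, uint64_t Align) {
    return (Value + Align - 1) & ~(Align - 1);
  }

  int main() {
    // With the 8-byte stack alignment this backend uses for O32:
    assert(roundUp(0, 8)  == 0);
    assert(roundUp(20, 8) == 24);
    assert(roundUp(24, 8) == 24);
    return 0;
  }
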
- MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, - TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel); - - for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) { - int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); - unsigned Reg = I->getReg(); - - // If Reg is a double precision register, emit two cfa_offsets, - // one for each of the paired single precision registers. - if (Mips::AFGR64RegClass.contains(Reg)) { - MachineLocation DstML0(MachineLocation::VirtualFP, Offset); - MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4); - MachineLocation SrcML0(RegInfo->getSubReg(Reg, Mips::sub_fpeven)); - MachineLocation SrcML1(RegInfo->getSubReg(Reg, Mips::sub_fpodd)); - - if (!STI.isLittle()) - std::swap(SrcML0, SrcML1); - - Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0)); - Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1)); - } else { - // Reg is either in CPURegs or FGR32. - DstML = MachineLocation(MachineLocation::VirtualFP, Offset); - SrcML = MachineLocation(Reg); - Moves.push_back(MachineMove(CSLabel, DstML, SrcML)); - } - } - } - - // if framepointer enabled, set it to point to the stack pointer. - if (hasFP(MF)) { - // Insert instruction "move $fp, $sp" at this location. - BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO); - - // emit ".cfi_def_cfa_register $fp" - MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, - TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel); - DstML = MachineLocation(FP); - SrcML = MachineLocation(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML)); - } -} - -void MipsFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const MipsInstrInfo &TII = - *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); - DebugLoc dl = MBBI->getDebugLoc(); - unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; - unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; - unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; - unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; - - // if framepointer enabled, restore the stack pointer. - if (hasFP(MF)) { - // Find the first instruction that restores a callee-saved register. - MachineBasicBlock::iterator I = MBBI; - - for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i) - --I; - - // Insert instruction "move $sp, $fp" at this location. - BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO); - } - - // Get the number of bytes from FrameInfo - uint64_t StackSize = MFI->getStackSize(); - - if (!StackSize) - return; - - // Adjust stack. - if (isInt<16>(StackSize)) { // addi sp, sp, (-stacksize) - if (STI.inMips16Mode()) - // assumes stacksize multiple of 8 - BuildMI(MBB, MBBI, dl, - TII.get(Mips::RestoreRaF16)).addImm(StackSize); - else - BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(StackSize); - } - else { // Expand immediate that doesn't fit in 16-bit. - unsigned ATReg = STI.isABI_N64() ? 
Mips::AT_64 : Mips::AT;
-
-    MF.getInfo<MipsFunctionInfo>()->setEmitNOAT();
-    Mips::loadImmediate(StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl, false,
-                        0);
-    BuildMI(MBB, MBBI, dl, TII.get(ADDu), SP).addReg(SP).addReg(ATReg);
-  }
-}
-
-void MipsFrameLowering::
-processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                     RegScavenger *RS) const {
-  MachineRegisterInfo &MRI = MF.getRegInfo();
-  unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
-
-  // FIXME: remove this code if register allocator can correctly mark
-  // $fp and $ra used or unused.
-
-  // Mark $fp and $ra as used or unused.
-  if (hasFP(MF))
-    MRI.setPhysRegUsed(FP);
-}
-
-bool MipsFrameLowering::
-spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                          MachineBasicBlock::iterator MI,
-                          const std::vector<CalleeSavedInfo> &CSI,
-                          const TargetRegisterInfo *TRI) const {
-  MachineFunction *MF = MBB.getParent();
-  MachineBasicBlock *EntryBlock = MF->begin();
-  const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
-
-  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    // Add the callee-saved register as live-in. Do not add if the register is
-    // RA and return address is taken, because it has already been added in
-    // method MipsTargetLowering::LowerRETURNADDR.
-    // It's killed at the spill, unless the register is RA and return address
-    // is taken.
-    unsigned Reg = CSI[i].getReg();
-    bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64)
-        && MF->getFrameInfo()->isReturnAddressTaken();
-    if (!IsRAAndRetAddrIsTaken)
-      EntryBlock->addLiveIn(Reg);
-
-    // Insert the spill to the stack frame.
-    bool IsKill = !IsRAAndRetAddrIsTaken;
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII.storeRegToStackSlot(*EntryBlock, MI, Reg, IsKill,
-                            CSI[i].getFrameIdx(), RC, TRI);
-  }
-
-  return true;
-}
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index e364ded..ed7b7fe 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -27,28 +27,19 @@ protected:
public:
  explicit MipsFrameLowering(const MipsSubtarget &sti)
-    : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0),
-      STI(sti) {
-  }
+    : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0,
+                          sti.hasMips64() ? 16 : 8), STI(sti) {}
-  bool targetHandlesStackFrameRounding() const;
-
-  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
-  /// the function.
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
-  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                 MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI,
-                                 const TargetRegisterInfo *TRI) const;
+  static const MipsFrameLowering *create(MipsTargetMachine &TM,
+                                         const MipsSubtarget &ST);
  bool hasFP(const MachineFunction &MF) const;
-
-  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                            RegScavenger *RS) const;
};
+/// Create MipsFrameLowering objects.
+const MipsFrameLowering *createMips16FrameLowering(const MipsSubtarget &ST); +const MipsFrameLowering *createMipsSEFrameLowering(const MipsSubtarget &ST); + } // End llvm namespace #endif diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index ea33b74..5a97c17 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -117,28 +117,23 @@ private: void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - if (((MF.getTarget().getRelocationModel() == Reloc::Static) || - Subtarget.inMips16Mode()) && !MipsFI->globalBaseRegSet()) + if (!MipsFI->globalBaseRegSet()) return; MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator I = MBB.begin(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); - const MipsRegisterInfo *TargetRegInfo = TM.getRegisterInfo(); - const MipsInstrInfo *MII = TM.getInstrInfo(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); - int FI = 0; + const TargetRegisterClass *RC; - FI= MipsFI->initGlobalRegFI(); - - const TargetRegisterClass *RC = Subtarget.isABI_N64() ? - (const TargetRegisterClass*)&Mips::CPU64RegsRegClass : - (const TargetRegisterClass*)&Mips::CPURegsRegClass; - - if (Subtarget.inMips16Mode()) - RC=(const TargetRegisterClass*)&Mips::CPU16RegsRegClass; + if (Subtarget.isABI_N64()) + RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; + else if (Subtarget.inMips16Mode()) + RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass; + else + RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass; V0 = RegInfo.createVirtualRegister(RC); V1 = RegInfo.createVirtualRegister(RC); @@ -158,23 +153,17 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { .addReg(Mips::T9_64); BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, - TargetRegInfo); return; } if (Subtarget.inMips16Mode()) { BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); - BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), - V1) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); - BuildMI(MBB, I, DL, TII.get(Mips::SllX16), - V2 ).addReg(V0).addImm(16); + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); + BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16); BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg) .addReg(V1).addReg(V2); - - return; } @@ -203,19 +192,11 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, - TargetRegInfo); return; } assert(Subtarget.isABI_O32()); - - //if (Subtarget.inMips16Mode()) - // return; // no need to load GP. 
It can be calculated anywhere - - - // For O32 ABI, the following instruction sequence is emitted to initialize // the global base register: // @@ -237,7 +218,6 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { MBB.addLiveIn(Mips::V0); BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) .addReg(Mips::V0).addReg(Mips::T9); - MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, TargetRegInfo); } bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, @@ -262,13 +242,14 @@ bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, // Replace uses with ZeroReg. for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), - E = MRI->use_end(); U != E; ++U) { + E = MRI->use_end(); U != E;) { MachineOperand &MO = U.getOperand(); + unsigned OpNo = U.getOperandNo(); MachineInstr *MI = MO.getParent(); + ++U; // Do not replace if it is a phi's operand or is tied to def operand. - if (MI->isPHI() || MI->isRegTiedToDefOperand(U.getOperandNo()) || - MI->isPseudo()) + if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) continue; MO.setReg(ZeroReg); @@ -309,21 +290,6 @@ bool MipsDAGToDAGISel:: SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { EVT ValTy = Addr.getValueType(); - // If Parent is an unaligned f32 load or store, select a (base + index) - // floating point load/store instruction (luxc1 or suxc1). - const LSBaseSDNode *LS = 0; - - if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) { - EVT VT = LS->getMemoryVT(); - - if (VT.getSizeInBits() / 8 > LS->getAlignment()) { - assert(TLI.allowsUnalignedMemoryAccesses(VT) && - "Unaligned loads/stores not supported for this type."); - if (VT == MVT::f32) - return false; - } - } - // if Address is FI, get the TargetFrameIndex. if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); @@ -382,6 +348,8 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { } // If an indexed floating point load/store can be emitted, return false. 
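Condensing the test added below: when the parent node is an f32 or f64 load or store and the subtarget has mips32r2 or a 64-bit ISA, SelectAddr refuses the reg+imm addressing mode so that the indexed (base + index register) lwxc1/ldxc1/swxc1/sdxc1 patterns can match instead. A sketch of the decision with the DAG types abstracted away:

    enum FPAddrMode { FPAddrRegImm, FPAddrRegReg };

    // Assumed simplification of the check below: FP accesses prefer the
    // indexed form whenever the ISA provides it.
    FPAddrMode chooseFPAddrMode(bool IsF32OrF64LoadStore,
                                bool HasMips32r2Or64) {
      return (IsF32OrF64LoadStore && HasMips32r2Or64) ? FPAddrRegReg
                                                      : FPAddrRegImm;
    }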
+ const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); + if (LS && (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && Subtarget.hasMips32r2Or64()) diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 7741f9f..c5207c6 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -157,7 +157,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::f64, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); @@ -178,7 +177,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::JumpTable, MVT::i64, Custom); setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::i64, Custom); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); setOperationAction(ISD::LOAD, MVT::i64, Custom); setOperationAction(ISD::STORE, MVT::i64, Custom); } @@ -217,6 +215,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); if (!Subtarget->hasMips32r2()) setOperationAction(ISD::ROTR, MVT::i32, Expand); @@ -314,8 +314,6 @@ bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { case MVT::i64: case MVT::i32: return true; - case MVT::f32: - return Subtarget->hasMips32r2Or64(); default: return false; } @@ -794,7 +792,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); @@ -1504,42 +1501,6 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, // Misc Lower Operation implementation //===----------------------------------------------------------------------===// SDValue MipsTargetLowering:: -LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const -{ - MachineFunction &MF = DAG.getMachineFunction(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - unsigned SP = IsN64 ? Mips::SP_64 : Mips::SP; - - assert(getTargetMachine().getFrameLowering()->getStackAlignment() >= - cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() && - "Cannot lower if the alignment of the allocated space is larger than \ - that of the stack."); - - SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - - // Get a reference from Mips stack pointer - SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SP, getPointerTy()); - - // Subtract the dynamic size from the actual stack size to - // obtain the new stack size. - SDValue Sub = DAG.getNode(ISD::SUB, dl, getPointerTy(), StackPointer, Size); - - // The Sub result contains the new stack start address, so it - // must be placed in the stack pointer register. 
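The function deleted in this hunk built the stack adjustment by hand as SelectionDAG nodes; with DYNAMIC_STACKALLOC now marked Expand above, the generic lowering takes over. In scalar terms the custom code computed the following (the assert mirrors the one being removed):

    #include <cassert>
    #include <stdint.h>

    // New $sp after a dynamic allocation: the stack grows down, so the
    // allocated block starts at the new stack pointer. The lowering did not
    // realign; it required the request to fit the ABI stack alignment.
    uint64_t dynAllocNewSP(uint64_t SP, uint64_t Size,
                           uint64_t ReqAlign, uint64_t StackAlign) {
      assert(ReqAlign <= StackAlign &&
             "Cannot lower if the allocation is over-aligned");
      return SP - Size;
    }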
- Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, SP, Sub, SDValue()); - - // This node always has two return values: a new stack pointer - // value and a chain - SDVTList VTLs = DAG.getVTList(getPointerTy(), MVT::Other); - SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy()); - SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) }; - - return DAG.getNode(MipsISD::DynAlloc, dl, VTLs, Ops, 3); -} - -SDValue MipsTargetLowering:: LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // The first operand is the chain, the second is the condition, the third is @@ -2455,9 +2416,9 @@ static unsigned getNextIntArgReg(unsigned Reg) { // Write ByVal Arg to arg registers and stack. static void -WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, +WriteByValArg(SDValue Chain, DebugLoc dl, SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, - SmallVector<SDValue, 8> &MemOpChains, int &LastFI, + SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, MVT PtrType, bool isLittle) { @@ -2531,24 +2492,24 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, return; } - // Create a fixed object on stack at offset LocMemOffset and copy - // remaining part of byval arg to it using memcpy. + // Copy remaining part of byval arg using memcpy. SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, DAG.getConstant(Offset, MVT::i32)); - LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true); - SDValue Dst = DAG.getFrameIndex(LastFI, PtrType); - ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, - DAG.getConstant(RemainingSize, MVT::i32), - std::min(ByValAlign, (unsigned)4), - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); + SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, + DAG.getIntPtrConstant(LocMemOffset)); + Chain = DAG.getMemcpy(Chain, dl, Dst, Src, + DAG.getConstant(RemainingSize, MVT::i32), + std::min(ByValAlign, (unsigned)4), + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); + MemOpChains.push_back(Chain); } // Copy Mips64 byVal arg to registers and stack. void static -PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, +PassByValArg64(SDValue Chain, DebugLoc dl, SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, - SmallVector<SDValue, 8> &MemOpChains, int &LastFI, + SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, EVT PtrTy, bool isLittle) { @@ -2620,16 +2581,16 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, assert(MemCpySize && "MemCpySize must not be zero."); - // Create a fixed object on stack at offset LocMemOffset and copy - // remainder of byval arg to it with memcpy. + // Copy remainder of byval arg to it with memcpy. 
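Both byval helpers lose their ByValChain and fixed-frame-object plumbing: the destination of the trailing memcpy is now computed directly as StackPtr + LocMemOffset, and LowerCall (next hunk) rounds the whole call frame up to the stack alignment once, via RoundUpToAlignment, i.e. (n + a - 1) / a * a. A standalone sketch of the copy the code below performs at the DAG level:

    #include <string.h>
    #include <stdint.h>

    // The leading part of a byval aggregate travels in argument registers;
    // the remainder is copied into the outgoing-argument area, addressed
    // straight off the stack pointer rather than through a FrameIndex.
    void copyByValTail(char *StackPtr, int64_t LocMemOffset,
                       const char *Arg, uint64_t RegCoveredBytes,
                       uint64_t RemainingSize) {
      const char *Src = Arg + RegCoveredBytes; // part not passed in registers
      char *Dst = StackPtr + LocMemOffset;     // slot inside the call frame
      memcpy(Dst, Src, RemainingSize);
    }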
SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, DAG.getConstant(Offset, PtrTy)); - LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true); - SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy); - ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, - DAG.getConstant(MemCpySize, PtrTy), Alignment, - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); + SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, + DAG.getIntPtrConstant(LocMemOffset)); + Chain = DAG.getMemcpy(Chain, dl, Dst, Src, + DAG.getConstant(MemCpySize, PtrTy), Alignment, + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); + MemOpChains.push_back(Chain); } /// LowerCall - functions arguments are copied from virtual regs to @@ -2643,9 +2604,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; SmallVector<SDValue, 32> &OutVals = CLI.OutVals; SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; - SDValue InChain = CLI.Chain; + SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; - SDValue CalleeSave = CLI.Callee; bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; @@ -2675,18 +2635,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); - - // Chain is the output chain of the last Load/Store or CopyToReg node. - // ByValChain is the output chain of the last Memcpy node created for copying - // byval arguments to the stack. - SDValue Chain, CallSeqStart, ByValChain; - SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); - Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal); - ByValChain = InChain; - - // Get the frame index of the stack frame object that points to the location - // of dynamically allocated area on the stack. - int DynAllocFI = MipsFI->getDynAllocFI(); + unsigned StackAlignment = TFL->getStackAlignment(); + NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment); // Update size of the maximum argument space. // For O32, a minimum of four words (16 bytes) of argument space is @@ -2694,27 +2644,23 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (IsO32 && (CallConv != CallingConv::Fast)) NextStackOffset = std::max(NextStackOffset, (unsigned)16); - unsigned MaxCallFrameSize = MipsFI->getMaxCallFrameSize(); - - if (MaxCallFrameSize < NextStackOffset) { - MipsFI->setMaxCallFrameSize(NextStackOffset); + // Chain is the output chain of the last Load/Store or CopyToReg node. + // ByValChain is the output chain of the last Memcpy node created for copying + // byval arguments to the stack. + SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); + Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal); - // Set the offsets relative to $sp of the $gp restore slot and dynamically - // allocated stack space. These offsets must be aligned to a boundary - // determined by the stack alignment of the ABI. - unsigned StackAlignment = TFL->getStackAlignment(); - NextStackOffset = (NextStackOffset + StackAlignment - 1) / - StackAlignment * StackAlignment; + SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, + IsN64 ? 
Mips::SP_64 : Mips::SP, + getPointerTy()); - MFI->setObjectOffset(DynAllocFI, NextStackOffset); - } + if (MipsFI->getMaxCallFrameSize() < NextStackOffset) + MipsFI->setMaxCallFrameSize(NextStackOffset); // With EABI is it possible to have 16 args on registers. SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; SmallVector<SDValue, 8> MemOpChains; - int FirstFI = -MFI->getNumFixedObjects() - 1, LastFI = 0; - // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { SDValue Arg = OutVals[i]; @@ -2727,11 +2673,11 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); if (IsO32) - WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, + WriteByValArg(Chain, dl, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); else - PassByValArg64(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, + PassByValArg64(Chain, dl, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); continue; @@ -2781,29 +2727,14 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Register can't get to this point... assert(VA.isMemLoc()); - // Create the frame index object for this incoming parameter - LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, - VA.getLocMemOffset(), true); - SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy()); - // emit ISD::STORE whichs stores the // parameter value to a stack Location + SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, + DAG.getIntPtrConstant(VA.getLocMemOffset())); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); } - // Extend range of indices of frame objects for outgoing arguments that were - // created during this function call. Skip this step if no such objects were - // created. - if (LastFI) - MipsFI->extendOutArgFIRange(FirstFI, LastFI); - - // If a memcpy has been created to copy a byval arg to a stack, replace the - // chain input of CallSeqStart with ByValChain. - if (InChain != ByValChain) - DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain, - NextStackOffsetVal); - // Transform all store nodes into one single node because all store // nodes are independent of each other. if (!MemOpChains.empty()) @@ -2867,6 +2798,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } + // T9 register operand. + SDValue T9; + // T9 should contain the address of the callee function if // -reloction-model=pic or it is an indirect call. if (IsPICCall || !GlobalOrExternal) { @@ -2874,7 +2808,11 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0)); InFlag = Chain.getValue(1); - Callee = DAG.getRegister(T9Reg, getPointerTy()); + + if (Subtarget->inMips16Mode()) + T9 = DAG.getRegister(T9Reg, getPointerTy()); + else + Callee = DAG.getRegister(T9Reg, getPointerTy()); } // Insert node "GP copy globalreg" before call to function. @@ -2902,7 +2840,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); - Ops.push_back(Subtarget->inMips16Mode()? 
CalleeSave: Callee); + Ops.push_back(Callee); // Add argument registers to the end of the list so that they are // known live into the call. @@ -2910,8 +2848,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); - if (Subtarget->inMips16Mode()) - Ops.push_back(Callee); + // Add T9 register operand. + if (T9.getNode()) + Ops.push_back(T9); + // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); @@ -2925,8 +2865,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(NextStackOffset, true), + Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index edab03c..95ea8fa 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -132,7 +132,6 @@ namespace llvm { // Lower Operand specifics SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 9654b86..df45df4 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -101,18 +101,18 @@ class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>: } // FP indexed load. class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC, - RegisterClass PRC, PatFrag FOp>: + RegisterClass PRC, SDPatternOperator FOp = null_frag>: FFMemIdx<funct, (outs DRC:$fd), (ins PRC:$base, PRC:$index), - !strconcat(opstr, "\t$fd, $index($base)"), + !strconcat(opstr, "\t$fd, ${index}(${base})"), [(set DRC:$fd, (FOp (add PRC:$base, PRC:$index)))]> { let fs = 0; } // FP indexed store. class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC, - RegisterClass PRC, PatFrag FOp>: + RegisterClass PRC, SDPatternOperator FOp= null_frag>: FFMemIdx<funct, (outs), (ins DRC:$fs, PRC:$base, PRC:$index), - !strconcat(opstr, "\t$fs, $index($base)"), + !strconcat(opstr, "\t$fs, ${index}(${base})"), [(FOp DRC:$fs, (add PRC:$base, PRC:$index))]> { let fd = 0; } @@ -270,7 +270,7 @@ let Predicates = [NotN64, HasStandardEncoding] in { } let Predicates = [NotN64, HasMips64, HasStandardEncoding], - DecoderNamespace = "Mips64" in { + DecoderNamespace = "Mips64" in { def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>; def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>; } @@ -283,9 +283,7 @@ let Predicates = [NotN64, NotMips64, HasStandardEncoding] in { // Indexed loads and stores. 
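The FPIdxLoad/FPIdxStore definitions that follow cover two indexed FPU addressing flavors. A sketch of the effective-address computations; note that the masking shown for luxc1/suxc1 is an assumption about the "unaligned" forms (they access the naturally aligned doubleword containing the address) rather than something stated in this patch:

    #include <stdint.h>

    // lwc1/ldc1 style: base register + sign-extended 16-bit displacement.
    uint64_t eaRegImm(uint64_t Base, int16_t Disp) { return Base + Disp; }

    // lwxc1/ldxc1 style (defined below): base register + index register.
    uint64_t eaRegReg(uint64_t Base, uint64_t Index) { return Base + Index; }

    // luxc1/suxc1, assumption: the low three bits of base + index are
    // ignored, so the access always hits an aligned doubleword.
    uint64_t eaUnalignedDW(uint64_t Base, uint64_t Index) {
      return (Base + Index) & ~(uint64_t)7;
    }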
let Predicates = [HasMips32r2Or64, HasStandardEncoding] in { def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load_a>; - def LUXC1 : FPIdxLoad<0x5, "luxc1", FGR32, CPURegs, load_u>; def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store_a>; - def SUXC1 : FPIdxStore<0xd, "suxc1", FGR32, CPURegs, store_u>; } let Predicates = [HasMips32r2, NotMips64, HasStandardEncoding] in { @@ -301,13 +299,23 @@ let Predicates = [HasMips64, NotN64, HasStandardEncoding], DecoderNamespace="Mip // n64 let Predicates = [IsN64, HasStandardEncoding], isCodeGenOnly=1 in { def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load_a>; - def LUXC1_P8 : FPIdxLoad<0x5, "luxc1", FGR32, CPU64Regs, load_u>; def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load_a>; def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store_a>; - def SUXC1_P8 : FPIdxStore<0xd, "suxc1", FGR32, CPU64Regs, store_u>; def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store_a>; } +// Load/store doubleword indexed unaligned. +let Predicates = [NotMips64, HasStandardEncoding] in { + def LUXC1 : FPIdxLoad<0x5, "luxc1", AFGR64, CPURegs>; + def SUXC1 : FPIdxStore<0xd, "suxc1", AFGR64, CPURegs>; +} + +let Predicates = [HasMips64, HasStandardEncoding], + DecoderNamespace="Mips64" in { + def LUXC164 : FPIdxLoad<0x5, "luxc1", FGR64, CPURegs>; + def SUXC164 : FPIdxStore<0xd, "suxc1", FGR64, CPURegs>; +} + /// Floating-point Aritmetic defm FADD : FFR2P_M<0x00, "add", fadd, 1>; defm FDIV : FFR2P_M<0x03, "div", fdiv>; @@ -408,25 +416,23 @@ let Defs=[FCR31] in { //===----------------------------------------------------------------------===// // Floating Point Pseudo-Instructions //===----------------------------------------------------------------------===// -def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src), - "# MOVCCRToCCR", []>; +def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCR:$src), + "# MOVCCRToCCR", []>; // This pseudo instr gets expanded into 2 mtc1 instrs after register // allocation. def BuildPairF64 : - MipsPseudo<(outs AFGR64:$dst), - (ins CPURegs:$lo, CPURegs:$hi), "", - [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>; + PseudoSE<(outs AFGR64:$dst), + (ins CPURegs:$lo, CPURegs:$hi), "", + [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>; // This pseudo instr gets expanded into 2 mfc1 instrs after register // allocation. // if n is 0, lower part of src is extracted. // if n is 1, higher part of src is extracted. def ExtractElementF64 : - MipsPseudo<(outs CPURegs:$dst), - (ins AFGR64:$src, i32imm:$n), "", - [(set CPURegs:$dst, - (MipsExtractElementF64 AFGR64:$src, imm:$n))]>; + PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n), "", + [(set CPURegs:$dst, (MipsExtractElementF64 AFGR64:$src, imm:$n))]>; //===----------------------------------------------------------------------===// // Floating Point Patterns @@ -466,17 +472,3 @@ let Predicates = [IsFP64bit, HasStandardEncoding] in { def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>; def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; } - -// Patterns for unaligned floating point loads and stores. 
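A few hunks up, BuildPairF64 and ExtractElementF64 were re-parented to PseudoSE; they model an AFGR64 double as an even/odd pair of 32-bit FPU registers and expand to mtc1/mfc1 pairs. A bit-level sketch of the data movement, assuming little-endian pairing (the even register holds bits 31..0); the deleted unaligned-f32 patterns continue below:

    #include <string.h>
    #include <stdint.h>

    // BuildPairF64: two 32-bit register values become one f64.
    double buildPairF64(uint32_t Lo, uint32_t Hi) {
      uint64_t Bits = ((uint64_t)Hi << 32) | Lo;
      double D;
      memcpy(&D, &Bits, sizeof(D));
      return D;
    }

    // ExtractElementF64: n == 0 yields the lower word, n == 1 the upper,
    // matching the comment on the pseudo above.
    uint32_t extractElementF64(double D, unsigned N) {
      uint64_t Bits;
      memcpy(&Bits, &D, sizeof(Bits));
      return N ? (uint32_t)(Bits >> 32) : (uint32_t)Bits;
    }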
-let Predicates = [HasMips32r2Or64, NotN64, HasStandardEncoding] in { - def : MipsPat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>; - def : MipsPat<(store_u FGR32:$src, CPURegs:$addr), - (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>; -} - -let Predicates = [IsN64, HasStandardEncoding] in { - def : MipsPat<(f32 (load_u CPU64Regs:$addr)), - (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>; - def : MipsPat<(store_u FGR32:$src, CPU64Regs:$addr), - (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>; -} diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 15a77fb..8feb853 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -70,25 +70,35 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, let DecoderNamespace = "Mips"; field bits<32> SoftFail = 0; +} +// Mips32/64 Instruction Format +class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern, + InstrItinClass itin, Format f>: + MipsInst<outs, ins, asmstr, pattern, itin, f> { let Predicates = [HasStandardEncoding]; - } // Mips Pseudo Instructions Format class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>: - MipsInst<outs, ins, asmstr, pattern, IIPseudo, Pseudo> { + MipsInst<outs, ins, asmstr, pattern, IIPseudo, Pseudo> { let isCodeGenOnly = 1; let isPseudo = 1; } +// Mips32/64 Pseudo Instruction Format +class PseudoSE<dag outs, dag ins, string asmstr, list<dag> pattern>: + MipsPseudo<outs, ins, asmstr, pattern> { + let Predicates = [HasStandardEncoding]; +} + //===----------------------------------------------------------------------===// // Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|> //===----------------------------------------------------------------------===// class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst<outs, ins, asmstr, pattern, itin, FrmR> + InstSE<outs, ins, asmstr, pattern, itin, FrmR> { bits<5> rd; bits<5> rs; @@ -111,7 +121,7 @@ class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr, //===----------------------------------------------------------------------===// class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin, FrmI> + InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmI> { bits<5> rt; bits<5> rs; @@ -126,7 +136,7 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, class BranchBase<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst<outs, ins, asmstr, pattern, itin, FrmI> + InstSE<outs, ins, asmstr, pattern, itin, FrmI> { bits<5> rs; bits<5> rt; @@ -144,7 +154,7 @@ class BranchBase<bits<6> op, dag outs, dag ins, string asmstr, //===----------------------------------------------------------------------===// class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin, FrmJ> + InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmJ> { bits<26> addr; @@ -172,7 +182,7 @@ class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmFR> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFR> { bits<5> fd; bits<5> fs; @@ -196,7 +206,7 @@ class FFR<bits<6> op, bits<6> 
_funct, bits<5> _fmt, dag outs, dag ins, //===----------------------------------------------------------------------===// class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>: - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmFI> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFI> { bits<5> ft; bits<5> base; @@ -214,7 +224,7 @@ class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>: //===----------------------------------------------------------------------===// class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { bits<5> fs; bits<5> ft; @@ -235,7 +245,7 @@ class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> : class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { bits<5> rd; bits<5> rs; @@ -256,7 +266,7 @@ class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr, class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { bits<5> fd; bits<5> fs; @@ -303,7 +313,7 @@ class FFR2P<bits<6> funct, bits<5> fmt, string opstr, // Floating point madd/msub/nmadd/nmsub. class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr, list<dag> pattern> - : MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { + : InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { bits<5> fd; bits<5> fr; bits<5> fs; @@ -321,7 +331,7 @@ class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr, // FP indexed load/store instructions. class FFMemIdx<bits<6> funct, dag outs, dag ins, string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { bits<5> base; bits<5> index; diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 458e4f7..50e3eb5 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -27,68 +27,19 @@ using namespace llvm; -MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm) +MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm, unsigned UncondBr) : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), - TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()), - InMips16Mode(TM.getSubtarget<MipsSubtarget>().inMips16Mode()), - RI(*TM.getSubtargetImpl(), *this), - UncondBrOpc(TM.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J) {} + TM(tm), UncondBrOpc(UncondBr) {} -const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const { - return RI; -} +const MipsInstrInfo *MipsInstrInfo::create(MipsTargetMachine &TM) { + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16InstrInfo(TM); -static bool isZeroImm(const MachineOperand &op) { - return op.isImm() && op.getImm() == 0; + return llvm::createMipsSEInstrInfo(TM); } -/// isLoadFromStackSlot - If the specified machine instruction is a direct -/// load from a stack slot, return the virtual or physical register number of -/// the destination along with the FrameIndex of the loaded stack slot. If -/// not, return 0. 
This predicate must return 0 if the instruction has -/// any side effects other than loading from the stack slot. -unsigned MipsInstrInfo:: -isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const -{ - unsigned Opc = MI->getOpcode(); - - if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) || - (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) || - (Opc == Mips::LDC1) || (Opc == Mips::LDC164) || - (Opc == Mips::LDC164_P8)) { - if ((MI->getOperand(1).isFI()) && // is a stack slot - (MI->getOperand(2).isImm()) && // the imm is zero - (isZeroImm(MI->getOperand(2)))) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - } - - return 0; -} - -/// isStoreToStackSlot - If the specified machine instruction is a direct -/// store to a stack slot, return the virtual or physical register number of -/// the source reg along with the FrameIndex of the loaded stack slot. If -/// not, return 0. This predicate must return 0 if the instruction has -/// any side effects other than storing to the stack slot. -unsigned MipsInstrInfo:: -isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const -{ - unsigned Opc = MI->getOpcode(); - - if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) || - (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) || - (Opc == Mips::SDC1) || (Opc == Mips::SDC164) || - (Opc == Mips::SDC164_P8)) { - if ((MI->getOperand(1).isFI()) && // is a stack slot - (MI->getOperand(2).isImm()) && // the imm is zero - (isZeroImm(MI->getOperand(2)))) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - } - return 0; +bool MipsInstrInfo::isZeroImm(const MachineOperand &op) const { + return op.isImm() && op.getImm() == 0; } /// insertNoop - If data hazard condition is found insert the target nop @@ -100,83 +51,8 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const BuildMI(MBB, MI, DL, get(Mips::NOP)); } -void MipsInstrInfo:: -copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { - unsigned Opc = 0, ZeroReg = 0; - - if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg. - if (Mips::CPURegsRegClass.contains(SrcReg)) { - if (InMips16Mode) - Opc=Mips::Mov32R16; - else { - Opc = Mips::ADDu, ZeroReg = Mips::ZERO; - } - } - else if (Mips::CCRRegClass.contains(SrcReg)) - Opc = Mips::CFC1; - else if (Mips::FGR32RegClass.contains(SrcReg)) - Opc = Mips::MFC1; - else if (SrcReg == Mips::HI) - Opc = Mips::MFHI, SrcReg = 0; - else if (SrcReg == Mips::LO) - Opc = Mips::MFLO, SrcReg = 0; - } - else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg. - if (Mips::CCRRegClass.contains(DestReg)) - Opc = Mips::CTC1; - else if (Mips::FGR32RegClass.contains(DestReg)) - Opc = Mips::MTC1; - else if (DestReg == Mips::HI) - Opc = Mips::MTHI, DestReg = 0; - else if (DestReg == Mips::LO) - Opc = Mips::MTLO, DestReg = 0; - } - else if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) - Opc = Mips::FMOV_S; - else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) - Opc = Mips::FMOV_D32; - else if (Mips::FGR64RegClass.contains(DestReg, SrcReg)) - Opc = Mips::FMOV_D64; - else if (Mips::CCRRegClass.contains(DestReg, SrcReg)) - Opc = Mips::MOVCCRToCCR; - else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg. 
- if (Mips::CPU64RegsRegClass.contains(SrcReg)) - Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64; - else if (SrcReg == Mips::HI64) - Opc = Mips::MFHI64, SrcReg = 0; - else if (SrcReg == Mips::LO64) - Opc = Mips::MFLO64, SrcReg = 0; - else if (Mips::FGR64RegClass.contains(SrcReg)) - Opc = Mips::DMFC1; - } - else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg. - if (DestReg == Mips::HI64) - Opc = Mips::MTHI64, DestReg = 0; - else if (DestReg == Mips::LO64) - Opc = Mips::MTLO64, DestReg = 0; - else if (Mips::FGR64RegClass.contains(DestReg)) - Opc = Mips::DMTC1; - } - - assert(Opc && "Cannot copy registers"); - - MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); - - if (DestReg) - MIB.addReg(DestReg, RegState::Define); - - if (ZeroReg) - MIB.addReg(ZeroReg); - - if (SrcReg) - MIB.addReg(SrcReg, getKillRegState(KillSrc)); -} - -static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI, - unsigned Flag) { +MachineMemOperand *MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI, + unsigned Flag) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); @@ -185,130 +61,6 @@ static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI, MFI.getObjectSize(FI), Align); } -void MipsInstrInfo:: -storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); - - unsigned Opc = 0; - - if (Mips::CPURegsRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::SW_P8 : Mips::SW; - else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::SD_P8 : Mips::SD; - else if (Mips::FGR32RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1; - else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) - Opc = Mips::SDC1; - else if (Mips::FGR64RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164; - - assert(Opc && "Register class not handled!"); - BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO); -} - -void MipsInstrInfo:: -loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const -{ - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); - unsigned Opc = 0; - - if (Mips::CPURegsRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LW_P8 : Mips::LW; - else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LD_P8 : Mips::LD; - else if (Mips::FGR32RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1; - else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) - Opc = Mips::LDC1; - else if (Mips::FGR64RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? 
Mips::LDC164_P8 : Mips::LDC164; - - assert(Opc && "Register class not handled!"); - BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0) - .addMemOperand(MMO); -} - -void MipsInstrInfo::ExpandRetRA(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned Opc) const { - BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(Opc)) - .addReg(Mips::RA); -} - -void MipsInstrInfo::ExpandRetRA16(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned Opc) const { - BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(Opc)); -} - -void MipsInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetInstrInfo *TII = TM.getInstrInfo(); - unsigned DstReg = I->getOperand(0).getReg(); - unsigned SrcReg = I->getOperand(1).getReg(); - unsigned N = I->getOperand(2).getImm(); - const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1); - DebugLoc dl = I->getDebugLoc(); - - assert(N < 2 && "Invalid immediate"); - unsigned SubIdx = N ? Mips::sub_fpodd : Mips::sub_fpeven; - unsigned SubReg = TM.getRegisterInfo()->getSubReg(SrcReg, SubIdx); - - BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg); -} - -void MipsInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetInstrInfo *TII = TM.getInstrInfo(); - unsigned DstReg = I->getOperand(0).getReg(); - unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); - const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1); - DebugLoc dl = I->getDebugLoc(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - - // mtc1 Lo, $fp - // mtc1 Hi, $fp + 1 - BuildMI(MBB, I, dl, Mtc1Tdd, TRI->getSubReg(DstReg, Mips::sub_fpeven)) - .addReg(LoReg); - BuildMI(MBB, I, dl, Mtc1Tdd, TRI->getSubReg(DstReg, Mips::sub_fpodd)) - .addReg(HiReg); -} - -bool MipsInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { - MachineBasicBlock &MBB = *MI->getParent(); - - switch(MI->getDesc().getOpcode()) { - default: - return false; - case Mips::RetRA: - ExpandRetRA(MBB, MI, Mips::RET); - break; - case Mips::RetRA16: - ExpandRetRA16(MBB, MI, Mips::JrRa16); - break; - case Mips::BuildPairF64: - ExpandBuildPairF64(MBB, MI); - break; - case Mips::ExtractElementF64: - ExpandExtractElementF64(MBB, MI); - break; - } - - MBB.erase(MI); - return true; -} - MachineInstr* MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *MDPtr, @@ -322,42 +74,9 @@ MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, // Branch Analysis //===----------------------------------------------------------------------===// -static unsigned GetAnalyzableBrOpc(unsigned Opc) { - return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || - Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || - Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || - Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || - Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B || - Opc == Mips::J) ? - Opc : 0; -} - -/// GetOppositeBranchOpc - Return the inverse of the specified -/// opcode, e.g. turning BEQ to BNE. 
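The free function deleted just below becomes a pure-virtual hook on MipsInstrInfo (see the header changes later in this diff, and MipsLongBranch switching to TII->GetOppositeBranchOpc), so each subtarget supplies its own inversion table. A sketch of the new interface shape, with only the BEQ/BNE entries of the table shown:

    struct MipsInstrInfoSketch {
      virtual ~MipsInstrInfoSketch() {}
      virtual unsigned GetOppositeBranchOpc(unsigned Opc) const = 0;
    };

    enum { BEQ = 1, BNE }; // stand-in opcode values

    struct MipsSEInstrInfoSketch : MipsInstrInfoSketch {
      virtual unsigned GetOppositeBranchOpc(unsigned Opc) const {
        switch (Opc) {
        case BEQ: return BNE; // invert == to !=
        case BNE: return BEQ;
        }
        return Opc; // the real table covers every analyzable branch
      }
    };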
-unsigned Mips::GetOppositeBranchOpc(unsigned Opc) -{ - switch (Opc) { - default: llvm_unreachable("Illegal opcode!"); - case Mips::BEQ: return Mips::BNE; - case Mips::BNE: return Mips::BEQ; - case Mips::BGTZ: return Mips::BLEZ; - case Mips::BGEZ: return Mips::BLTZ; - case Mips::BLTZ: return Mips::BGEZ; - case Mips::BLEZ: return Mips::BGTZ; - case Mips::BEQ64: return Mips::BNE64; - case Mips::BNE64: return Mips::BEQ64; - case Mips::BGTZ64: return Mips::BLEZ64; - case Mips::BGEZ64: return Mips::BLTZ64; - case Mips::BLTZ64: return Mips::BGEZ64; - case Mips::BLEZ64: return Mips::BGTZ64; - case Mips::BC1T: return Mips::BC1F; - case Mips::BC1F: return Mips::BC1T; - } -} - -static void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, - MachineBasicBlock *&BB, - SmallVectorImpl<MachineOperand> &Cond) { +void MipsInstrInfo::AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, + MachineBasicBlock *&BB, + SmallVectorImpl<MachineOperand> &Cond) const { assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch"); int NumOp = Inst->getNumExplicitOperands(); @@ -527,7 +246,7 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert( (Cond.size() && Cond.size() <= 3) && "Invalid Mips branch condition!"); - Cond[0].setImm(Mips::GetOppositeBranchOpc(Cond[0].getImm())); + Cond[0].setImm(GetOppositeBranchOpc(Cond[0].getImm())); return false; } diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 358f817..7d56259 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -26,99 +26,69 @@ namespace llvm { class MipsInstrInfo : public MipsGenInstrInfo { +protected: MipsTargetMachine &TM; - bool IsN64; bool InMips16Mode; - const MipsRegisterInfo RI; unsigned UncondBrOpc; + public: - explicit MipsInstrInfo(MipsTargetMachine &TM); + explicit MipsInstrInfo(MipsTargetMachine &TM, unsigned UncondBrOpc); - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). - /// - virtual const MipsRegisterInfo &getRegisterInfo() const; - - /// isLoadFromStackSlot - If the specified machine instruction is a direct - /// load from a stack slot, return the virtual or physical register number of - /// the destination along with the FrameIndex of the loaded stack slot. If - /// not, return 0. This predicate must return 0 if the instruction has - /// any side effects other than loading from the stack slot. - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - /// isStoreToStackSlot - If the specified machine instruction is a direct - /// store to a stack slot, return the virtual or physical register number of - /// the source reg along with the FrameIndex of the loaded stack slot. If - /// not, return 0. This predicate must return 0 if the instruction has - /// any side effects other than storing to the stack slot. 
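The declarations dropped below match the implementations deleted from MipsInstrInfo.cpp earlier; presumably they reappear in the subtarget-specific instruction infos. The check itself is simple, sketched here with the MachineInstr operand model reduced to a struct:

    // A load/store touches a stack slot directly when its address operand is
    // a FrameIndex and the offset immediate is zero.
    struct SlotAccessView {
      bool AddrIsFrameIndex; // operand 1
      long long OffsetImm;   // operand 2
      int FrameIndex;
      unsigned Reg;          // operand 0, the transferred register
    };

    // Same contract the deleted hooks documented: return the register and
    // set FI on a match, return 0 otherwise.
    unsigned matchDirectStackSlot(const SlotAccessView &A, int &FI) {
      if (A.AddrIsFrameIndex && A.OffsetImm == 0) {
        FI = A.FrameIndex;
        return A.Reg;
      }
      return 0;
    }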
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; + static const MipsInstrInfo *create(MipsTargetMachine &TM); /// Branch Analysis virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const; - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - -private: - void ExpandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned Opc) const; - void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned Opc) const; - void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL, - const SmallVectorImpl<MachineOperand>& Cond) const; - void ExpandExtractElementF64(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - void ExpandBuildPairF64(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; -public: virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + + virtual + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *MDPtr, DebugLoc DL) const; - virtual - bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; - /// Insert nop instruction when hazard condition is found virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const MipsRegisterInfo &getRegisterInfo() const = 0; + + virtual unsigned GetOppositeBranchOpc(unsigned Opc) const = 0; + /// Return the number of bytes of code the specified instruction may be. unsigned GetInstSizeInBytes(const MachineInstr *MI) const; + +protected: + bool isZeroImm(const MachineOperand &op) const; + + MachineMemOperand *GetMemOperand(MachineBasicBlock &MBB, int FI, + unsigned Flag) const; + +private: + virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const = 0; + + void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, + MachineBasicBlock *&BB, + SmallVectorImpl<MachineOperand> &Cond) const; + + void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL, + const SmallVectorImpl<MachineOperand>& Cond) const; }; namespace Mips { - /// GetOppositeBranchOpc - Return the inverse of the specified - /// opcode, e.g. turning BEQ to BNE. - unsigned GetOppositeBranchOpc(unsigned Opc); - /// Emit a series of instructions to load an immediate. 
All instructions /// except for the last one are emitted. The function returns the number of /// MachineInstrs generated. The opcode-immediate pair of the last @@ -130,6 +100,10 @@ namespace Mips { MipsAnalyzeImmediate::Inst *LastInst); } +/// Create MipsInstrInfo objects. +const MipsInstrInfo *createMips16InstrInfo(MipsTargetMachine &TM); +const MipsInstrInfo *createMipsSEInstrInfo(MipsTargetMachine &TM); + } #endif diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index f1aada4..fd952ef 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -208,17 +208,24 @@ def uimm16 : Operand<i32> { let PrintMethod = "printUnsignedImm"; } +def MipsMemAsmOperand : AsmOperandClass { + let Name = "Mem"; + let ParserMethod = "parseMemOperand"; +} + // Address operand def mem : Operand<i32> { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops CPURegs, simm16); let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = MipsMemAsmOperand; } def mem64 : Operand<i64> { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops CPU64Regs, simm16_64); let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = MipsMemAsmOperand; } def mem_ea : Operand<i32> { @@ -722,9 +729,11 @@ class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC, let neverHasSideEffects = 1; } -class EffectiveAddress<string instr_asm, RegisterClass RC, Operand Mem> : - FMem<0x09, (outs RC:$rt), (ins Mem:$addr), - instr_asm, [(set RC:$rt, addr:$addr)], IIAlu>; +class EffectiveAddress<bits<6> opc, string instr_asm, RegisterClass RC, Operand Mem> : + FMem<opc, (outs RC:$rt), (ins Mem:$addr), + instr_asm, [(set RC:$rt, addr:$addr)], IIAlu> { + let isCodeGenOnly = 1; +} // Count Leading Ones/Zeros in Word class CountLeading0<bits<6> func, string instr_asm, RegisterClass RC>: @@ -803,9 +812,9 @@ class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>: // Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*). class Atomic2Ops<PatFrag Op, string Opstr, RegisterClass DRC, RegisterClass PRC> : - MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr), - !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"), - [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>; + PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr), + !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"), + [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>; multiclass Atomic2Ops32<PatFrag Op, string Opstr> { def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>, @@ -819,9 +828,9 @@ multiclass Atomic2Ops32<PatFrag Op, string Opstr> { // Atomic Compare & Swap. class AtomicCmpSwap<PatFrag Op, string Width, RegisterClass DRC, RegisterClass PRC> : - MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap), - !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"), - [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>; + PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap), + !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"), + [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>; multiclass AtomicCmpSwap32<PatFrag Op, string Width> { def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>, @@ -851,14 +860,13 @@ class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> : // Return RA. 
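The listing resumes below with RetRA (now a PseudoSE) and the call-frame pseudos, whose operands widen from uimm16 to i32imm since the rounded-up call-frame size no longer fits a 16-bit field in general. Conceptually, the pair brackets every call sequence; on targets that preallocate the maximum call frame, as Mips does, the markers are later discarded rather than expanded (the old eliminateCallFramePseudoInstr further down simply erased them). A scalar sketch of what the pair stands for when it is actually expanded:

    struct CallFrameSketch {
      long long SP;
      void adjCallStackDown(unsigned Amt) { SP -= Amt; } // before the call
      void adjCallStackUp(unsigned Amt)   { SP += Amt; } // after the call
    };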
let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in -def RetRA : MipsPseudo<(outs), (ins), "", [(MipsRet)]>; +def RetRA : PseudoSE<(outs), (ins), "", [(MipsRet)]>; -// As stack alignment is always done with addiu, we need a 16-bit immediate -let Defs = [SP], Uses = [SP] in { -def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins uimm16:$amt), +let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { +def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt), "!ADJCALLSTACKDOWN $amt", [(callseq_start timm:$amt)]>; -def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2), +def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), "!ADJCALLSTACKUP $amt1", [(callseq_end timm:$amt1, timm:$amt2)]>; } @@ -868,8 +876,8 @@ def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2), // are used, we have the same behavior, but get also a bunch of warnings // from the assembler. let neverHasSideEffects = 1 in -def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc, CPURegs:$gp), - ".cprestore\t$loc", []>; +def CPRESTORE : PseudoSE<(outs), (ins i32imm:$loc, CPURegs:$gp), + ".cprestore\t$loc", []>; let usesCustomInserter = 1 in { defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8, "load_add_8">; @@ -969,8 +977,8 @@ defm SWL : StoreLeftRightM32<0x2a, "swl", MipsSWL>; defm SWR : StoreLeftRightM32<0x2e, "swr", MipsSWR>; let hasSideEffects = 1 in -def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype", - [(MipsSync imm:$stype)], NoItinerary, FrmOther> +def SYNC : InstSE<(outs), (ins i32imm:$stype), "sync $stype", + [(MipsSync imm:$stype)], NoItinerary, FrmOther> { bits<5> stype; let Opcode = 0; @@ -1046,17 +1054,13 @@ let addr=0 in // instructions. The same not happens for stack address copies, so an // add op with mem ComplexPattern is used and the stack address copy // can be matched. It's similar to Sparc LEA_ADDRi -def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> { - let isCodeGenOnly = 1; -} +def LEA_ADDiu : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>; // DynAlloc node points to dynamically allocated stack space. // $sp is added to the list of implicitly used registers to prevent dead code // elimination from removing instructions that modify $sp. let Uses = [SP] in -def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> { - let isCodeGenOnly = 1; -} +def DynAlloc : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>; // MADD*/MSUB* def MADD : MArithR<0, "madd", MipsMAdd, 1>; diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp index 150bdbb..052046a 100644 --- a/lib/Target/Mips/MipsJITInfo.cpp +++ b/lib/Target/Mips/MipsJITInfo.cpp @@ -27,7 +27,52 @@ using namespace llvm; void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction"); + unsigned NewAddr = (intptr_t)New; + unsigned OldAddr = (intptr_t)Old; + const unsigned NopInstr = 0x0; + + // If the functions are in the same memory segment, insert PC-region branch. + if ((NewAddr & 0xF0000000) == ((OldAddr + 4) & 0xF0000000)) { + unsigned *OldInstruction = (unsigned *)Old; + *OldInstruction = 0x08000000; + unsigned JTargetAddr = NewAddr & 0x0FFFFFFC; + + JTargetAddr >>= 2; + *OldInstruction |= JTargetAddr; + + // Insert a NOP. 
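The replaceMachineCodeForFunction rewrite above patches the old function in place; the near (same-region) case is above, and the far case follows below. A standalone sketch of the instruction words involved, with the constants copied from the hunk; MIPS J-format jumps only reach targets in the same 256 MB PC region, hence the high-nibble test:

    #include <stdint.h>

    // The four high address bits of a 'j' target come from the delay-slot
    // PC, so old and new addresses must share them.
    bool sameJumpRegion(uint32_t OldAddr, uint32_t NewAddr) {
      return (NewAddr & 0xF0000000u) == ((OldAddr + 4) & 0xF0000000u);
    }

    // J-format: opcode 0x08000000 plus the word index of the target.
    uint32_t encodeJ(uint32_t Target) {
      return 0x08000000u | ((Target & 0x0FFFFFFCu) >> 2);
    }

    // Far case (continued below in the hunk): build the address in $t0,
    // then jr $t0, with a nop in the delay slot.
    void encodeFarJump(uint32_t Target, uint32_t Out[4]) {
      Out[0] = 0x3c080000u | ((Target & 0xffff0000u) >> 16); // lui $t0, hi
      Out[1] = 0x25080000u | (Target & 0x0000ffffu);  // addiu $t0, $t0, lo
      Out[2] = 0x01000008u;                           // jr $t0
      Out[3] = 0x0u;                                  // nop
    }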
+ OldInstruction++; + *OldInstruction = NopInstr; + + sys::Memory::InvalidateInstructionCache(Old, 2 * 4); + } else { + // We need to clear hint bits from the instruction, in case it is 'jr ra'. + const unsigned HintMask = 0xFFFFF83F, ReturnSequence = 0x03e00008; + unsigned* CurrentInstr = (unsigned*)Old; + unsigned CurrInstrHintClear = (*CurrentInstr) & HintMask; + unsigned* NextInstr = CurrentInstr + 1; + unsigned NextInstrHintClear = (*NextInstr) & HintMask; + + // Do absolute jump if there are 2 or more instructions before return from + // the old function. + if ((CurrInstrHintClear != ReturnSequence) && + (NextInstrHintClear != ReturnSequence)) { + const unsigned LuiT0Instr = 0x3c080000, AddiuT0Instr = 0x25080000; + const unsigned JrT0Instr = 0x01000008; + // lui t0, high 16 bit of the NewAddr + (*(CurrentInstr++)) = LuiT0Instr | ((NewAddr & 0xffff0000) >> 16); + // addiu t0, t0, low 16 bit of the NewAddr + (*(CurrentInstr++)) = AddiuT0Instr | (NewAddr & 0x0000ffff); + // jr t0 + (*(CurrentInstr++)) = JrT0Instr; + (*CurrentInstr) = NopInstr; + + sys::Memory::InvalidateInstructionCache(Old, 4 * 4); + } else { + // Unsupported case + report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction"); + } + } } /// JITCompilerFunction - This contains the address of the JIT function used to diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index 70ecbc1..f78203f 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -207,7 +207,7 @@ int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) { // MachineBasicBlock operand MBBOpnd. void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br, DebugLoc DL, MachineBasicBlock *MBBOpnd) { - unsigned NewOpc = Mips::GetOppositeBranchOpc(Br->getOpcode()); + unsigned NewOpc = TII->GetOppositeBranchOpc(Br->getOpcode()); const MCInstrDesc &NewDesc = TII->get(NewOpc); MachineInstrBuilder MIB = BuildMI(MBB, Br, DL, NewDesc); diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index b2232c6..df3c4c0 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -48,8 +48,6 @@ class MipsFunctionInfo : public MachineFunctionInfo { // OutArgFIRange: Range of indices of all frame objects created during call to // LowerCall except for the frame object for restoring $gp. std::pair<int, int> InArgFIRange, OutArgFIRange; - int GlobalRegFI; - mutable int DynAllocFI; // Frame index of dynamically allocated stack area. unsigned MaxCallFrameSize; bool EmitNOAT; @@ -58,8 +56,7 @@ public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), - OutArgFIRange(std::make_pair(-1, 0)), GlobalRegFI(0), DynAllocFI(0), - MaxCallFrameSize(0), EmitNOAT(false) + OutArgFIRange(std::make_pair(-1, 0)), MaxCallFrameSize(0), EmitNOAT(false) {} bool isInArgFI(int FI) const { @@ -77,34 +74,6 @@ public: OutArgFIRange.second = LastFI; } - bool isGlobalRegFI(int FI) const { - return GlobalRegFI && (FI == GlobalRegFI); - } - - int getGlobalRegFI() const { - return GlobalRegFI; - } - - int initGlobalRegFI() { - const TargetMachine &TM = MF.getTarget(); - unsigned RegSize = TM.getSubtarget<MipsSubtarget>().isABI_N64() ? 
8 : 4; - int64_t StackAlignment = TM.getFrameLowering()->getStackAlignment(); - uint64_t Offset = RoundUpToAlignment(MaxCallFrameSize, StackAlignment); - - GlobalRegFI = MF.getFrameInfo()->CreateFixedObject(RegSize, Offset, true); - return GlobalRegFI; - } - - // The first call to this function creates a frame object for dynamically - // allocated stack area. - int getDynAllocFI() const { - if (!DynAllocFI) - DynAllocFI = MF.getFrameInfo()->CreateFixedObject(4, 0, true); - - return DynAllocFI; - } - bool isDynAllocFI(int FI) const { return DynAllocFI && DynAllocFI == FI; } - unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index a3ce236..ae6ae3a 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -144,15 +144,6 @@ MipsRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { return true; } -// This function eliminate ADJCALLSTACKDOWN, -// ADJCALLSTACKUP pseudo instructions -void MipsRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. - MBB.erase(I); -} - // FrameIndex represent objects inside a abstract stack. // We must replace FrameIndex with an stack/frame pointer // direct reference. @@ -161,8 +152,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); unsigned i = 0; while (!MI.getOperand(i).isFI()) { @@ -182,68 +171,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, << "spOffset : " << spOffset << "\n" << "stackSize : " << stackSize << "\n"); - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - int MinCSFI = 0; - int MaxCSFI = -1; - - if (CSI.size()) { - MinCSFI = CSI[0].getFrameIdx(); - MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); - } - - // The following stack frame objects are always referenced relative to $sp: - // 1. Outgoing arguments. - // 2. Pointer to dynamically allocated stack space. - // 3. Locations for callee-saved registers. - // Everything else is referenced relative to whatever register - // getFrameRegister() returns. - unsigned FrameReg; - - if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) || - (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) - FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; - else - FrameReg = getFrameRegister(MF); - - // Calculate final offset. - // - There is no need to change the offset if the frame object is one of the - // following: an outgoing argument, pointer to a dynamically allocated - // stack space or a $gp restore location, - // - If the frame object is any of the following, its offset must be adjusted - // by adding the size of the stack: - // incoming argument, callee-saved register location or local variable. 
- int64_t Offset; - - if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) || - MipsFI->isGlobalRegFI(FrameIndex)) - Offset = spOffset; - else - Offset = spOffset + (int64_t)stackSize; - - Offset += MI.getOperand(i+1).getImm(); - - DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); - - // If MI is not a debug value, make sure Offset fits in the 16-bit immediate - // field. - if (!MI.isDebugValue() && !isInt<16>(Offset)) { - MachineBasicBlock &MBB = *MI.getParent(); - DebugLoc DL = II->getDebugLoc(); - unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT; - MipsAnalyzeImmediate::Inst LastInst(0, 0); - - MipsFI->setEmitNOAT(); - Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true, - &LastInst); - BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg); - - FrameReg = ATReg; - Offset = SignExtend64<16>(LastInst.ImmOpnd); - } - - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Offset); + eliminateFI(MI, i, FrameIndex, stackSize, spOffset); } unsigned MipsRegisterInfo:: diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index f320bae..9a05e94 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -25,10 +25,12 @@ class MipsSubtarget; class TargetInstrInfo; class Type; -struct MipsRegisterInfo : public MipsGenRegisterInfo { +class MipsRegisterInfo : public MipsGenRegisterInfo { +protected: const MipsSubtarget &Subtarget; const TargetInstrInfo &TII; +public: MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii); /// getRegisterNumbering - Given the enum value for some register, e.g. @@ -51,10 +53,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - /// Stack Frame Processing Methods void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; @@ -67,6 +65,11 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { /// Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; + +private: + virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, + int64_t SPOffset) const = 0; }; } // end namespace llvm diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index b255e42..4015add 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -239,6 +239,9 @@ let Namespace = "Mips" in { // fcc0 register def FCC0 : Register<"fcc0">; + // PC register + def PC : Register<"pc">; + // Hardware register $29 def HWR29 : Register<"29">; def HWR29_64 : Register<"29">; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp new file mode 100644 index 0000000..1c59847 --- /dev/null +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -0,0 +1,210 @@ +//===-- MipsSEFrameLowering.cpp - Mips32/64 Frame Information -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips32/64 implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "MipsSEFrameLowering.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsSEInstrInfo.h" +#include "MipsMachineFunction.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MipsRegisterInfo *RegInfo = + static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const MipsSEInstrInfo &TII = + *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo()); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; + unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; + unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + + // First, compute final stack size. + uint64_t StackSize = MFI->getStackSize(); + + // No need to allocate space on the stack. + if (StackSize == 0 && !MFI->adjustsStack()) return; + + MachineModuleInfo &MMI = MF.getMMI(); + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + MachineLocation DstML, SrcML; + + // Adjust stack. + TII.adjustStackPtr(SP, -StackSize, MBB, MBBI); + + // emit ".cfi_def_cfa_offset StackSize" + MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel); + DstML = MachineLocation(MachineLocation::VirtualFP); + SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize); + Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML)); + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + if (CSI.size()) { + // Find the instruction past the last instruction that saves a callee-saved + // register to the stack. + for (unsigned i = 0; i < CSI.size(); ++i) + ++MBBI; + + // Iterate over list of callee-saved registers and emit .cfi_offset + // directives. + MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel); + + for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), + E = CSI.end(); I != E; ++I) { + int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); + unsigned Reg = I->getReg(); + + // If Reg is a double precision register, emit two cfa_offsets, + // one for each of the paired single precision registers. 
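A note on the arithmetic in the branch that follows: an AFGR64 register is a pair of single-precision registers, so its 8-byte spill slot is reported to the unwinder as two 4-byte entries at Offset and Offset + 4, and on big-endian subtargets the even and odd halves trade places. A sketch of that slot assignment (struct and helper names are illustrative):

    #include <cstdint>
    #include <utility>

    struct HalfSlots { int64_t EvenHalf, OddHalf; };

    // For a paired double spilled at Offset, decide which 4-byte slot
    // holds each single-precision half; big-endian stores them swapped.
    static HalfSlots fpPairSlots(int64_t Offset, bool IsLittle) {
      HalfSlots S = { Offset, Offset + 4 };
      if (!IsLittle)
        std::swap(S.EvenHalf, S.OddHalf);
      return S;
    }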
+ if (Mips::AFGR64RegClass.contains(Reg)) { + MachineLocation DstML0(MachineLocation::VirtualFP, Offset); + MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4); + MachineLocation SrcML0(RegInfo->getSubReg(Reg, Mips::sub_fpeven)); + MachineLocation SrcML1(RegInfo->getSubReg(Reg, Mips::sub_fpodd)); + + if (!STI.isLittle()) + std::swap(SrcML0, SrcML1); + + Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0)); + Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1)); + } else { + // Reg is either in CPURegs or FGR32. + DstML = MachineLocation(MachineLocation::VirtualFP, Offset); + SrcML = MachineLocation(Reg); + Moves.push_back(MachineMove(CSLabel, DstML, SrcML)); + } + } + } + + // if framepointer enabled, set it to point to the stack pointer. + if (hasFP(MF)) { + // Insert instruction "move $fp, $sp" at this location. + BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO); + + // emit ".cfi_def_cfa_register $fp" + MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel); + DstML = MachineLocation(FP); + SrcML = MachineLocation(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML)); + } +} + +void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MipsSEInstrInfo &TII = + *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo()); + DebugLoc dl = MBBI->getDebugLoc(); + unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; + unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; + unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + + // if framepointer enabled, restore the stack pointer. + if (hasFP(MF)) { + // Find the first instruction that restores a callee-saved register. + MachineBasicBlock::iterator I = MBBI; + + for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i) + --I; + + // Insert instruction "move $sp, $fp" at this location. + BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO); + } + + // Get the number of bytes from FrameInfo + uint64_t StackSize = MFI->getStackSize(); + + if (!StackSize) + return; + + // Adjust stack. + TII.adjustStackPtr(SP, StackSize, MBB, MBBI); +} + +bool MipsSEFrameLowering:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + MachineFunction *MF = MBB.getParent(); + MachineBasicBlock *EntryBlock = MF->begin(); + const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Add the callee-saved register as live-in. Do not add if the register is + // RA and return address is taken, because it has already been added in + // method MipsTargetLowering::LowerRETURNADDR. + // It's killed at the spill, unless the register is RA and return address + // is taken. + unsigned Reg = CSI[i].getReg(); + bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64) + && MF->getFrameInfo()->isReturnAddressTaken(); + if (!IsRAAndRetAddrIsTaken) + EntryBlock->addLiveIn(Reg); + + // Insert the spill to the stack frame. 
+ bool IsKill = !IsRAAndRetAddrIsTaken; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*EntryBlock, MI, Reg, IsKill, + CSI[i].getFrameIdx(), RC, TRI); + } + + return true; +} + +bool +MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Reserve call frame if the size of the maximum call frame fits into 16-bit + // immediate field and there are no variable sized objects on the stack. + return isInt<16>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects(); +} + +void MipsSEFrameLowering:: +processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; + + // Mark $fp as used if function has dedicated frame pointer. + if (hasFP(MF)) + MRI.setPhysRegUsed(FP); +} + +const MipsFrameLowering * +llvm::createMipsSEFrameLowering(const MipsSubtarget &ST) { + return new MipsSEFrameLowering(ST); +} diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h new file mode 100644 index 0000000..6481a0a --- /dev/null +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -0,0 +1,44 @@ +//===-- MipsSEFrameLowering.h - Mips32/64 frame lowering --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSE_FRAMEINFO_H +#define MIPSSE_FRAMEINFO_H + +#include "MipsFrameLowering.h" + +namespace llvm { + +class MipsSEFrameLowering : public MipsFrameLowering { +public: + explicit MipsSEFrameLowering(const MipsSubtarget &STI) + : MipsFrameLowering(STI) {} + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + bool hasReservedCallFrame(const MachineFunction &MF) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp new file mode 100644 index 0000000..eeb1de3 --- /dev/null +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -0,0 +1,320 @@ +//===-- MipsSEInstrInfo.cpp - Mips32/64 Instruction Information -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips32/64 implementation of the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "MipsSEInstrInfo.h" +#include "MipsTargetMachine.h" +#include "MipsMachineFunction.h" +#include "InstPrinter/MipsInstPrinter.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; + +MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm) + : MipsInstrInfo(tm, + tm.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J), + RI(*tm.getSubtargetImpl(), *this), + IsN64(tm.getSubtarget<MipsSubtarget>().isABI_N64()) {} + +const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const { + return RI; +} + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned MipsSEInstrInfo:: +isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const +{ + unsigned Opc = MI->getOpcode(); + + if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) || + (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) || + (Opc == Mips::LDC1) || (Opc == Mips::LDC164) || + (Opc == Mips::LDC164_P8)) { + if ((MI->getOperand(1).isFI()) && // is a stack slot + (MI->getOperand(2).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(2)))) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + } + + return 0; +} + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. +unsigned MipsSEInstrInfo:: +isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const +{ + unsigned Opc = MI->getOpcode(); + + if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) || + (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) || + (Opc == Mips::SDC1) || (Opc == Mips::SDC164) || + (Opc == Mips::SDC164_P8)) { + if ((MI->getOperand(1).isFI()) && // is a stack slot + (MI->getOperand(2).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(2)))) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + } + return 0; +} + +void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc = 0, ZeroReg = 0; + + if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg. + if (Mips::CPURegsRegClass.contains(SrcReg)) + Opc = Mips::ADDu, ZeroReg = Mips::ZERO; + else if (Mips::CCRRegClass.contains(SrcReg)) + Opc = Mips::CFC1; + else if (Mips::FGR32RegClass.contains(SrcReg)) + Opc = Mips::MFC1; + else if (SrcReg == Mips::HI) + Opc = Mips::MFHI, SrcReg = 0; + else if (SrcReg == Mips::LO) + Opc = Mips::MFLO, SrcReg = 0; + } + else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg. 
+ if (Mips::CCRRegClass.contains(DestReg)) + Opc = Mips::CTC1; + else if (Mips::FGR32RegClass.contains(DestReg)) + Opc = Mips::MTC1; + else if (DestReg == Mips::HI) + Opc = Mips::MTHI, DestReg = 0; + else if (DestReg == Mips::LO) + Opc = Mips::MTLO, DestReg = 0; + } + else if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_S; + else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_D32; + else if (Mips::FGR64RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_D64; + else if (Mips::CCRRegClass.contains(DestReg, SrcReg)) + Opc = Mips::MOVCCRToCCR; + else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg. + if (Mips::CPU64RegsRegClass.contains(SrcReg)) + Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64; + else if (SrcReg == Mips::HI64) + Opc = Mips::MFHI64, SrcReg = 0; + else if (SrcReg == Mips::LO64) + Opc = Mips::MFLO64, SrcReg = 0; + else if (Mips::FGR64RegClass.contains(SrcReg)) + Opc = Mips::DMFC1; + } + else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg. + if (DestReg == Mips::HI64) + Opc = Mips::MTHI64, DestReg = 0; + else if (DestReg == Mips::LO64) + Opc = Mips::MTLO64, DestReg = 0; + else if (Mips::FGR64RegClass.contains(DestReg)) + Opc = Mips::DMTC1; + } + + assert(Opc && "Cannot copy registers"); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); + + if (DestReg) + MIB.addReg(DestReg, RegState::Define); + + if (ZeroReg) + MIB.addReg(ZeroReg); + + if (SrcReg) + MIB.addReg(SrcReg, getKillRegState(KillSrc)); +} + +void MipsSEInstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); + + unsigned Opc = 0; + + if (Mips::CPURegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::SW_P8 : Mips::SW; + else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::SD_P8 : Mips::SD; + else if (Mips::FGR32RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1; + else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) + Opc = Mips::SDC1; + else if (Mips::FGR64RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164; + + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); +} + +void MipsSEInstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const +{ + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); + unsigned Opc = 0; + + if (Mips::CPURegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LW_P8 : Mips::LW; + else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LD_P8 : Mips::LD; + else if (Mips::FGR32RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1; + else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) + Opc = Mips::LDC1; + else if (Mips::FGR64RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? 
Mips::LDC164_P8 : Mips::LDC164; + + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); +} + +bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + MachineBasicBlock &MBB = *MI->getParent(); + + switch(MI->getDesc().getOpcode()) { + default: + return false; + case Mips::RetRA: + ExpandRetRA(MBB, MI, Mips::RET); + break; + case Mips::BuildPairF64: + ExpandBuildPairF64(MBB, MI); + break; + case Mips::ExtractElementF64: + ExpandExtractElementF64(MBB, MI); + break; + } + + MBB.erase(MI); + return true; +} + +/// GetOppositeBranchOpc - Return the inverse of the specified +/// opcode, e.g. turning BEQ to BNE. +unsigned MipsSEInstrInfo::GetOppositeBranchOpc(unsigned Opc) const { + switch (Opc) { + default: llvm_unreachable("Illegal opcode!"); + case Mips::BEQ: return Mips::BNE; + case Mips::BNE: return Mips::BEQ; + case Mips::BGTZ: return Mips::BLEZ; + case Mips::BGEZ: return Mips::BLTZ; + case Mips::BLTZ: return Mips::BGEZ; + case Mips::BLEZ: return Mips::BGTZ; + case Mips::BEQ64: return Mips::BNE64; + case Mips::BNE64: return Mips::BEQ64; + case Mips::BGTZ64: return Mips::BLEZ64; + case Mips::BGEZ64: return Mips::BLTZ64; + case Mips::BLTZ64: return Mips::BGEZ64; + case Mips::BLEZ64: return Mips::BGTZ64; + case Mips::BC1T: return Mips::BC1F; + case Mips::BC1F: return Mips::BC1T; + } +} + +/// Adjust SP by Amount bytes. +void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; + + if (isInt<16>(Amount))// addi sp, sp, amount + BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount); + else { // Expand immediate that doesn't fit in 16-bit. + unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT; + + MBB.getParent()->getInfo<MipsFunctionInfo>()->setEmitNOAT(); + Mips::loadImmediate(Amount, STI.isABI_N64(), *this, MBB, I, DL, false, 0); + BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(ATReg); + } +} + +unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { + return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || + Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || + Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || + Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || + Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B || + Opc == Mips::J) ? + Opc : 0; +} + +void MipsSEInstrInfo::ExpandRetRA(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned Opc) const { + BuildMI(MBB, I, I->getDebugLoc(), get(Opc)).addReg(Mips::RA); +} + +void MipsSEInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + unsigned DstReg = I->getOperand(0).getReg(); + unsigned SrcReg = I->getOperand(1).getReg(); + unsigned N = I->getOperand(2).getImm(); + const MCInstrDesc& Mfc1Tdd = get(Mips::MFC1); + DebugLoc dl = I->getDebugLoc(); + + assert(N < 2 && "Invalid immediate"); + unsigned SubIdx = N ? 
Mips::sub_fpodd : Mips::sub_fpeven; + unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx); + + BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg); +} + +void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + unsigned DstReg = I->getOperand(0).getReg(); + unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); + const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1); + DebugLoc dl = I->getDebugLoc(); + const TargetRegisterInfo &TRI = getRegisterInfo(); + + // mtc1 Lo, $fp + // mtc1 Hi, $fp + 1 + BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpeven)) + .addReg(LoReg); + BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpodd)) + .addReg(HiReg); +} + +const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) { + return new MipsSEInstrInfo(TM); +} diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h new file mode 100644 index 0000000..346e74d --- /dev/null +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -0,0 +1,86 @@ +//===-- MipsSEInstrInfo.h - Mips32/64 Instruction Information ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips32/64 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSEINSTRUCTIONINFO_H +#define MIPSSEINSTRUCTIONINFO_H + +#include "MipsInstrInfo.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsSERegisterInfo.h" + +namespace llvm { + +class MipsSEInstrInfo : public MipsInstrInfo { + const MipsSERegisterInfo RI; + bool IsN64; + +public: + explicit MipsSEInstrInfo(MipsTargetMachine &TM); + + virtual const MipsRegisterInfo &getRegisterInfo() const; + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. 
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + + virtual unsigned GetOppositeBranchOpc(unsigned Opc) const; + + /// Adjust SP by Amount bytes. + void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + +private: + virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; + + void ExpandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned Opc) const; + void ExpandExtractElementF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + void ExpandBuildPairF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; +}; + +} + +#endif diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp new file mode 100644 index 0000000..043a1ef --- /dev/null +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -0,0 +1,138 @@ +//===-- MipsSERegisterInfo.cpp - MIPS32/64 Register Information -== -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MIPS32/64 implementation of the TargetRegisterInfo +// class. 
+// +//===----------------------------------------------------------------------===// + +#include "MipsSERegisterInfo.h" +#include "Mips.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsSEInstrInfo.h" +#include "MipsSubtarget.h" +#include "MipsMachineFunction.h" +#include "llvm/Constants.h" +#include "llvm/DebugInfo.h" +#include "llvm/Type.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; + +MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST, + const TargetInstrInfo &TII) + : MipsRegisterInfo(ST, TII) {} + +// This function eliminate ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void MipsSERegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (!TFI->hasReservedCallFrame(MF)) { + int64_t Amount = I->getOperand(0).getImm(); + + if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) + Amount = -Amount; + + const MipsSEInstrInfo *II = static_cast<const MipsSEInstrInfo*>(&TII); + unsigned SP = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; + + II->adjustStackPtr(SP, Amount, MBB, I); + } + + MBB.erase(I); +} + +void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, + unsigned OpNo, int FrameIndex, + uint64_t StackSize, + int64_t SPOffset) const { + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + // The following stack frame objects are always referenced relative to $sp: + // 1. Outgoing arguments. + // 2. Pointer to dynamically allocated stack space. + // 3. Locations for callee-saved registers. + // Everything else is referenced relative to whatever register + // getFrameRegister() returns. + unsigned FrameReg; + + if (MipsFI->isOutArgFI(FrameIndex) || + (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) + FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; + else + FrameReg = getFrameRegister(MF); + + // Calculate final offset. + // - There is no need to change the offset if the frame object is one of the + // following: an outgoing argument, pointer to a dynamically allocated + // stack space or a $gp restore location, + // - If the frame object is any of the following, its offset must be adjusted + // by adding the size of the stack: + // incoming argument, callee-saved register location or local variable. 
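The rule spelled out in the comment above reduces to one addition plus a range check: MIPS memory and addiu immediates are signed 16-bit, so any final offset outside [-32768, 32767] must first be materialized into $at and folded into the frame register (the loadImmediate/ADDu path a few lines further down). A self-contained sketch under those assumptions, with illustrative names:

    #include <cstdint>

    // $sp-relative objects keep their raw offset; frame-register-relative
    // objects are shifted by the final stack size, then the instruction's
    // own immediate is added on top.
    static int64_t finalOffset(int64_t SPOffset, uint64_t StackSize,
                               bool IsSPRelative, int64_t InstrImm) {
      int64_t Off = IsSPRelative ? SPOffset : SPOffset + (int64_t)StackSize;
      return Off + InstrImm;
    }

    // Mirrors llvm::isInt<16>: does Off fit a signed 16-bit immediate?
    static bool fitsImm16(int64_t Off) {
      return Off >= -32768 && Off <= 32767;
    }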
+ int64_t Offset; + + if (MipsFI->isOutArgFI(FrameIndex)) + Offset = SPOffset; + else + Offset = SPOffset + (int64_t)StackSize; + + Offset += MI.getOperand(OpNo + 1).getImm(); + + DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); + + // If MI is not a debug value, make sure Offset fits in the 16-bit immediate + // field. + if (!MI.isDebugValue() && !isInt<16>(Offset)) { + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = II->getDebugLoc(); + unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT; + MipsAnalyzeImmediate::Inst LastInst(0, 0); + + MipsFI->setEmitNOAT(); + Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true, + &LastInst); + BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg); + + FrameReg = ATReg; + Offset = SignExtend64<16>(LastInst.ImmOpnd); + } + + MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); + MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); +} diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h new file mode 100644 index 0000000..4b17b33 --- /dev/null +++ b/lib/Target/Mips/MipsSERegisterInfo.h @@ -0,0 +1,39 @@ +//===-- MipsSERegisterInfo.h - Mips32/64 Register Information ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips32/64 implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSEREGISTERINFO_H +#define MIPSSEREGISTERINFO_H + +#include "MipsRegisterInfo.h" + +namespace llvm { + +class MipsSERegisterInfo : public MipsRegisterInfo { +public: + MipsSERegisterInfo(const MipsSubtarget &Subtarget, + const TargetInstrInfo &TII); + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + +private: + virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, + int64_t SPOffset) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 3215c44..ba15362 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -89,6 +89,9 @@ protected: // InMips16 -- can process Mips16 instructions bool InMips16Mode; + // IsAndroid -- target is android + bool IsAndroid; + InstrItineraryData InstrItins; public: @@ -128,6 +131,7 @@ public: bool isNotSingleFloat() const { return !IsSingleFloat; } bool hasVFPU() const { return HasVFPU; } bool inMips16Mode() const { return InMips16Mode; } + bool isAndroid() const { return IsAndroid; } bool isLinux() const { return IsLinux; } bool hasStandardEncoding() const { return !inMips16Mode(); } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index dd5d35f..2928a73 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -13,6 +13,8 @@ #include "MipsTargetMachine.h" #include "Mips.h" +#include "MipsFrameLowering.h" +#include "MipsInstrInfo.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" @@ -22,8 +24,8 @@ extern "C" void LLVMInitializeMipsTarget() { // Register the 
target. RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget); RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget); - RegisterTargetMachine<Mips64ebTargetMachine> A(TheMips64Target); - RegisterTargetMachine<Mips64elTargetMachine> B(TheMips64elTarget); + RegisterTargetMachine<MipsebTargetMachine> A(TheMips64Target); + RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget); } // DataLayout --> Big-endian, 32-bit pointer/ABI/alignment @@ -48,9 +50,10 @@ MipsTargetMachine(const Target &T, StringRef TT, (Subtarget.isABI_N64() ? "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")), - InstrInfo(*this), - FrameLowering(Subtarget), - TLInfo(*this), TSInfo(*this), JITInfo() { + InstrInfo(MipsInstrInfo::create(*this)), + FrameLowering(MipsFrameLowering::create(*this, Subtarget)), + TLInfo(*this), TSInfo(*this), JITInfo(), + ELFWriterInfo(false, isLittle) { } void MipsebTargetMachine::anchor() { } @@ -71,24 +74,6 @@ MipselTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} -void Mips64ebTargetMachine::anchor() { } - -Mips64ebTargetMachine:: -Mips64ebTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} - -void Mips64elTargetMachine::anchor() { } - -Mips64elTargetMachine:: -Mips64elTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} - namespace { /// Mips Code Generator Pass Configuration Options. 
class MipsPassConfig : public TargetPassConfig { diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 5cbf057..a542ef6 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -20,59 +20,67 @@ #include "MipsJITInfo.h" #include "MipsSelectionDAGInfo.h" #include "MipsSubtarget.h" +#include "MipsELFWriterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { - class formatted_raw_ostream; - - class MipsTargetMachine : public LLVMTargetMachine { - MipsSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment - MipsInstrInfo InstrInfo; - MipsFrameLowering FrameLowering; - MipsTargetLowering TLInfo; - MipsSelectionDAGInfo TSInfo; - MipsJITInfo JITInfo; - - public: - MipsTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool isLittle); - - virtual const MipsInstrInfo *getInstrInfo() const - { return &InstrInfo; } - virtual const TargetFrameLowering *getFrameLowering() const - { return &FrameLowering; } - virtual const MipsSubtarget *getSubtargetImpl() const - { return &Subtarget; } - virtual const TargetData *getTargetData() const - { return &DataLayout;} - virtual MipsJITInfo *getJITInfo() - { return &JITInfo; } - - - virtual const MipsRegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); - } - - virtual const MipsTargetLowering *getTargetLowering() const { - return &TLInfo; - } - - virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { - return &TSInfo; - } - - // Pass Pipeline Configuration - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); - virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); - }; - -/// MipsebTargetMachine - Mips32 big endian target machine. 
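The rewritten block below drops the by-value MipsInstrInfo and MipsFrameLowering members in favor of const pointers filled in by create() factories, which is what lets the Mips64eb/Mips64el machines above collapse into the two endian-specific classes: the subtarget, not the TargetMachine subclass, now picks the concrete implementation. One plausible shape for such a factory (the Mips16 branch is an assumption; only the SE variant appears in this patch):

    // Dispatch on the subtarget. createMips16InstrInfo is hypothetical
    // here, while createMipsSEInstrInfo is declared by this patch.
    const MipsInstrInfo *MipsInstrInfo::create(MipsTargetMachine &TM) {
      if (TM.getSubtarget<MipsSubtarget>().inMips16Mode())
        return createMips16InstrInfo(TM);  // assumed counterpart, not shown
      return createMipsSEInstrInfo(TM);
    }

Note that the new destructor shown below frees InstrInfo only; if FrameLowering is likewise heap-allocated by its factory, it would need the same treatment.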
+class formatted_raw_ostream; +class MipsRegisterInfo; + +class MipsTargetMachine : public LLVMTargetMachine { + MipsSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + const MipsInstrInfo *InstrInfo; + const MipsFrameLowering *FrameLowering; + MipsTargetLowering TLInfo; + MipsSelectionDAGInfo TSInfo; + MipsJITInfo JITInfo; + MipsELFWriterInfo ELFWriterInfo; + +public: + MipsTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, + bool isLittle); + + virtual ~MipsTargetMachine() { delete InstrInfo; } + + virtual const MipsInstrInfo *getInstrInfo() const + { return InstrInfo; } + virtual const TargetFrameLowering *getFrameLowering() const + { return FrameLowering; } + virtual const MipsSubtarget *getSubtargetImpl() const + { return &Subtarget; } + virtual const TargetData *getTargetData() const + { return &DataLayout;} + virtual MipsJITInfo *getJITInfo() + { return &JITInfo; } + + virtual const MipsRegisterInfo *getRegisterInfo() const { + return &InstrInfo->getRegisterInfo(); + } + + virtual const MipsTargetLowering *getTargetLowering() const { + return &TLInfo; + } + + virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + + virtual const MipsELFWriterInfo *getELFWriterInfo() const { + return &ELFWriterInfo; + } + + // Pass Pipeline Configuration + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); +}; + +/// MipsebTargetMachine - Mips32/64 big endian target machine. /// class MipsebTargetMachine : public MipsTargetMachine { virtual void anchor(); @@ -83,7 +91,7 @@ public: CodeGenOpt::Level OL); }; -/// MipselTargetMachine - Mips32 little endian target machine. +/// MipselTargetMachine - Mips32/64 little endian target machine. /// class MipselTargetMachine : public MipsTargetMachine { virtual void anchor(); @@ -94,29 +102,6 @@ public: CodeGenOpt::Level OL); }; -/// Mips64ebTargetMachine - Mips64 big endian target machine. -/// -class Mips64ebTargetMachine : public MipsTargetMachine { - virtual void anchor(); -public: - Mips64ebTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -/// Mips64elTargetMachine - Mips64 little endian target machine. -/// -class Mips64elTargetMachine : public MipsTargetMachine { - virtual void anchor(); -public: - Mips64elTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; } // End llvm namespace #endif diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index f50f9b5..2a2abb1 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -337,7 +337,10 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, // can get a useful trip count. The trip count can // be either a register or an immediate. The location // of the value depends upon the type (reg or imm). 
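In the PowerPC hunk below, a hand-rolled walk over an operand use-chain (getNextOperandForReg) is replaced by MachineRegisterInfo's register iterator, which visits every operand that references a given register. The generic shape of that iteration, sketched as a fragment:

    // MRI is a MachineRegisterInfo*; Reg is the register being traced.
    for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
                                           RE = MRI->reg_end();
         RI != RE; ++RI) {
      MachineOperand &MO = RI.getOperand();   // the referencing operand
      MachineInstr *UseMI = MO.getParent();   // the instruction holding it
      // ... inspect MO / UseMI ...
    }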
- while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) { + for (MachineRegisterInfo::reg_iterator + RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); + RI != RE; ++RI) { + IV_Opnd = &RI.getOperand(); bool SignedCmp; MachineInstr *MI = IV_Opnd->getParent(); if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) && diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 13250b3..61d44c5 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -106,7 +106,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // from FP_ROUND: that rounds to nearest, this rounds to zero. setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); - // We do not currently implment this libm ops for PowerPC. + // We do not currently implement these libm ops for PowerPC. setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); @@ -394,8 +394,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); } - if (Subtarget->has64BitSupport()) + if (Subtarget->has64BitSupport()) { setOperationAction(ISD::PREFETCH, MVT::Other, Legal); + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); + } setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 91c5366..39778a5 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -265,6 +265,15 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS), PPC970_DGroup_First, PPC970_Unit_FXU; } +let Pattern = [(set G8RC:$rT, readcyclecounter)] in +def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins), + "mfspr $rT, 268", SprMFTB>, + PPC970_DGroup_First, PPC970_Unit_FXU; +// Note that encoding mftb using mfspr is now the preferred form, +// and has been since at least ISA v2.03. The mftb instruction has +// now been phased out. Using mfspr, however, is known not to work on +// the POWER3. + let Defs = [X1], Uses = [X1] in def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"", [(set G8RC:$result, diff --git a/lib/Target/PowerPC/TargetInfo/Makefile b/lib/Target/PowerPC/TargetInfo/Makefile index a101aa4..2d0560d 100644 --- a/lib/Target/PowerPC/TargetInfo/Makefile +++ b/lib/Target/PowerPC/TargetInfo/Makefile @@ -10,6 +10,6 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMPowerPCInfo # Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. +override CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. include $(LEVEL)/Makefile.common diff --git a/lib/Target/README.txt b/lib/Target/README.txt index cbfa4cf..9c27f27 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -2367,8 +2367,3 @@ unsigned foo(unsigned x, unsigned y) { return x > y && x != 0; } should fold to x > y. //===---------------------------------------------------------------------===// - -int f(double x) { return __builtin_fabs(x) < 0.0; } -should fold to false. 
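The README item deleted above records a fold that is indeed sound: fabs(x) is non-negative for every non-NaN input, and an ordered < comparison against 0.0 is false for NaN as well, so the function is constant-false for all inputs. In source form:

    #include <cmath>

    // Always returns 0: fabs(x) is >= 0.0 or NaN, and NaN < 0.0 is
    // false under IEEE-754 ordered comparison.
    int f(double x) { return std::fabs(x) < 0.0; }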
- -//===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 6357468..ff8d3c5 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -109,9 +109,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } -void SparcRegisterInfo:: -processFunctionBeforeFrameFinalized(MachineFunction &MF) const {} - unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SP::I6; } diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index ec95ad4..8e215a7 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -24,64 +24,72 @@ void TargetLibraryInfo::anchor() { } const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = { + "__cxa_atexit", + "__cxa_guard_abort", + "__cxa_guard_acquire", + "__cxa_guard_release", + "__memcpy_chk", "acos", - "acosl", "acosf", + "acosl", "asin", - "asinl", "asinf", + "asinl", "atan", - "atanl", - "atanf", "atan2", - "atan2l", "atan2f", + "atan2l", + "atanf", + "atanl", "ceil", - "ceill", "ceilf", + "ceill", "copysign", "copysignf", "copysignl", "cos", - "cosl", "cosf", "cosh", - "coshl", "coshf", + "coshl", + "cosl", "exp", - "expl", - "expf", "exp2", - "exp2l", "exp2f", + "exp2l", + "expf", + "expl", "expm1", - "expm1l", "expm1f", + "expm1l", "fabs", - "fabsl", "fabsf", + "fabsl", + "fiprintf", "floor", - "floorl", "floorf", - "fiprintf", + "floorl", "fmod", - "fmodl", "fmodf", + "fmodl", + "fputc", "fputs", "fwrite", "iprintf", "log", - "logl", - "logf", - "log2", - "log2l", - "log2f", "log10", - "log10l", "log10f", + "log10l", "log1p", - "log1pl", "log1pf", + "log1pl", + "log2", + "log2f", + "log2l", + "logf", + "logl", + "memchr", + "memcmp", "memcpy", "memmove", "memset", @@ -92,6 +100,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "pow", "powf", "powl", + "putchar", + "puts", "rint", "rintf", "rintl", @@ -99,36 +109,48 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "roundf", "roundl", "sin", - "sinl", "sinf", "sinh", - "sinhl", "sinhf", + "sinhl", + "sinl", "siprintf", "sqrt", - "sqrtl", "sqrtf", + "sqrtl", + "strcat", + "strchr", + "strcpy", + "strlen", + "strncat", + "strncmp", + "strncpy", + "strnlen", "tan", - "tanl", "tanf", "tanh", - "tanhl", "tanhf", + "tanhl", + "tanl", "trunc", "truncf", - "truncl", - "__cxa_atexit", - "__cxa_guard_abort", - "__cxa_guard_acquire", - "__cxa_guard_release" + "truncl" }; /// initialize - Initialize the set of available library functions based on the /// specified target triple. This should be carefully written so that a missing /// target triple gets a sane set of defaults. -static void initialize(TargetLibraryInfo &TLI, const Triple &T) { +static void initialize(TargetLibraryInfo &TLI, const Triple &T, + const char **StandardNames) { initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry()); +#ifndef NDEBUG + // Verify that the StandardNames array is in alphabetical order. + for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) { + if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0) + llvm_unreachable("TargetLibraryInfo function names must be sorted"); + } +#endif // !NDEBUG // memset_pattern16 is only available on iOS 3.0 and Mac OS/X 10.5 and later. 
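The alphabetical re-sort of StandardNames above is what makes the getLibFunc lookup added a little further down valid: it binary-searches the table with std::lower_bound, and the new NDEBUG-only loop asserts the ordering it depends on. The lookup pattern in isolation (a standalone sketch, not the LLVM code itself):

    #include <algorithm>
    #include <cstring>

    static bool lessCStr(const char *A, const char *B) {
      return std::strcmp(A, B) < 0;
    }

    // Binary-search a sorted table of C strings; returns the index of
    // Name, or -1 if absent. O(log n), but only if Table is sorted.
    static int lookup(const char *const *Table, unsigned N,
                      const char *Name) {
      const char *const *End = Table + N;
      const char *const *I = std::lower_bound(Table, End, Name, lessCStr);
      return (I != End && std::strcmp(*I, Name) == 0) ? int(I - Table) : -1;
    }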
if (T.isMacOSX()) { @@ -240,14 +262,14 @@ TargetLibraryInfo::TargetLibraryInfo() : ImmutablePass(ID) { // Default to everything being available. memset(AvailableArray, -1, sizeof(AvailableArray)); - initialize(*this, Triple()); + initialize(*this, Triple(), StandardNames); } TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) { // Default to everything being available. memset(AvailableArray, -1, sizeof(AvailableArray)); - initialize(*this, T); + initialize(*this, T, StandardNames); } TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI) @@ -256,6 +278,17 @@ TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI) CustomNames = TLI.CustomNames; } +bool TargetLibraryInfo::getLibFunc(StringRef funcName, + LibFunc::Func &F) const { + const char **Start = &StandardNames[0]; + const char **End = &StandardNames[LibFunc::NumLibFuncs]; + const char **I = std::lower_bound(Start, End, funcName); + if (I != End && *I == funcName) { + F = (LibFunc::Func)(I - Start); + return true; + } + return false; +} /// disableAllFunctions - This disables all builtins, which is used for options /// like -fno-builtin. diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 95e83ec..73a0095 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -39,7 +39,9 @@ private: MCAsmLexer &getLexer() const { return Parser.getLexer(); } bool Error(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(), + bool matchingInlineAsm = false) { + if (matchingInlineAsm) return true; return Parser.Error(L, Msg, Ranges); } @@ -65,6 +67,12 @@ private: SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); + bool MatchInstruction(SMLoc IDLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + SmallVectorImpl<MCInst> &MCInsts, + unsigned &OrigErrorInfo, + bool matchingInlineAsm = false); + /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode. bool isSrcOp(X86Operand &Op); @@ -1508,9 +1516,24 @@ bool X86AsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out) { + SmallVector<MCInst, 2> Insts; + unsigned ErrorInfo; + bool Error = MatchInstruction(IDLoc, Operands, Insts, ErrorInfo); + if (!Error) + for (unsigned i = 0, e = Insts.size(); i != e; ++i) + Out.EmitInstruction(Insts[i]); + return Error; +} + +bool X86AsmParser:: +MatchInstruction(SMLoc IDLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + SmallVectorImpl<MCInst> &MCInsts, unsigned &OrigErrorInfo, + bool matchingInlineAsm) { assert(!Operands.empty() && "Unexpect empty operand list!"); X86Operand *Op = static_cast<X86Operand*>(Operands[0]); assert(Op->isToken() && "Leading operand should always be a mnemonic!"); + ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>(); // First, handle aliases that expand to multiple instructions. // FIXME: This should be replaced with a real .td file alias mechanism. @@ -1523,7 +1546,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, MCInst Inst; Inst.setOpcode(X86::WAIT); Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + MCInsts.push_back(Inst); const char *Repl = StringSwitch<const char*>(Op->getToken()) @@ -1542,7 +1565,6 @@ MatchAndEmitInstruction(SMLoc IDLoc, } bool WasOriginallyInvalidOperand = false; - unsigned OrigErrorInfo; MCInst Inst; // First, try a direct match. 
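MatchAndEmitInstruction above is now a thin wrapper: MatchInstruction does the matching and collects the expansion into a SmallVector<MCInst> (aliases such as the fwait-prefixed mnemonics can produce more than one instruction), and only a successful match is emitted. Together with the matchingInlineAsm flag threaded through Error(), this lets MS-style inline assembly reuse the matcher without emitting code or printing diagnostics. A condensed restatement of the wrapper's control flow:

    // Match first; emit only if nothing failed. On failure the MCInsts
    // are simply discarded and the caller decides how to report it.
    SmallVector<MCInst, 2> Insts;
    unsigned ErrorInfo;
    bool HadError = MatchInstruction(IDLoc, Operands, Insts, ErrorInfo);
    if (!HadError)
      for (unsigned i = 0, e = Insts.size(); i != e; ++i)
        Out.EmitInstruction(Insts[i]);
    return HadError;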
@@ -1557,13 +1579,15 @@ MatchAndEmitInstruction(SMLoc IDLoc, ; Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + MCInsts.push_back(Inst); return false; case Match_MissingFeature: - Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + Error(IDLoc, "instruction requires a CPU feature not currently enabled", + EmptyRanges, matchingInlineAsm); return true; case Match_ConversionFail: - return Error(IDLoc, "unable to convert operands to instruction"); + return Error(IDLoc, "unable to convert operands to instruction", + EmptyRanges, matchingInlineAsm); case Match_InvalidOperand: WasOriginallyInvalidOperand = true; break; @@ -1615,7 +1639,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, (Match3 == Match_Success) + (Match4 == Match_Success); if (NumSuccessfulMatches == 1) { Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + MCInsts.push_back(Inst); return false; } @@ -1642,7 +1666,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, OS << "'" << Base << MatchChars[i] << "'"; } OS << ")"; - Error(IDLoc, OS.str()); + Error(IDLoc, OS.str(), EmptyRanges, matchingInlineAsm); return true; } @@ -1654,30 +1678,33 @@ MatchAndEmitInstruction(SMLoc IDLoc, (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { if (!WasOriginallyInvalidOperand) { return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", - Op->getLocRange()); + Op->getLocRange(), matchingInlineAsm); } // Recover location info for the operand if we know which was the problem. if (OrigErrorInfo != ~0U) { if (OrigErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); + return Error(IDLoc, "too few operands for instruction", + EmptyRanges, matchingInlineAsm); X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo]; if (Operand->getStartLoc().isValid()) { SMRange OperandRange = Operand->getLocRange(); return Error(Operand->getStartLoc(), "invalid operand for instruction", - OperandRange); + OperandRange, matchingInlineAsm); } } - return Error(IDLoc, "invalid operand for instruction"); + return Error(IDLoc, "invalid operand for instruction", EmptyRanges, + matchingInlineAsm); } // If one instruction matched with a missing feature, report this as a // missing feature. if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) + (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){ - Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + Error(IDLoc, "instruction requires a CPU feature not currently enabled", + EmptyRanges, matchingInlineAsm); return true; } @@ -1685,12 +1712,14 @@ MatchAndEmitInstruction(SMLoc IDLoc, // operand failure. if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) + (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){ - Error(IDLoc, "invalid operand for instruction"); + Error(IDLoc, "invalid operand for instruction", EmptyRanges, + matchingInlineAsm); return true; } // If all of these were an outright failure, report it in a useless way. 
- Error(IDLoc, "unknown use of instruction mnemonic without a size suffix"); + Error(IDLoc, "unknown use of instruction mnemonic without a size suffix", + EmptyRanges, matchingInlineAsm); return true; } diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 4bbfe95..5039887 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -327,7 +327,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, if (type == TYPE_RELv) { isBranch = true; pcrel = insn.startLocation + - insn.displacementOffset + insn.displacementSize; + insn.immediateOffset + insn.immediateSize; switch (insn.displacementSize) { default: break; @@ -762,8 +762,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, translateRegister(mcInst, insn.vvvv); return false; case ENCODING_DUP: - return translateOperand(mcInst, - insn.spec->operands[operand.type - TYPE_DUP0], + return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], insn, Dis); } } @@ -789,8 +788,8 @@ static bool translateInstruction(MCInst &mcInst, insn.numImmediatesTranslated = 0; for (index = 0; index < X86_MAX_OPERANDS; ++index) { - if (insn.spec->operands[index].encoding != ENCODING_NONE) { - if (translateOperand(mcInst, insn.spec->operands[index], insn, Dis)) { + if (insn.operands[index].encoding != ENCODING_NONE) { + if (translateOperand(mcInst, insn.operands[index], insn, Dis)) { return true; } } diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h index c11f51c..0dbfa26 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -20,7 +20,7 @@ // 2. Read the opcode, and determine what kind of opcode it is. The // disassembler distinguishes four kinds of opcodes, which are enumerated in // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte -// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a +// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context. // // 3. Depending on the opcode type, look in one of four ClassDecision structures @@ -74,8 +74,8 @@ #ifndef X86DISASSEMBLER_H #define X86DISASSEMBLER_H -#define INSTRUCTION_SPECIFIER_FIELDS \ - const char* name; +#define INSTRUCTION_SPECIFIER_FIELDS \ + uint16_t operands; #define INSTRUCTION_IDS \ unsigned instructionIDs; @@ -88,7 +88,7 @@ #include "llvm/MC/MCDisassembler.h" namespace llvm { - + class MCInst; class MCInstrInfo; class MCSubtargetInfo; @@ -96,7 +96,7 @@ class MemoryObject; class raw_ostream; struct EDInstInfo; - + namespace X86Disassembler { /// X86GenericDisassembler - Generic disassembler for all X86 platforms. 
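The X86AsmParser hunks above split MatchAndEmitInstruction in two: MatchInstruction collects the expansion into a SmallVector<MCInst> and takes a matchingInlineAsm flag that suppresses diagnostics, while the old entry point merely streams the buffered instructions once the whole match succeeds. A hedged, self-contained sketch of that "match first, emit later" shape — the types and names here are illustrative, not LLVM's API:

#include <string>
#include <vector>

struct Inst { std::string Text; };

class Matcher {
  std::vector<std::string> Diags;

  bool error(const std::string &Msg, bool MatchingInlineAsm) {
    if (MatchingInlineAsm)
      return true;               // suppress the diagnostic, keep the failure
    Diags.push_back(Msg);
    return true;
  }

public:
  // Returns true on failure; on success appends the expansion to Insts.
  bool matchInstruction(const std::string &Mnemonic, std::vector<Inst> &Insts,
                        bool MatchingInlineAsm = false) {
    if (Mnemonic.empty())
      return error("empty mnemonic", MatchingInlineAsm);
    Insts.push_back({Mnemonic});  // a real matcher may push several insts
    return false;
  }

  bool matchAndEmit(const std::string &Mnemonic) {
    std::vector<Inst> Insts;
    if (matchInstruction(Mnemonic, Insts))
      return true;
    for (const Inst &I : Insts)
      emit(I);                    // only emit once the whole match succeeded
    return false;
  }

  void emit(const Inst &) { /* stream to output */ }
};

Buffering instead of emitting lets a second caller — here, hypothetically, an inline-asm rewriter — run the matcher speculatively without producing output or error messages.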
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index 6020877..0c92912 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1495,14 +1495,14 @@ static int readOperands(struct InternalInstruction* insn) { needVVVV = hasVVVV && (insn->vvvv != 0); for (index = 0; index < X86_MAX_OPERANDS; ++index) { - switch (insn->spec->operands[index].encoding) { + switch (x86OperandSets[insn->spec->operands][index].encoding) { case ENCODING_NONE: break; case ENCODING_REG: case ENCODING_RM: if (readModRM(insn)) return -1; - if (fixupReg(insn, &insn->spec->operands[index])) + if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) return -1; break; case ENCODING_CB: @@ -1524,14 +1524,14 @@ static int readOperands(struct InternalInstruction* insn) { } if (readImmediate(insn, 1)) return -1; - if (insn->spec->operands[index].type == TYPE_IMM3 && + if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 && insn->immediates[insn->numImmediatesConsumed - 1] > 7) return -1; - if (insn->spec->operands[index].type == TYPE_IMM5 && + if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 && insn->immediates[insn->numImmediatesConsumed - 1] > 31) return -1; - if (insn->spec->operands[index].type == TYPE_XMM128 || - insn->spec->operands[index].type == TYPE_XMM256) + if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 || + x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256) sawRegImm = 1; break; case ENCODING_IW: @@ -1582,7 +1582,7 @@ static int readOperands(struct InternalInstruction* insn) { needVVVV = 0; /* Mark that we have found a VVVV operand. */ if (!hasVVVV) return -1; - if (fixupReg(insn, &insn->spec->operands[index])) + if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) return -1; break; case ENCODING_DUP: @@ -1644,6 +1644,8 @@ int decodeInstruction(struct InternalInstruction* insn, insn->instructionID == 0 || readOperands(insn)) return -1; + + insn->operands = &x86OperandSets[insn->spec->operands][0]; insn->length = insn->readerCursor - insn->startLocation; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index e2caf6a..797703f 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -19,17 +19,18 @@ #ifdef __cplusplus extern "C" { #endif - -#define INSTRUCTION_SPECIFIER_FIELDS + +#define INSTRUCTION_SPECIFIER_FIELDS \ + uint16_t operands; #define INSTRUCTION_IDS \ unsigned instructionIDs; #include "X86DisassemblerDecoderCommon.h" - + #undef INSTRUCTION_SPECIFIER_FIELDS #undef INSTRUCTION_IDS - + /* * Accessor functions for various fields of an Intel instruction */ @@ -43,7 +44,7 @@ extern "C" { #define rFromREX(rex) (((rex) & 0x4) >> 2) #define xFromREX(rex) (((rex) & 0x2) >> 1) #define bFromREX(rex) ((rex) & 0x1) - + #define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7) #define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6) #define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5) @@ -237,7 +238,7 @@ extern "C" { ENTRY(YMM13) \ ENTRY(YMM14) \ ENTRY(YMM15) - + #define REGS_SEGMENT \ ENTRY(ES) \ ENTRY(CS) \ @@ -245,7 +246,7 @@ extern "C" { ENTRY(DS) \ ENTRY(FS) \ ENTRY(GS) - + #define REGS_DEBUG \ ENTRY(DR0) \ ENTRY(DR1) \ @@ -266,12 +267,12 @@ extern "C" { ENTRY(CR6) \ ENTRY(CR7) \ ENTRY(CR8) - + #define ALL_EA_BASES \ EA_BASES_16BIT \ EA_BASES_32BIT \ 
EA_BASES_64BIT - + #define ALL_SIB_BASES \ REGS_32BIT \ REGS_64BIT @@ -290,7 +291,7 @@ extern "C" { ENTRY(RIP) /* - * EABase - All possible values of the base field for effective-address + * EABase - All possible values of the base field for effective-address * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We * distinguish between bases (EA_BASE_*) and registers that just happen to be * referred to when Mod == 0b11 (EA_REG_*). @@ -305,8 +306,8 @@ typedef enum { #undef ENTRY EA_max } EABase; - -/* + +/* * SIBIndex - All possible values of the SIB index field. * Borrows entries from ALL_EA_BASES with the special case that * sib is synonymous with NONE. @@ -321,7 +322,7 @@ typedef enum { #undef ENTRY SIB_INDEX_max } SIBIndex; - + /* * SIBBase - All possible values of the SIB base field. */ @@ -353,7 +354,7 @@ typedef enum { #undef ENTRY MODRM_REG_max } Reg; - + /* * SegmentOverride - All possible segment overrides. */ @@ -367,7 +368,7 @@ typedef enum { SEG_OVERRIDE_GS, SEG_OVERRIDE_max } SegmentOverride; - + /* * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field */ @@ -431,16 +432,16 @@ struct InternalInstruction { void* dlogArg; /* General instruction information */ - + /* The mode to disassemble for (64-bit, protected, real) */ DisassemblerMode mode; /* The start of the instruction, usable with the reader */ uint64_t startLocation; /* The length of the instruction, in bytes */ size_t length; - + /* Prefix state */ - + /* 1 if the prefix byte corresponding to the entry is present; 0 if not */ uint8_t prefixPresent[0x100]; /* contains the location (for use with the reader) of the prefix byte */ @@ -456,7 +457,7 @@ struct InternalInstruction { uint64_t necessaryPrefixLocation; /* The segment override type */ SegmentOverride segmentOverride; - + /* Sizes of various critical pieces of data, in bytes */ uint8_t registerSize; uint8_t addressSize; @@ -467,9 +468,9 @@ struct InternalInstruction { needed to find relocation entries for adding symbolic operands */ uint8_t displacementOffset; uint8_t immediateOffset; - + /* opcode state */ - + /* The value of the two-byte escape prefix (usually 0x0f) */ uint8_t twoByteEscape; /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */ @@ -478,16 +479,16 @@ struct InternalInstruction { uint8_t opcode; /* The ModR/M byte of the instruction, if it is an opcode extension */ uint8_t modRMExtension; - + /* decode state */ - + /* The type of opcode, used for indexing into the array of decode tables */ OpcodeType opcodeType; /* The instruction ID, extracted from the decode table */ uint16_t instructionID; /* The specifier for the instruction, from the instruction info table */ const struct InstructionSpecifier *spec; - + /* state for additional bytes, consumed during operand decode. Pattern: consumed___ indicates that the byte was already consumed and does not need to be consumed again */ @@ -495,12 +496,12 @@ struct InternalInstruction { /* The VEX.vvvv field, which contains a third register operand for some AVX instructions */ Reg vvvv; - + /* The ModR/M byte, which contains most register operands and some portion of all memory operands */ BOOL consumedModRM; uint8_t modRM; - + /* The SIB byte, used for more complex 32- or 64-bit memory operands */ BOOL consumedSIB; uint8_t sib; @@ -508,19 +509,19 @@ struct InternalInstruction { /* The displacement, used for memory operands */ BOOL consumedDisplacement; int32_t displacement; - + /* Immediates. 
There can be two in some cases */ uint8_t numImmediatesConsumed; uint8_t numImmediatesTranslated; uint64_t immediates[2]; - + /* A register or immediate operand encoded into the opcode */ BOOL consumedOpcodeModifier; uint8_t opcodeModifier; Reg opcodeRegister; - + /* Portions of the ModR/M byte */ - + /* These fields determine the allowable values for the ModR/M fields, which depend on operand and address widths */ EABase eaBaseBase; @@ -533,11 +534,13 @@ struct InternalInstruction { EADisplacement eaDisplacement; /* The reg field always encodes a register */ Reg reg; - + /* SIB state */ SIBIndex sibIndex; uint8_t sibScale; SIBBase sibBase; + + const struct OperandSpecifier *operands; }; /* decodeInstruction - Decode one instruction and store the decoding results in @@ -571,15 +574,15 @@ int decodeInstruction(struct InternalInstruction* insn, * @param line - The line number that printed the debug message. * @param s - The message to print. */ - + void x86DisassemblerDebug(const char *file, unsigned line, const char *s); const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii); -#ifdef __cplusplus +#ifdef __cplusplus } #endif - + #endif diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 13e1136..b0a0e1e 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -119,7 +119,7 @@ enum attributeBits { ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize") -#define ENUM_ENTRY(n, r, d) n, +#define ENUM_ENTRY(n, r, d) n, typedef enum { INSTRUCTION_CONTEXTS IC_max @@ -148,11 +148,11 @@ typedef enum { * If a ModR/M byte is not required, "required" is left unset, and the values * for each instructionID are identical. */ - + typedef uint16_t InstrUID; /* - * ModRMDecisionType - describes the type of ModR/M decision, allowing the + * ModRMDecisionType - describes the type of ModR/M decision, allowing the * consumer to determine the number of entries in it. * * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded @@ -172,7 +172,7 @@ typedef uint16_t InstrUID; ENUM_ENTRY(MODRM_SPLITREG) \ ENUM_ENTRY(MODRM_FULL) -#define ENUM_ENTRY(n) n, +#define ENUM_ENTRY(n) n, typedef enum { MODRMTYPES MODRM_max @@ -180,13 +180,13 @@ typedef enum { #undef ENUM_ENTRY /* - * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which + * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which * instruction each possible value of the ModR/M byte corresponds to. Once * this information is known, we have narrowed down to a single instruction. */ struct ModRMDecision { uint8_t modrm_type; - + /* The macro below must be defined wherever this file is included. */ INSTRUCTION_IDS }; @@ -210,7 +210,7 @@ struct ContextDecision { struct OpcodeDecision opcodeDecisions[IC_max]; }; -/* +/* * Physical encodings of instruction operands. */ @@ -244,14 +244,14 @@ struct ContextDecision { ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \ "in type") -#define ENUM_ENTRY(n, d) n, +#define ENUM_ENTRY(n, d) n, typedef enum { ENCODINGS ENCODING_max } OperandEncoding; #undef ENUM_ENTRY -/* +/* * Semantic interpretations of instruction operands. 
*/ @@ -332,14 +332,14 @@ struct ContextDecision { ENUM_ENTRY(TYPE_DUP4, "operand 4") \ ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state") -#define ENUM_ENTRY(n, d) n, +#define ENUM_ENTRY(n, d) n, typedef enum { TYPES TYPE_max } OperandType; #undef ENUM_ENTRY -/* +/* * OperandSpecifier - The specification for how to extract and interpret one * operand. */ @@ -374,8 +374,7 @@ typedef enum { struct InstructionSpecifier { uint8_t modifierType; uint8_t modifierBase; - struct OperandSpecifier operands[X86_MAX_OPERANDS]; - + /* The macro below must be defined wherever this file is included. */ INSTRUCTION_SPECIFIER_FIELDS }; diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 49c07f3..b0acd7d 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -91,9 +91,10 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { // Exceptions handling ExceptionsType = ExceptionHandling::DwarfCFI; - // OpenBSD has buggy support for .quad in 32-bit mode, just split into two - // .words. - if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86) + // OpenBSD and Bitrig have buggy support for .quad in 32-bit mode, just split + // into two .words. + if ((T.getOS() == Triple::OpenBSD || T.getOS() == Triple::Bitrig) && + T.getArch() == Triple::x86) Data64bitsDirective = 0; } diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index bf05ccf..dce5b4d 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -26,7 +26,7 @@ class FunctionPass; class JITCodeEmitter; class X86TargetMachine; -/// createX86ISelDag - This pass converts a legalized DAG into a +/// createX86ISelDag - This pass converts a legalized DAG into a /// X86-specific DAG, ready for instruction scheduling. /// FunctionPass *createX86ISelDag(X86TargetMachine &TM, diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 6c1a816..18e6b7c 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -17,14 +17,14 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// -// X86 Subtarget state. +// X86 Subtarget state // def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true", "64-bit mode (x86_64)">; //===----------------------------------------------------------------------===// -// X86 Subtarget features. 
+// X86 Subtarget features //===----------------------------------------------------------------------===// def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", @@ -97,7 +97,7 @@ def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", [FeatureAVX, FeatureSSE4A]>; def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true", "Enable XOP instructions", - [FeatureAVX, FeatureSSE4A]>; + [FeatureFMA4]>; def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", "HasVectorUAMem", "true", "Allow unaligned memory operands on vector/SIMD instructions">; @@ -226,7 +226,7 @@ def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePCLMUL, FeatureF16C, FeatureLZCNT, - FeaturePOPCNT, FeatureBMI]>; + FeaturePOPCNT, FeatureBMI, FeatureFMA]>; def : Proc<"winchip-c6", [FeatureMMX]>; def : Proc<"winchip2", [Feature3DNow]>; diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index a6ed9ba..35386cd 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -37,15 +37,15 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { virtual const char *getPassName() const { return "X86 AT&T-Style Assembly Printer"; } - + const X86Subtarget &getSubtarget() const { return *Subtarget; } virtual void EmitStartOfAsmFile(Module &M); virtual void EmitEndOfAsmFile(Module &M); - + virtual void EmitInstruction(const MachineInstr *MI); - + void printSymbolOperand(const MachineOperand &MO, raw_ostream &O); // These methods are used by the tablegen'erated instruction printer. @@ -71,7 +71,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O); bool runOnMachineFunction(MachineFunction &F); - + void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); MachineLocation getDebugValueLocation(const MachineInstr *MI) const; diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp index e01ff41..6a6125b 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp +++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp @@ -17,4 +17,3 @@ using namespace llvm; X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() { } - diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index 0cec95a..471eb31 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -1,4 +1,4 @@ -//===-- X86COFFMachineModuleInfo.h - X86 COFF MMI Impl ----------*- C++ -*-===// +//===-- X86coffmachinemoduleinfo.h - X86 COFF MMI Impl ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -33,7 +33,7 @@ public: void addExternalFunction(MCSymbol* Symbol) { Externals.insert(Symbol); } - + typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator; externals_iterator externals_begin() const { return Externals.begin(); } externals_iterator externals_end() const { return Externals.end(); } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 585b7a5..e5952aa 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -57,7 +57,9 @@ class X86FastISel : public FastISel { bool X86ScalarSSEf32; public: - explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) { + explicit X86FastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) + : FastISel(funcInfo, libInfo) { 
Subtarget = &TM.getSubtarget<X86Subtarget>(); StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; X86ScalarSSEf64 = Subtarget->hasSSE2(); @@ -155,9 +157,9 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { // For now, require SSE/SSE2 for performing floating-point operations, // since x87 requires additional work. if (VT == MVT::f64 && !X86ScalarSSEf64) - return false; + return false; if (VT == MVT::f32 && !X86ScalarSSEf32) - return false; + return false; // Similarly, no f80 support yet. if (VT == MVT::f80) return false; @@ -1516,6 +1518,22 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { return DoSelectCall(I, 0); } +static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget, + const ImmutableCallSite &CS) { + if (Subtarget.is64Bit()) + return 0; + if (Subtarget.isTargetWindows()) + return 0; + CallingConv::ID CC = CS.getCallingConv(); + if (CC == CallingConv::Fast || CC == CallingConv::GHC) + return 0; + if (!CS.paramHasAttr(1, Attribute::StructRet)) + return 0; + if (CS.paramHasAttr(1, Attribute::InReg)) + return 0; + return 4; +} + // Select either a call, or an llvm.memcpy/memmove/memset intrinsic bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { const CallInst *CI = cast<CallInst>(I); @@ -1862,12 +1880,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // Issue CALLSEQ_END unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); - unsigned NumBytesCallee = 0; - if (!Subtarget->is64Bit() && !Subtarget->isTargetWindows() && - !(CS.getCallingConv() == CallingConv::Fast || - CS.getCallingConv() == CallingConv::GHC) && - CS.paramHasAttr(1, Attribute::StructRet)) - NumBytesCallee = 4; + const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp)) .addImm(NumBytes).addImm(NumBytesCallee); @@ -2129,28 +2142,28 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { unsigned Opc = 0; const TargetRegisterClass *RC = NULL; switch (VT.SimpleTy) { - default: return false; - case MVT::f32: - if (X86ScalarSSEf32) { - Opc = X86::FsFLD0SS; - RC = &X86::FR32RegClass; - } else { - Opc = X86::LD_Fp032; - RC = &X86::RFP32RegClass; - } - break; - case MVT::f64: - if (X86ScalarSSEf64) { - Opc = X86::FsFLD0SD; - RC = &X86::FR64RegClass; - } else { - Opc = X86::LD_Fp064; - RC = &X86::RFP64RegClass; - } - break; - case MVT::f80: - // No f80 support yet. - return false; + default: return false; + case MVT::f32: + if (X86ScalarSSEf32) { + Opc = X86::FsFLD0SS; + RC = &X86::FR32RegClass; + } else { + Opc = X86::LD_Fp032; + RC = &X86::RFP32RegClass; + } + break; + case MVT::f64: + if (X86ScalarSSEf64) { + Opc = X86::FsFLD0SD; + RC = &X86::FR64RegClass; + } else { + Opc = X86::LD_Fp064; + RC = &X86::RFP64RegClass; + } + break; + case MVT::f80: + // No f80 support yet. 
+ return false; } unsigned ResultReg = createResultReg(RC); @@ -2169,7 +2182,7 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, if (!X86SelectAddress(LI->getOperand(0), AM)) return false; - X86InstrInfo &XII = (X86InstrInfo&)TII; + const X86InstrInfo &XII = (const X86InstrInfo&)TII; unsigned Size = TD.getTypeAllocSize(LI->getType()); unsigned Alignment = LI->getAlignment(); @@ -2188,7 +2201,8 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, namespace llvm { - FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) { - return new X86FastISel(funcInfo); + FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) { + return new X86FastISel(funcInfo, libInfo); } } diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 711ee41..955c75a 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -971,7 +971,7 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { // Change from the pseudo instruction to the concrete instruction. MI->RemoveOperand(0); // Remove the explicit ST(0) operand MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); - + // Result gets pushed on the stack. pushReg(DestReg); } @@ -1015,7 +1015,7 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { } else { moveToTop(Reg, I); // Move to the top of the stack... } - + // Convert from the pseudo instruction to the concrete instruction. MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); @@ -1297,7 +1297,7 @@ void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) { MI->RemoveOperand(1); MI->getOperand(0).setReg(getSTReg(Op1)); MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); - + // If we kill the second operand, make sure to pop it from the stack. if (Op0 != Op1 && KillsOp1) { // Get this value off of the register stack. @@ -1714,38 +1714,38 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // Assert that the top of stack contains the right FP register. assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) && "Top of stack not the right register for RET!"); - + // Ok, everything is good, mark the value as not being on the stack // anymore so that our assertion about the stack being empty at end of // block doesn't fire. StackTop = 0; return; } - + // Otherwise, we are returning two values: // 2) If returning the same value for both, we only have one thing in the FP // stack. Consider: RET FP1, FP1 if (StackTop == 1) { assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&& "Stack misconfiguration for RET!"); - + // Duplicate the TOS so that we return it twice. Just pick some other FPx // register to hold it. unsigned NewReg = getScratchReg(); duplicateToTop(FirstFPRegOp, NewReg, MI); FirstFPRegOp = NewReg; } - + /// Okay we know we have two different FPx operands now: assert(StackTop == 2 && "Must have two values live!"); - + /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently /// in ST(1). In this case, emit an fxch. if (getStackEntry(0) == SecondFPRegOp) { assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live"); moveToTop(FirstFPRegOp, MI); } - + /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in /// ST(1). Just remove both from our understanding of the stack and return. 
assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live"); diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 5186482..27195b4 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -60,7 +60,7 @@ namespace { int Base_FrameIndex; unsigned Scale; - SDValue IndexReg; + SDValue IndexReg; int32_t Disp; SDValue Segment; const GlobalValue *GV; @@ -80,11 +80,11 @@ namespace { bool hasSymbolicDisplacement() const { return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0; } - + bool hasBaseOrIndexReg() const { return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0; } - + /// isRIPRelative - Return true if this addressing mode is already RIP /// relative. bool isRIPRelative() const { @@ -94,7 +94,7 @@ namespace { return RegNode->getReg() == X86::RIP; return false; } - + void setBaseReg(SDValue Reg) { BaseType = RegBase; Base_Reg = Reg; @@ -104,7 +104,7 @@ namespace { dbgs() << "X86ISelAddressMode " << this << '\n'; dbgs() << "Base_Reg "; if (Base_Reg.getNode() != 0) - Base_Reg.getNode()->dump(); + Base_Reg.getNode()->dump(); else dbgs() << "nul"; dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n' @@ -113,7 +113,7 @@ namespace { if (IndexReg.getNode() != 0) IndexReg.getNode()->dump(); else - dbgs() << "nul"; + dbgs() << "nul"; dbgs() << " Disp " << Disp << '\n' << "GV "; if (GV) @@ -213,21 +213,21 @@ namespace { SDValue &Index, SDValue &Disp, SDValue &Segment, SDValue &NodeWithChain); - + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps); - + void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI); - inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, + inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? @@ -426,7 +426,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { void X86DAGToDAGISel::PreprocessISelDAG() { // OptForSize is used in pattern predicates that isel is matching. OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); - + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. @@ -462,7 +462,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { ++NumLoadMoved; continue; } - + // Lower fpround and fpextend nodes that target the FP stack to be store and // load to the stack. This is a gross hack. We would like to simply mark // these as being illegal, but when we do that, legalize produces these when @@ -473,7 +473,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // FIXME: This should only happen when not compiled with -O0. if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) continue; - + EVT SrcVT = N->getOperand(0).getValueType(); EVT DstVT = N->getValueType(0); @@ -496,7 +496,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (N->getConstantOperandVal(1)) continue; } - + // Here we could have an FP stack truncation or an FPStack <-> SSE convert. // FPStack has extload and truncstore. SSE can fold direct loads into other // operations. 
Based on this, decide what we want to do. @@ -505,10 +505,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() { MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'. else MemVT = SrcIsSSE ? SrcVT : DstVT; - + SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); DebugLoc dl = N->getDebugLoc(); - + // FIXME: optimize the case where the src/dest is a load or store? SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0), @@ -524,12 +524,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // To avoid invalidating 'I', back it up to the convert node. --I; CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); - + // Now that we did that, the node is dead. Increment the iterator to the // next node to process, then delete N. ++I; CurDAG->DeleteNode(N); - } + } } @@ -584,7 +584,7 @@ bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ SDValue Address = N->getOperand(1); - + // load gs:0 -> GS segment register. // load fs:0 -> FS segment register. // @@ -593,7 +593,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ // For more information see http://people.redhat.com/drepper/tls.pdf if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address)) if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 && - Subtarget->isTargetELF()) + Subtarget->isTargetLinux()) switch (N->getPointerInfo().getAddrSpace()) { case 256: AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); @@ -602,7 +602,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); return false; } - + return true; } @@ -992,7 +992,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::SHL: if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; - + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) { unsigned Val = CN->getZExtValue(); @@ -1167,7 +1167,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)) return false; AM = Backup; - + // Try again after commuting the operands. if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&& !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1)) @@ -1203,7 +1203,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM = Backup; } break; - + case ISD::AND: { // Perform some heroic transforms on an and of a constant-count shift // with a constant to enable use of the scaled offset field. @@ -1275,7 +1275,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; - + if (Parent && // This list of opcodes are all the nodes that have an "addr:$ptr" operand // that are not a MemSDNode, and thus don't have proper addrspace info. @@ -1290,7 +1290,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, if (AddrSpace == 257) AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); } - + if (MatchAddress(N, AM)) return false; @@ -1336,7 +1336,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, // elements. This is a vector shuffle from the zero vector. if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() && // Check to see if the top elements are all zeros (or bitcast of zeros). 
- N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && + N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && N.getOperand(0).getNode()->hasOneUse() && ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && N.getOperand(0).getOperand(0).hasOneUse() && @@ -1411,7 +1411,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, // If it isn't worth using an LEA, reject it. if (Complexity <= 2) return false; - + getAddressOperands(AM, Base, Scale, Index, Disp, Segment); return true; } @@ -1422,7 +1422,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, SDValue &Disp, SDValue &Segment) { assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); - + X86ISelAddressMode AM; AM.GV = GA->getGlobal(); AM.Disp += GA->getOffset(); @@ -1435,7 +1435,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, } else { AM.IndexReg = CurDAG->getRegister(0, MVT::i64); } - + getAddressOperands(AM, Base, Scale, Index, Disp, Segment); return true; } @@ -1449,7 +1449,7 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, !IsProfitableToFold(N, P, P) || !IsLegalToFold(N, P, P, OptLevel)) return false; - + return SelectAddr(N.getNode(), N.getOperand(1), Base, Scale, Index, Disp, Segment); } @@ -1700,7 +1700,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { if (Node->hasAnyUseOfValue(0)) return 0; - + // Optimize common patterns for __sync_or_and_fetch and similar arith // operations where the result is not used. This allows us to use the "lock" // version of the arithmetic instruction. @@ -1727,14 +1727,14 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { default: return 0; } - + bool isCN = false; ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val); if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) { isCN = true; Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT); } - + unsigned Opc = 0; switch (NVT.getSimpleVT().SimpleTy) { default: return 0; @@ -1772,7 +1772,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { } break; } - + assert(Opc != 0 && "Invalid arith lock transform!"); DebugLoc dl = Node->getDebugLoc(); @@ -1852,7 +1852,7 @@ static bool HasNoSignedComparisonUses(SDNode *N) { /// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode /// is suitable for doing the {load; increment or decrement; store} to modify /// transformation. -static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, +static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, SDValue StoredVal, SelectionDAG *CurDAG, LoadSDNode* &LoadNode, SDValue &InputChain) { @@ -1876,15 +1876,15 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, // Return LoadNode by reference. LoadNode = cast<LoadSDNode>(Load); // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8) - EVT LdVT = LoadNode->getMemoryVT(); - if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 && + EVT LdVT = LoadNode->getMemoryVT(); + if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 && LdVT != MVT::i8) return false; // Is store the only read of the loaded value? if (!Load.hasOneUse()) return false; - + // Is the address of the store the same as the load? 
if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || LoadNode->getOffset() != StoreNode->getOffset()) @@ -1990,7 +1990,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { unsigned Opc, MOpc; unsigned Opcode = Node->getOpcode(); DebugLoc dl = Node->getDebugLoc(); - + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); if (Node->isMachineOpcode()) { @@ -2062,7 +2062,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::ATOMSWAP64_DAG: { unsigned Opc; switch (Opcode) { - default: llvm_unreachable("Impossible intrinsic"); + default: llvm_unreachable("Impossible opcode"); case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break; case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break; case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break; @@ -2119,7 +2119,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val) break; - unsigned ShlOp, Op = 0; + unsigned ShlOp, Op; EVT CstVT = NVT; // Check the minimum bitwidth for the new constant. @@ -2142,6 +2142,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ShlOp = X86::SHL32ri; switch (Opcode) { + default: llvm_unreachable("Impossible opcode"); case ISD::AND: Op = X86::AND32ri8; break; case ISD::OR: Op = X86::OR32ri8; break; case ISD::XOR: Op = X86::XOR32ri8; break; @@ -2152,6 +2153,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ShlOp = X86::SHL64ri; switch (Opcode) { + default: llvm_unreachable("Impossible opcode"); case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break; case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break; case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break; @@ -2168,7 +2170,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::UMUL: { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); - + unsigned LoReg; switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); @@ -2177,20 +2179,20 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break; case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break; } - + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, N0, SDValue()).getValue(1); - + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); SDValue Ops[] = {N1, InFlag}; SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2); - + ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2)); return NULL; } - + case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: { SDValue N0 = Node->getOperand(0); @@ -2287,7 +2289,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ReplaceUses(SDValue(Node, 1), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } - + return NULL; } @@ -2438,7 +2440,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return NULL; } - case X86ISD::CMP: { + case X86ISD::CMP: + case X86ISD::SUB: { + // Sometimes a SUB is used to perform comparison. + if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0)) + // This node is not a CMP. + break; SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); @@ -2555,7 +2562,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // a simple increment or decrement through memory of that value, if the // uses of the modified value and its address are suitable. 
// The DEC64m tablegen pattern is currently not able to match the case where - // the EFLAGS on the original DEC are used. (This also applies to + // the EFLAGS on the original DEC are used. (This also applies to // {INC,DEC}X{64,32,16,8}.) // We'll need to improve tablegen to allow flags to be transferred from a // node in the pattern to the result node. probably with a new keyword @@ -2587,7 +2594,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MemOp[0] = StoreNode->getMemOperand(); MemOp[1] = LoadNode->getMemOperand(); const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain }; - EVT LdVT = LoadNode->getMemoryVT(); + EVT LdVT = LoadNode->getMemoryVT(); unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); MachineSDNode *Result = CurDAG->getMachineNode(newOpc, Node->getDebugLoc(), @@ -2600,6 +2607,85 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return Result; } + + // FIXME: Custom handling because TableGen doesn't support multiple implicit + // defs in an instruction pattern + case X86ISD::PCMPESTRI: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + SDValue N3 = Node->getOperand(3); + SDValue N4 = Node->getOperand(4); + + // Make sure last argument is a constant + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N4); + if (!Cst) + break; + + uint64_t Imm = Cst->getZExtValue(); + + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, + X86::EAX, N1, SDValue()).getValue(1); + InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, + N3, InFlag).getValue(1); + + SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag }; + unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : + X86::PCMPESTRIrr; + InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, + array_lengthof(Ops)), 0); + + if (!SDValue(Node, 0).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::ECX, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 0), Result); + } + if (!SDValue(Node, 1).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::EFLAGS, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 1), Result); + } + + return NULL; + } + + // FIXME: Custom handling because TableGen doesn't support multiple implicit + // defs in an instruction pattern + case X86ISD::PCMPISTRI: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + + // Make sure last argument is a constant + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N2); + if (!Cst) + break; + + uint64_t Imm = Cst->getZExtValue(); + + SDValue Ops[] = { N0, N1, getI8Imm(Imm) }; + unsigned Opc = Subtarget->hasAVX() ? 
X86::VPCMPISTRIrr : + X86::PCMPISTRIrr; + SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, + array_lengthof(Ops)), 0); + + if (!SDValue(Node, 0).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::ECX, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 0), Result); + } + if (!SDValue(Node, 1).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::EFLAGS, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 1), Result); + } + + return NULL; + } } SDNode *ResNode = SelectCode(Node); @@ -2627,7 +2713,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, return true; break; } - + OutOps.push_back(Op0); OutOps.push_back(Op1); OutOps.push_back(Op2); @@ -2636,7 +2722,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, return false; } -/// createX86ISelDag - This pass converts a legalized DAG into a +/// createX86ISelDag - This pass converts a legalized DAG into a /// X86-specific DAG, ready for instruction scheduling. /// FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b88f2fa..7954170 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -66,7 +66,7 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, DebugLoc dl) { EVT VT = Vec.getValueType(); - assert(VT.getSizeInBits() == 256 && "Unexpected vector size!"); + assert(VT.is256BitVector() && "Unexpected vector size!"); EVT ElVT = VT.getVectorElementType(); unsigned Factor = VT.getSizeInBits()/128; EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, @@ -105,7 +105,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, return Result; EVT VT = Vec.getValueType(); - assert(VT.getSizeInBits() == 128 && "Unexpected vector size!"); + assert(VT.is128BitVector() && "Unexpected vector size!"); EVT ElVT = VT.getVectorElementType(); EVT ResultVT = Result.getValueType(); @@ -174,7 +174,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // For 64-bit since we have so many registers use the ILP scheduler, for // 32-bit code use the register pressure specific scheduling. // For Atom, always use ILP scheduling. 
- if (Subtarget->isAtom()) + if (Subtarget->isAtom()) setSchedulingPreference(Sched::ILP); else if (Subtarget->is64Bit()) setSchedulingPreference(Sched::ILP); @@ -731,6 +731,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FMA, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); @@ -828,7 +829,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::SELECT, MVT::v4f32, Custom); - setOperationAction(ISD::SETCC, MVT::v4f32, Custom); } if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) { @@ -869,27 +869,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); - // Custom lower build_vector, vector_shuffle, and extract_vector_elt. for (int i = MVT::v16i8; i != MVT::v2i64; ++i) { - EVT VT = (MVT::SimpleValueType)i; + MVT VT = (MVT::SimpleValueType)i; // Do not attempt to custom lower non-power-of-2 vectors if (!isPowerOf2_32(VT.getVectorNumElements())) continue; // Do not attempt to custom lower non-128-bit vectors if (!VT.is128BitVector()) continue; - setOperationAction(ISD::BUILD_VECTOR, - VT.getSimpleVT().SimpleTy, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, - VT.getSimpleVT().SimpleTy, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, - VT.getSimpleVT().SimpleTy, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); @@ -906,23 +897,22 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 
for (int i = MVT::v16i8; i != MVT::v2i64; ++i) { - MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; - EVT VT = SVT; + MVT VT = (MVT::SimpleValueType)i; // Do not attempt to promote non-128-bit vectors if (!VT.is128BitVector()) continue; - setOperationAction(ISD::AND, SVT, Promote); - AddPromotedToType (ISD::AND, SVT, MVT::v2i64); - setOperationAction(ISD::OR, SVT, Promote); - AddPromotedToType (ISD::OR, SVT, MVT::v2i64); - setOperationAction(ISD::XOR, SVT, Promote); - AddPromotedToType (ISD::XOR, SVT, MVT::v2i64); - setOperationAction(ISD::LOAD, SVT, Promote); - AddPromotedToType (ISD::LOAD, SVT, MVT::v2i64); - setOperationAction(ISD::SELECT, SVT, Promote); - AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64); + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v2i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v2i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v2i64); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, MVT::v2i64); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v2i64); } setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -1009,9 +999,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } } - if (Subtarget->hasSSE42()) - setOperationAction(ISD::SETCC, MVT::v2i64, Custom); - if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) { addRegisterClass(MVT::v32i8, &X86::VR256RegClass); addRegisterClass(MVT::v16i16, &X86::VR256RegClass); @@ -1042,13 +1029,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom); - setOperationAction(ISD::SRL, MVT::v16i16, Custom); setOperationAction(ISD::SRL, MVT::v32i8, Custom); @@ -1072,6 +1052,15 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); + if (Subtarget->hasFMA()) { + setOperationAction(ISD::FMA, MVT::v8f32, Custom); + setOperationAction(ISD::FMA, MVT::v4f64, Custom); + setOperationAction(ISD::FMA, MVT::v4f32, Custom); + setOperationAction(ISD::FMA, MVT::v2f64, Custom); + setOperationAction(ISD::FMA, MVT::f32, Custom); + setOperationAction(ISD::FMA, MVT::f64, Custom); + } + if (Subtarget->hasAVX2()) { setOperationAction(ISD::ADD, MVT::v4i64, Legal); setOperationAction(ISD::ADD, MVT::v8i32, Legal); @@ -1125,45 +1114,44 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Custom lower several nodes for 256-bit types. for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; - EVT VT = SVT; + MVT VT = (MVT::SimpleValueType)i; // Extract subvector is special because the value type // (result) is 128-bit but the source is 256-bit wide. 
if (VT.is128BitVector()) - setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); // Do not attempt to custom lower other non-256-bit vectors if (!VT.is256BitVector()) continue; - setOperationAction(ISD::BUILD_VECTOR, SVT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); } // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64. for (int i = MVT::v32i8; i != MVT::v4i64; ++i) { - MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; - EVT VT = SVT; + MVT VT = (MVT::SimpleValueType)i; // Do not attempt to promote non-256-bit vectors if (!VT.is256BitVector()) continue; - setOperationAction(ISD::AND, SVT, Promote); - AddPromotedToType (ISD::AND, SVT, MVT::v4i64); - setOperationAction(ISD::OR, SVT, Promote); - AddPromotedToType (ISD::OR, SVT, MVT::v4i64); - setOperationAction(ISD::XOR, SVT, Promote); - AddPromotedToType (ISD::XOR, SVT, MVT::v4i64); - setOperationAction(ISD::LOAD, SVT, Promote); - AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64); - setOperationAction(ISD::SELECT, SVT, Promote); - AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64); + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v4i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v4i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v4i64); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, MVT::v4i64); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v4i64); } } @@ -1221,6 +1209,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); + setTargetDAGCombine(ISD::FMA); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); @@ -1718,21 +1707,37 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, /// CallIsStructReturn - Determines whether a call uses struct return /// semantics. -static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) { +enum StructReturnType { + NotStructReturn, + RegStructReturn, + StackStructReturn +}; +static StructReturnType +callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) { if (Outs.empty()) - return false; + return NotStructReturn; - return Outs[0].Flags.isSRet(); + const ISD::ArgFlagsTy &Flags = Outs[0].Flags; + if (!Flags.isSRet()) + return NotStructReturn; + if (Flags.isInReg()) + return RegStructReturn; + return StackStructReturn; } /// ArgsAreStructReturn - Determines whether a function uses struct /// return semantics. 
-static bool -ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) { +static StructReturnType +argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) { if (Ins.empty()) - return false; + return NotStructReturn; - return Ins[0].Flags.isSRet(); + const ISD::ArgFlagsTy &Flags = Ins[0].Flags; + if (!Flags.isSRet()) + return NotStructReturn; + if (Flags.isInReg()) + return RegStructReturn; + return StackStructReturn; } /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified @@ -1876,9 +1881,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, RC = &X86::FR32RegClass; else if (RegVT == MVT::f64) RC = &X86::FR64RegClass; - else if (RegVT.isVector() && RegVT.getSizeInBits() == 256) + else if (RegVT.is256BitVector()) RC = &X86::VR256RegClass; - else if (RegVT.isVector() && RegVT.getSizeInBits() == 128) + else if (RegVT.is128BitVector()) RC = &X86::VR128RegClass; else if (RegVT == MVT::x86mmx) RC = &X86::VR64RegClass; @@ -2073,7 +2078,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows && - ArgsAreStructReturn(Ins)) + argsAreStructReturn(Ins) == StackStructReturn) FuncInfo->setBytesToPopOnReturn(4); } @@ -2163,7 +2168,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isTargetWin64(); bool IsWindows = Subtarget->isTargetWindows(); - bool IsStructRet = CallIsStructReturn(Outs); + StructReturnType SR = callIsStructReturn(Outs); bool IsSibcall = false; if (MF.getTarget().Options.DisableTailCalls) @@ -2172,8 +2177,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), - Outs, OutVals, Ins, DAG); + isVarArg, SR != NotStructReturn, + MF.getFunction()->hasStructRetAttr(), + Outs, OutVals, Ins, DAG); // Sibcalls are automatically detected tailcalls which do not require // ABI changes. @@ -2255,7 +2261,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); break; case CCValAssign::AExt: - if (RegVT.isVector() && RegVT.getSizeInBits() == 128) { + if (RegVT.is128BitVector()) { // Special case: passing MMX values in XMM registers. Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); @@ -2549,7 +2555,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, getTargetMachine().Options.GuaranteedTailCallOpt)) NumBytesForCalleeToPush = NumBytes; // Callee pops everything else if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows && - IsStructRet) + SR == StackStructReturn) // If this is a call to a struct-return function, the callee // pops the hidden struct pointer, so we have to push it back. // This is common for Darwin/X86, Linux & Mingw32 targets. 
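The struct-return hunks above replace a boolean "is this an sret call?" predicate with a three-state enum, so the 32-bit callee-pops-the-hidden-pointer rule fires only when the sret pointer actually lives on the stack. A minimal sketch of the refactor, with an invented ArgFlags struct standing in for ISD::ArgFlagsTy:

// Three-way classification instead of a bool: an sret pointer passed in a
// register (inreg) is never pushed, so the callee has nothing extra to pop.
enum StructReturnType { NotStructReturn, RegStructReturn, StackStructReturn };

struct ArgFlags { bool SRet; bool InReg; };

static StructReturnType classifyStructReturn(const ArgFlags &First) {
  if (!First.SRet)
    return NotStructReturn;
  return First.InReg ? RegStructReturn : StackStructReturn;
}

static unsigned bytesPoppedByCallee(bool Is64Bit, const ArgFlags &First) {
  if (Is64Bit)
    return 0;  // 64-bit ABIs never pop the sret pointer
  return classifyStructReturn(First) == StackStructReturn ? 4 : 0;
}

Returning an enum rather than a bool keeps the inreg case from silently falling into the stack path, which is exactly the distinction the diff's SR == StackStructReturn checks rely on.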
@@ -2870,8 +2876,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } FastISel * -X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { - return X86::createFastISel(funcInfo); +X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const { + return X86::createFastISel(funcInfo, libInfo); } @@ -3397,11 +3404,11 @@ static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX, /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) { - unsigned NumElems = VT.getVectorNumElements(); - - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; + unsigned NumElems = VT.getVectorNumElements(); + if (NumElems != 4) return false; @@ -3416,11 +3423,11 @@ static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) { /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, /// <2, 3, 2, 3> static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) { - unsigned NumElems = VT.getVectorNumElements(); - - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; + unsigned NumElems = VT.getVectorNumElements(); + if (NumElems != 4) return false; @@ -3433,7 +3440,7 @@ static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) { /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) { - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; unsigned NumElems = VT.getVectorNumElements(); @@ -3455,10 +3462,12 @@ static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) { /// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLHPS. static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) { + if (!VT.is128BitVector()) + return false; + unsigned NumElems = VT.getVectorNumElements(); - if ((NumElems != 2 && NumElems != 4) - || VT.getSizeInBits() > 128) + if (NumElems != 2 && NumElems != 4) return false; for (unsigned i = 0, e = NumElems/2; i != e; ++i) @@ -3675,7 +3684,7 @@ static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) { static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) { if (VT.getVectorElementType().getSizeInBits() < 32) return false; - if (VT.getSizeInBits() == 256) + if (!VT.is128BitVector()) return false; unsigned NumElts = VT.getVectorNumElements(); @@ -3697,7 +3706,7 @@ static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) { /// The first half comes from the second half of V1 and the second half from the /// the second half of V2. static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { - if (!HasAVX || VT.getSizeInBits() != 256) + if (!HasAVX || !VT.is256BitVector()) return false; // The shuffle result is divided into half A and half B. In total the two @@ -3789,9 +3798,10 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { /// element of vector 2 and the other elements to come from vector 1 in order. 
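For reference, once the new is128BitVector() guard passes, the mask shape isMOVHLPSMask goes on to accept for a 4-lane shuffle of (V1, V2) is <6,7,2,3>: the high half of V2 followed by the high half of V1. A standalone sketch of that final check (the real code also tolerates undef lanes):

    // MOVHLPS lane pattern for a 4-element shuffle(V1, V2).
    bool looksLikeMOVHLPS(const int m[4]) {
      return m[0] == 6 && m[1] == 7 && m[2] == 2 && m[3] == 3;
    }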
static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT, bool V2IsSplat = false, bool V2IsUndef = false) { - unsigned NumOps = VT.getVectorNumElements(); - if (VT.getSizeInBits() == 256) + if (!VT.is128BitVector()) return false; + + unsigned NumOps = VT.getVectorNumElements(); if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) return false; @@ -3857,9 +3867,11 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT, /// specifies a shuffle of elements that is suitable for input to 256-bit /// version of MOVDDUP. static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { - unsigned NumElts = VT.getVectorNumElements(); + if (!HasAVX || !VT.is256BitVector()) + return false; - if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4) + unsigned NumElts = VT.getVectorNumElements(); + if (NumElts != 4) return false; for (unsigned i = 0; i != NumElts/2; ++i) @@ -3875,7 +3887,7 @@ static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { /// specifies a shuffle of elements that is suitable for input to 128-bit /// version of MOVDDUP. static bool isMOVDDUPMask(ArrayRef<int> Mask, EVT VT) { - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; unsigned e = VT.getVectorNumElements() / 2; @@ -4120,7 +4132,7 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, EVT VT) { - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; if (VT.getVectorNumElements() != 4) return false; @@ -4177,7 +4189,7 @@ static bool WillBeConstantPoolLoad(SDNode *N) { /// MOVLP, it must be either a vector load or a scalar load to vector. static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, ArrayRef<int> Mask, EVT VT) { - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) @@ -4719,7 +4731,7 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { // Although the logic below support any bitwidth size, there are no // shift instructions which handle more than 128-bit vectors. - if (SVOp->getValueType(0).getSizeInBits() > 128) + if (!SVOp->getValueType(0).is128BitVector()) return false; if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) || @@ -4814,7 +4826,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, SelectionDAG &DAG, const TargetLowering &TLI, DebugLoc dl) { - assert(VT.getSizeInBits() == 128 && "Unknown type for VShift"); + assert(VT.is128BitVector() && "Unknown type for VShift"); EVT ShVT = MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); @@ -5047,7 +5059,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { } } - bool Is256 = VT.getSizeInBits() == 256; + bool Is256 = VT.is256BitVector(); // Handle the broadcasting a single constant scalar from the constant pool // into a vector. 
On Sandybridge it is still better to load a constant vector
@@ -5102,6 +5114,86 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
  return SDValue();
}

+// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
+// and convert it into X86ISD::VFPEXT, because the current ISD::FP_EXTEND has
+// the constraint of matching input/output vector elements.
+SDValue
+X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  SDNode *N = Op.getNode();
+  EVT VT = Op.getValueType();
+  unsigned NumElts = Op.getNumOperands();
+
+  // Check supported types and sub-targets.
+  //
+  // Only v2f32 -> v2f64 needs special handling.
+  if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
+    return SDValue();
+
+  SDValue VecIn;
+  EVT VecInVT;
+  SmallVector<int, 8> Mask;
+  EVT SrcVT = MVT::Other;
+
+  // Check whether the patterns can be translated into X86vfpext.
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue In = N->getOperand(i);
+    unsigned Opcode = In.getOpcode();
+
+    // Skip if the element is undefined.
+    if (Opcode == ISD::UNDEF) {
+      Mask.push_back(-1);
+      continue;
+    }
+
+    // Quit if one of the elements is not defined from 'fpext'.
+    if (Opcode != ISD::FP_EXTEND)
+      return SDValue();
+
+    // Check how the source of 'fpext' is defined.
+    SDValue L2In = In.getOperand(0);
+    EVT L2InVT = L2In.getValueType();
+
+    // Check the original type.
+    if (SrcVT == MVT::Other)
+      SrcVT = L2InVT;
+    else if (SrcVT != L2InVT) // Quit if the element types are not homogeneous.
+      return SDValue();
+
+    // Check whether the value being 'fpext'ed is extracted from the same
+    // source.
+    Opcode = L2In.getOpcode();
+
+    // Quit if it's not extracted with a constant index.
+    if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(L2In.getOperand(1)))
+      return SDValue();
+
+    SDValue ExtractedFromVec = L2In.getOperand(0);
+
+    if (VecIn.getNode() == 0) {
+      VecIn = ExtractedFromVec;
+      VecInVT = ExtractedFromVec.getValueType();
+    } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
+      return SDValue();
+
+    Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
+  }
+
+  // Quit if all operands of BUILD_VECTOR are undefined.
+  if (!VecIn.getNode())
+    return SDValue();
+
+  // Fill the remaining mask as undef.
+ for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i) + Mask.push_back(-1); + + return DAG.getNode(X86ISD::VFPEXT, DL, VT, + DAG.getVectorShuffle(VecInVT, DL, + VecIn, DAG.getUNDEF(VecInVT), + &Mask[0])); +} + SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); @@ -5134,6 +5226,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (Broadcast.getNode()) return Broadcast; + SDValue FpExt = LowerVectorFpExtend(Op, DAG); + if (FpExt.getNode()) + return FpExt; + unsigned EVTBits = ExtVT.getSizeInBits(); unsigned NumZero = 0; @@ -5209,12 +5305,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 || (ExtVT == MVT::i64 && Subtarget->is64Bit())) { - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec, Item, DAG.getIntPtrConstant(0)); } - assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!"); + assert(VT.is128BitVector() && "Expected an SSE value type!"); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); @@ -5223,11 +5319,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl); } else { - assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!"); + assert(VT.is128BitVector() && "Expected an SSE value type!"); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } return DAG.getNode(ISD::BITCAST, dl, VT, Item); @@ -5287,7 +5383,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // For AVX-length vectors, build the individual 128-bit pieces and use // shuffles to put them in place. - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SmallVector<SDValue, 32> V; for (unsigned i = 0; i != NumElems; ++i) V.push_back(Op.getOperand(i)); @@ -5368,7 +5464,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]); } - if (Values.size() > 1 && VT.getSizeInBits() == 128) { + if (Values.size() > 1 && VT.is128BitVector()) { // Check for a build vector of consecutive loads. for (unsigned i = 0; i < NumElems; ++i) V[i] = Op.getOperand(i); @@ -5429,39 +5525,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -// LowerMMXCONCAT_VECTORS - We support concatenate two MMX registers and place -// them in a MMX register. This is better than doing a stack convert. 
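The BUILD_VECTOR shape LowerVectorFpExtend recognizes comes from scalarized source such as the sketch below: every defined lane is an fpext of an extract from one common vector, which the hook rewrites into a single X86ISD::VFPEXT (cvtps2pd) node instead of two scalar conversions (an illustrative pattern, not part of the patch):

    // Two-lane float -> double widening; each store is
    // fpext(extractelement(in, i)) in IR terms.
    void widen2(const float in[2], double out[2]) {
      out[0] = static_cast<double>(in[0]);
      out[1] = static_cast<double>(in[1]);
    }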
-static SDValue LowerMMXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { - DebugLoc dl = Op.getDebugLoc(); - EVT ResVT = Op.getValueType(); - - assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 || - ResVT == MVT::v8i16 || ResVT == MVT::v16i8); - int Mask[2]; - SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0)); - SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); - InVec = Op.getOperand(1); - if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { - unsigned NumElts = ResVT.getVectorNumElements(); - VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp); - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp, - InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1)); - } else { - InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec); - SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); - Mask[0] = 0; Mask[1] = 2; - VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask); - } - return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp); -} - // LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction // to create 256-bit vectors from two other 128-bit ones. static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); EVT ResVT = Op.getValueType(); - assert(ResVT.getSizeInBits() == 256 && "Value type must be 256-bit wide"); + assert(ResVT.is256BitVector() && "Value type must be 256-bit wide"); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -5472,16 +5542,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { - EVT ResVT = Op.getValueType(); - assert(Op.getNumOperands() == 2); - assert((ResVT.getSizeInBits() == 128 || ResVT.getSizeInBits() == 256) && - "Unsupported CONCAT_VECTORS for value type"); - - // We support concatenate two MMX registers and place them in a MMX register. - // This is better than doing a stack convert. - if (ResVT.is128BitVector()) - return LowerMMXCONCAT_VECTORS(Op, DAG); // 256-bit AVX can use the vinsertf128 instruction to create 256-bit vectors // from two other 128-bit ones. @@ -6131,7 +6192,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { DebugLoc dl = SVOp->getDebugLoc(); EVT VT = SVOp->getValueType(0); - assert(VT.getSizeInBits() == 128 && "Unsupported vector size"); + assert(VT.is128BitVector() && "Unsupported vector size"); std::pair<int, int> Locs[4]; int Mask1[] = { -1, -1, -1, -1 }; @@ -6759,7 +6820,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // Handle all 128-bit wide vectors with 4 elements, and match them with // several different shuffle types. - if (NumElems == 4 && VT.getSizeInBits() == 128) + if (NumElems == 4 && VT.is128BitVector()) return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG); // Handle general 256-bit shuffles @@ -6775,7 +6836,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - if (Op.getOperand(0).getValueType().getSizeInBits() != 128) + if (!Op.getOperand(0).getValueType().is128BitVector()) return SDValue(); if (VT.getSizeInBits() == 8) { @@ -6845,7 +6906,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // If this is a 256-bit vector result, first extract the 128-bit vector and // then extract the element from the 128-bit vector. 
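At the intrinsics level, that two-step extract looks like this sketch (assuming AVX; the patch itself only tightens the surrounding type checks):

    #include <immintrin.h>

    // Overall lane 5 of a v8f32: grab the upper 128-bit half, then move
    // that half's lane 1 into position 0 and read it as a scalar.
    float lane5(__m256 v) {
      __m128 hi = _mm256_extractf128_ps(v, 1);
      return _mm_cvtss_f32(_mm_shuffle_ps(hi, hi, 1));
    }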
- if (VecVT.getSizeInBits() == 256) { + if (VecVT.is256BitVector()) { DebugLoc dl = Op.getNode()->getDebugLoc(); unsigned NumElems = VecVT.getVectorNumElements(); SDValue Idx = Op.getOperand(1); @@ -6860,7 +6921,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, DAG.getConstant(IdxVal, MVT::i32)); } - assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length"); + assert(VecVT.is128BitVector() && "Unexpected vector length"); if (Subtarget->hasSSE41()) { SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); @@ -6936,7 +6997,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); - if (VT.getSizeInBits() == 256) + if (!VT.is128BitVector()) return SDValue(); if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) && @@ -6992,7 +7053,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { // If this is a 256-bit vector result, first extract the 128-bit vector, // insert the element into the extracted half and then place it back. - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { if (!isa<ConstantSDNode>(N2)) return SDValue(); @@ -7036,7 +7097,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { // If this is a 256-bit vector result, first insert into a 128-bit // vector and then insert into the 256-bit vector. - if (OpVT.getSizeInBits() > 128) { + if (!OpVT.is128BitVector()) { // Insert into a 128-bit vector. EVT VT128 = EVT::getVectorVT(*Context, OpVT.getVectorElementType(), @@ -7053,7 +7114,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0)); SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); - assert(OpVT.getSizeInBits() == 128 && "Expected an SSE type!"); + assert(OpVT.is128BitVector() && "Expected an SSE type!"); return DAG.getNode(ISD::BITCAST, dl, OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt)); } @@ -7068,8 +7129,8 @@ X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { SDValue Vec = Op.getNode()->getOperand(0); SDValue Idx = Op.getNode()->getOperand(1); - if (Op.getNode()->getValueType(0).getSizeInBits() == 128 && - Vec.getNode()->getValueType(0).getSizeInBits() == 256 && + if (Op.getNode()->getValueType(0).is128BitVector() && + Vec.getNode()->getValueType(0).is256BitVector() && isa<ConstantSDNode>(Idx)) { unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); return Extract128BitVector(Vec, IdxVal, DAG, dl); @@ -7089,8 +7150,8 @@ X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { SDValue SubVec = Op.getNode()->getOperand(1); SDValue Idx = Op.getNode()->getOperand(2); - if (Op.getNode()->getValueType(0).getSizeInBits() == 256 && - SubVec.getNode()->getValueType(0).getSizeInBits() == 128 && + if (Op.getNode()->getValueType(0).is256BitVector() && + SubVec.getNode()->getValueType(0).is128BitVector() && isa<ConstantSDNode>(Idx)) { unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl); @@ -7735,9 +7796,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U } subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 } #ifdef __SSE3__ - haddpd %xmm0, %xmm0 + haddpd %xmm0, %xmm0 #else - pshufd $0x4e, %xmm0, %xmm1 + pshufd $0x4e, %xmm0, %xmm1 addpd %xmm1, %xmm0 
#endif
*/
@@ -8064,7 +8125,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op,
    EltVT = VT.getVectorElementType();
  Constant *C;
  if (EltVT == MVT::f64) {
-    C = ConstantVector::getSplat(2,
+    C = ConstantVector::getSplat(2,
                ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
  } else {
    C = ConstantVector::getSplat(4,
@@ -8098,7 +8159,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
                             MachinePointerInfo::getConstantPool(),
                             false, false, false, 16);
  if (VT.isVector()) {
-    MVT XORVT = VT.getSizeInBits() == 128 ? MVT::v2i64 : MVT::v4i64;
+    MVT XORVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
    return DAG.getNode(ISD::BITCAST, dl, VT,
                       DAG.getNode(ISD::XOR, dl, XORVT,
                                   DAG.getNode(ISD::BITCAST, dl, XORVT,
@@ -8226,7 +8287,33 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
  unsigned Opcode = 0;
  unsigned NumOperands = 0;
-  switch (Op.getNode()->getOpcode()) {
+
+  // Truncate operations may prevent the merge of the SETCC instruction
+  // and the arithmetic instruction before it. Attempt to truncate the operands
+  // of the arithmetic instruction and use a reduced bit-width instruction.
+  bool NeedTruncation = false;
+  SDValue ArithOp = Op;
+  if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) {
+    SDValue Arith = Op->getOperand(0);
+    // Both the trunc and the arithmetic op need to have one user each.
+    if (Arith->hasOneUse())
+      switch (Arith.getOpcode()) {
+        default: break;
+        case ISD::ADD:
+        case ISD::SUB:
+        case ISD::AND:
+        case ISD::OR:
+        case ISD::XOR: {
+          NeedTruncation = true;
+          ArithOp = Arith;
+        }
+      }
+  }
+
+  // NOTICE: In the code below we use ArithOp to hold the arithmetic operation,
+  // which may be the result of a CAST. We use the variable 'Op', which is the
+  // non-casted variable, when we check for possible users.
+  switch (ArithOp.getOpcode()) {
  case ISD::ADD:
    // Due to an isel shortcoming, be conservative if this add is likely to be
    // selected as part of a load-modify-store instruction. When the root node
@@ -8246,7 +8333,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
      goto default_case;

    if (ConstantSDNode *C =
-        dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
+        dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
      // An add of one will be selected as an INC.
      if (C->getAPIntValue() == 1) {
        Opcode = X86ISD::INC;
@@ -8282,7 +8369,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
      if (User->getOpcode() != ISD::BRCOND &&
          User->getOpcode() != ISD::SETCC &&
-          (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
+          !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) {
        NonFlagUse = true;
        break;
      }
@@ -8303,15 +8390,9 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
      goto default_case;

    // Otherwise use a regular EFLAGS-setting instruction.
-    switch (Op.getNode()->getOpcode()) {
+    switch (ArithOp.getOpcode()) {
    default: llvm_unreachable("unexpected operator!");
-    case ISD::SUB:
-      // If the only use of SUB is EFLAGS, use CMP instead.
-      if (Op.hasOneUse())
-        Opcode = X86ISD::CMP;
-      else
-        Opcode = X86ISD::SUB;
-      break;
+    case ISD::SUB: Opcode = X86ISD::SUB; break;
    case ISD::OR:  Opcode = X86ISD::OR;  break;
    case ISD::XOR: Opcode = X86ISD::XOR; break;
    case ISD::AND: Opcode = X86ISD::AND; break;
@@ -8332,19 +8413,40 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
    break;
  }

+  // If we found that truncation is beneficial, perform the truncation and
+  // update 'Op'.
+ if (NeedTruncation) { + EVT VT = Op.getValueType(); + SDValue WideVal = Op->getOperand(0); + EVT WideVT = WideVal.getValueType(); + unsigned ConvertedOp = 0; + // Use a target machine opcode to prevent further DAGCombine + // optimizations that may separate the arithmetic operations + // from the setcc node. + switch (WideVal.getOpcode()) { + default: break; + case ISD::ADD: ConvertedOp = X86ISD::ADD; break; + case ISD::SUB: ConvertedOp = X86ISD::SUB; break; + case ISD::AND: ConvertedOp = X86ISD::AND; break; + case ISD::OR: ConvertedOp = X86ISD::OR; break; + case ISD::XOR: ConvertedOp = X86ISD::XOR; break; + } + + if (ConvertedOp) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) { + SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0)); + SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1)); + Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1); + } + } + } + if (Opcode == 0) // Emit a CMP with 0, which is the TEST pattern. return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, Op.getValueType())); - if (Opcode == X86ISD::CMP) { - SDValue New = DAG.getNode(Opcode, dl, MVT::i32, Op.getOperand(0), - Op.getOperand(1)); - // We can't replace usage of SUB with CMP. - // The SUB node will be removed later because there is no use of it. - return SDValue(New.getNode(), 0); - } - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); SmallVector<SDValue, 4> Ops; for (unsigned i = 0; i != NumOperands; ++i) @@ -8364,6 +8466,14 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, return EmitTest(Op0, X86CC, DAG); DebugLoc dl = Op0.getDebugLoc(); + if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 || + Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) { + // Use SUB instead of CMP to enable CSE between SUB and CMP. 
+ SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32); + SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, + Op0, Op1); + return SDValue(Sub.getNode(), 1); + } return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); } @@ -8522,7 +8632,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - assert(VT.getSizeInBits() == 256 && Op.getOpcode() == ISD::SETCC && + assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC && "Unsupported value type for operation"); unsigned NumElems = VT.getVectorNumElements(); @@ -8559,10 +8669,12 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); if (isFP) { - unsigned SSECC = 8; +#ifndef NDEBUG EVT EltVT = Op0.getValueType().getVectorElementType(); - assert(EltVT == MVT::f32 || EltVT == MVT::f64); (void)EltVT; + assert(EltVT == MVT::f32 || EltVT == MVT::f64); +#endif + unsigned SSECC; bool Swap = false; // SSE Condition code mapping: @@ -8575,7 +8687,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { // 6 - NLE // 7 - ORD switch (SetCCOpcode) { - default: break; + default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETOEQ: case ISD::SETEQ: SSECC = 0; break; case ISD::SETOGT: @@ -8589,34 +8701,33 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { case ISD::SETUO: SSECC = 3; break; case ISD::SETUNE: case ISD::SETNE: SSECC = 4; break; - case ISD::SETULE: Swap = true; + case ISD::SETULE: Swap = true; // Fallthrough case ISD::SETUGE: SSECC = 5; break; - case ISD::SETULT: Swap = true; + case ISD::SETULT: Swap = true; // Fallthrough case ISD::SETUGT: SSECC = 6; break; case ISD::SETO: SSECC = 7; break; + case ISD::SETUEQ: + case ISD::SETONE: SSECC = 8; break; } if (Swap) std::swap(Op0, Op1); // In the two special cases we can't handle, emit two comparisons. if (SSECC == 8) { + unsigned CC0, CC1; + unsigned CombineOpc; if (SetCCOpcode == ISD::SETUEQ) { - SDValue UNORD, EQ; - UNORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, - DAG.getConstant(3, MVT::i8)); - EQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, - DAG.getConstant(0, MVT::i8)); - return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ); - } - if (SetCCOpcode == ISD::SETONE) { - SDValue ORD, NEQ; - ORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, - DAG.getConstant(7, MVT::i8)); - NEQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, - DAG.getConstant(4, MVT::i8)); - return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ); + CC0 = 3; CC1 = 0; CombineOpc = ISD::OR; + } else { + assert(SetCCOpcode == ISD::SETONE); + CC0 = 7; CC1 = 4; CombineOpc = ISD::AND; } - llvm_unreachable("Illegal FP comparison"); + + SDValue Cmp0 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, + DAG.getConstant(CC0, MVT::i8)); + SDValue Cmp1 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, + DAG.getConstant(CC1, MVT::i8)); + return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1); } // Handle all other FP comparisons here. return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, @@ -8624,17 +8735,17 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { } // Break 256-bit integer vector compare into smaller ones. - if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()) + if (VT.is256BitVector() && !Subtarget->hasAVX2()) return Lower256IntVSETCC(Op, DAG); // We are handling one of the integer comparisons here. 
Since SSE only has // GT and EQ comparisons for integer, swapping operands and multiple // operations may be required for some comparisons. - unsigned Opc = 0; + unsigned Opc; bool Swap = false, Invert = false, FlipSigns = false; switch (SetCCOpcode) { - default: break; + default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETNE: Invert = true; case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break; case ISD::SETLT: Swap = true; @@ -8651,10 +8762,12 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { // Check that the operation in question is available (most are plain SSE2, // but PCMPGTQ and PCMPEQQ have different requirements). - if (Opc == X86ISD::PCMPGT && VT == MVT::v2i64 && !Subtarget->hasSSE42()) - return SDValue(); - if (Opc == X86ISD::PCMPEQ && VT == MVT::v2i64 && !Subtarget->hasSSE41()) - return SDValue(); + if (VT == MVT::v2i64) { + if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) + return SDValue(); + if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) + return SDValue(); + } // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. @@ -8714,6 +8827,16 @@ static bool isAllOnes(SDValue V) { return C && C->isAllOnesValue(); } +static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) { + if (V.getOpcode() != ISD::TRUNCATE) + return false; + + SDValue VOp0 = V.getOperand(0); + unsigned InBits = VOp0.getValueSizeInBits(); + unsigned Bits = V.getValueSizeInBits(); + return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits)); +} + SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Cond = Op.getOperand(0); @@ -8728,46 +8851,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Cond = NewCond; } - // Handle the following cases related to max and min: - // (a > b) ? (a-b) : 0 - // (a >= b) ? (a-b) : 0 - // (b < a) ? (a-b) : 0 - // (b <= a) ? (a-b) : 0 - // Comparison is removed to use EFLAGS from SUB. - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2)) - if (Cond.getOpcode() == X86ISD::SETCC && - Cond.getOperand(1).getOpcode() == X86ISD::CMP && - (Op1.getOpcode() == ISD::SUB || Op1.getOpcode() == X86ISD::SUB) && - C->getAPIntValue() == 0) { - SDValue Cmp = Cond.getOperand(1); - unsigned CC = cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue(); - if ((DAG.isEqualTo(Op1.getOperand(0), Cmp.getOperand(0)) && - DAG.isEqualTo(Op1.getOperand(1), Cmp.getOperand(1)) && - (CC == X86::COND_G || CC == X86::COND_GE || - CC == X86::COND_A || CC == X86::COND_AE)) || - (DAG.isEqualTo(Op1.getOperand(0), Cmp.getOperand(1)) && - DAG.isEqualTo(Op1.getOperand(1), Cmp.getOperand(0)) && - (CC == X86::COND_L || CC == X86::COND_LE || - CC == X86::COND_B || CC == X86::COND_BE))) { - - if (Op1.getOpcode() == ISD::SUB) { - SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i32); - SDValue New = DAG.getNode(X86ISD::SUB, DL, VTs, - Op1.getOperand(0), Op1.getOperand(1)); - DAG.ReplaceAllUsesWith(Op1, New); - Op1 = New; - } - - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); - unsigned NewCC = (CC == X86::COND_G || CC == X86::COND_GE || - CC == X86::COND_L || - CC == X86::COND_LE) ? 
X86::COND_GE : X86::COND_AE;
-      SDValue Ops[] = { Op2, Op1, DAG.getConstant(NewCC, MVT::i8),
-                        SDValue(Op1.getNode(), 1) };
-      return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
-    }
-  }
-
  // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
  // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
  // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
@@ -8788,11 +8871,11 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
    // (select (x != 0), -1, 0) -> neg & sbb
    // (select (x == 0), 0, -1) -> neg & sbb
    if (ConstantSDNode *YC = dyn_cast<ConstantSDNode>(Y))
-      if (YC->isNullValue() &&
+      if (YC->isNullValue() &&
          (isAllOnes(Op1) == (CondCode == X86::COND_NE))) {
        SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
-        SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs,
-                                  DAG.getConstant(0, CmpOp0.getValueType()),
+        SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs,
+                                  DAG.getConstant(0, CmpOp0.getValueType()),
                                  CmpOp0);
        SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
                                  DAG.getConstant(X86::COND_B, MVT::i8),
@@ -8883,9 +8966,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  }

  if (addTest) {
-    // Look pass the truncate.
-    if (Cond.getOpcode() == ISD::TRUNCATE)
-      Cond = Cond.getOperand(0);
+    // Look past the truncate if the high bits are known zero.
+    if (isTruncWithZeroHighBitsInput(Cond, DAG))
+      Cond = Cond.getOperand(0);

    // We know the result of AND is compared against zero. Try to match
    // it to BT.
@@ -8908,7 +8991,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  // a < b ? 0 : -1 -> RES = setcc_carry
  // a >= b ? -1 : 0 -> RES = setcc_carry
  // a >= b ? 0 : -1 -> RES = ~setcc_carry
-  if (Cond.getOpcode() == X86ISD::CMP) {
+  if (Cond.getOpcode() == X86ISD::SUB) {
    Cond = ConvertCmpIfNecessary(Cond, DAG);
    unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
@@ -9192,9 +9275,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  }

  if (addTest) {
-    // Look pass the truncate.
-    if (Cond.getOpcode() == ISD::TRUNCATE)
-      Cond = Cond.getOperand(0);
+    // Look past the truncate if the high bits are known zero.
+    if (isTruncWithZeroHighBitsInput(Cond, DAG))
+      Cond = Cond.getOperand(0);

    // We know the result of AND is compared against zero. Try to match
    // it to BT.
@@ -9459,8 +9542,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
    SDValue ShOps[4];
    ShOps[0] = ShAmt;
    ShOps[1] = DAG.getConstant(0, MVT::i32);
-    ShOps[2] = DAG.getUNDEF(MVT::i32);
-    ShOps[3] = DAG.getUNDEF(MVT::i32);
+    ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32);
    ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);

    // The return type has to be a 128-bit type with the same element
@@ -9503,8 +9585,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomineq_sd: {
-    unsigned Opc = 0;
-    ISD::CondCode CC = ISD::SETCC_INVALID;
+    unsigned Opc;
+    ISD::CondCode CC;
    switch (IntNo) {
    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
    case Intrinsic::x86_sse_comieq_ss:
@@ -9578,55 +9660,102 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
                                DAG.getConstant(X86CC, MVT::i8), Cond);
    return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
  }
+  // Arithmetic intrinsics.
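Before the arithmetic cases, a usage sketch for the comparison intrinsics handled just above; the lowering emits X86ISD::COMI and then a SETCC on EFLAGS, zero-extended to i32:

    #include <xmmintrin.h>

    // comiss-based ordered scalar compare of the low lanes.
    int firstLaneEqual(__m128 a, __m128 b) {
      return _mm_comieq_ss(a, b);
    }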
case Intrinsic::x86_sse2_pmulu_dq: case Intrinsic::x86_avx2_pmulu_dq: return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + + // SSE3/AVX horizontal add/sub intrinsics case Intrinsic::x86_sse3_hadd_ps: case Intrinsic::x86_sse3_hadd_pd: case Intrinsic::x86_avx_hadd_ps_256: case Intrinsic::x86_avx_hadd_pd_256: - return DAG.getNode(X86ISD::FHADD, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse3_hsub_ps: case Intrinsic::x86_sse3_hsub_pd: case Intrinsic::x86_avx_hsub_ps_256: case Intrinsic::x86_avx_hsub_pd_256: - return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_ssse3_phadd_w_128: case Intrinsic::x86_ssse3_phadd_d_128: case Intrinsic::x86_avx2_phadd_w: case Intrinsic::x86_avx2_phadd_d: - return DAG.getNode(X86ISD::HADD, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_ssse3_phsub_w_128: case Intrinsic::x86_ssse3_phsub_d_128: case Intrinsic::x86_avx2_phsub_w: - case Intrinsic::x86_avx2_phsub_d: - return DAG.getNode(X86ISD::HSUB, dl, Op.getValueType(), + case Intrinsic::x86_avx2_phsub_d: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse3_hadd_ps: + case Intrinsic::x86_sse3_hadd_pd: + case Intrinsic::x86_avx_hadd_ps_256: + case Intrinsic::x86_avx_hadd_pd_256: + Opcode = X86ISD::FHADD; + break; + case Intrinsic::x86_sse3_hsub_ps: + case Intrinsic::x86_sse3_hsub_pd: + case Intrinsic::x86_avx_hsub_ps_256: + case Intrinsic::x86_avx_hsub_pd_256: + Opcode = X86ISD::FHSUB; + break; + case Intrinsic::x86_ssse3_phadd_w_128: + case Intrinsic::x86_ssse3_phadd_d_128: + case Intrinsic::x86_avx2_phadd_w: + case Intrinsic::x86_avx2_phadd_d: + Opcode = X86ISD::HADD; + break; + case Intrinsic::x86_ssse3_phsub_w_128: + case Intrinsic::x86_ssse3_phsub_d_128: + case Intrinsic::x86_avx2_phsub_w: + case Intrinsic::x86_avx2_phsub_d: + Opcode = X86ISD::HSUB; + break; + } + return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + } + + // AVX2 variable shift intrinsics case Intrinsic::x86_avx2_psllv_d: case Intrinsic::x86_avx2_psllv_q: case Intrinsic::x86_avx2_psllv_d_256: case Intrinsic::x86_avx2_psllv_q_256: - return DAG.getNode(ISD::SHL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_avx2_psrlv_d: case Intrinsic::x86_avx2_psrlv_q: case Intrinsic::x86_avx2_psrlv_d_256: case Intrinsic::x86_avx2_psrlv_q_256: - return DAG.getNode(ISD::SRL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_avx2_psrav_d: - case Intrinsic::x86_avx2_psrav_d_256: - return DAG.getNode(ISD::SRA, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_avx2_psrav_d_256: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::x86_avx2_psllv_d: + case Intrinsic::x86_avx2_psllv_q: + case Intrinsic::x86_avx2_psllv_d_256: + case Intrinsic::x86_avx2_psllv_q_256: + Opcode = ISD::SHL; + break; + case Intrinsic::x86_avx2_psrlv_d: + case Intrinsic::x86_avx2_psrlv_q: + case Intrinsic::x86_avx2_psrlv_d_256: + case Intrinsic::x86_avx2_psrlv_q_256: + Opcode = ISD::SRL; + break; + case Intrinsic::x86_avx2_psrav_d: + case Intrinsic::x86_avx2_psrav_d_256: + Opcode = ISD::SRA; + break; + } + return DAG.getNode(Opcode, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + case Intrinsic::x86_ssse3_pshuf_b_128: case Intrinsic::x86_avx2_pshuf_b: return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_psign_b_128: case Intrinsic::x86_ssse3_psign_w_128: case Intrinsic::x86_ssse3_psign_d_128: @@ -9635,15 +9764,18 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx2_psign_d: return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse41_insertps: return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::x86_avx_vperm2f128_ps_256: case Intrinsic::x86_avx_vperm2f128_pd_256: case Intrinsic::x86_avx_vperm2f128_si_256: case Intrinsic::x86_avx2_vperm2i128: return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::x86_avx2_permd: case Intrinsic::x86_avx2_permps: // Operands intentionally swapped. Mask is last operand to intrinsic, @@ -9673,7 +9805,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx_vtestc_pd_256: case Intrinsic::x86_avx_vtestnzc_pd_256: { bool IsTestPacked = false; - unsigned X86CC = 0; + unsigned X86CC; switch (IntNo) { default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); case Intrinsic::x86_avx_vtestz_ps: @@ -9724,44 +9856,93 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx2_psll_w: case Intrinsic::x86_avx2_psll_d: case Intrinsic::x86_avx2_psll_q: - return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse2_psrl_w: case Intrinsic::x86_sse2_psrl_d: case Intrinsic::x86_sse2_psrl_q: case Intrinsic::x86_avx2_psrl_w: case Intrinsic::x86_avx2_psrl_d: case Intrinsic::x86_avx2_psrl_q: - return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse2_psra_w: case Intrinsic::x86_sse2_psra_d: case Intrinsic::x86_avx2_psra_w: - case Intrinsic::x86_avx2_psra_d: - return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(), + case Intrinsic::x86_avx2_psra_d: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::x86_sse2_psll_w: + case Intrinsic::x86_sse2_psll_d: + case Intrinsic::x86_sse2_psll_q: + case Intrinsic::x86_avx2_psll_w: + case Intrinsic::x86_avx2_psll_d: + case Intrinsic::x86_avx2_psll_q: + Opcode = X86ISD::VSHL; + break; + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + Opcode = X86ISD::VSRL; + break; + case Intrinsic::x86_sse2_psra_w: + case Intrinsic::x86_sse2_psra_d: + case Intrinsic::x86_avx2_psra_w: + case Intrinsic::x86_avx2_psra_d: + Opcode = X86ISD::VSRA; + break; + } + return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + } + + // SSE/AVX immediate shift intrinsics case Intrinsic::x86_sse2_pslli_w: case Intrinsic::x86_sse2_pslli_d: case Intrinsic::x86_sse2_pslli_q: case Intrinsic::x86_avx2_pslli_w: case Intrinsic::x86_avx2_pslli_d: case Intrinsic::x86_avx2_pslli_q: - return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2), DAG); case Intrinsic::x86_sse2_psrli_w: case Intrinsic::x86_sse2_psrli_d: case Intrinsic::x86_sse2_psrli_q: case Intrinsic::x86_avx2_psrli_w: case Intrinsic::x86_avx2_psrli_d: case Intrinsic::x86_avx2_psrli_q: - return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2), DAG); case Intrinsic::x86_sse2_psrai_w: case Intrinsic::x86_sse2_psrai_d: case Intrinsic::x86_avx2_psrai_w: - case Intrinsic::x86_avx2_psrai_d: - return getTargetVShiftNode(X86ISD::VSRAI, dl, Op.getValueType(), + case Intrinsic::x86_avx2_psrai_d: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse2_pslli_w: + case Intrinsic::x86_sse2_pslli_d: + case Intrinsic::x86_sse2_pslli_q: + case Intrinsic::x86_avx2_pslli_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + Opcode = X86ISD::VSHLI; + break; + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + Opcode = X86ISD::VSRLI; + break; + case Intrinsic::x86_sse2_psrai_w: + case Intrinsic::x86_sse2_psrai_d: + case Intrinsic::x86_avx2_psrai_w: + case Intrinsic::x86_avx2_psrai_d: + Opcode = X86ISD::VSRAI; + break; + } + return getTargetVShiftNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), DAG); + } + // Fix vector shift instructions where the last operand is a non-immediate // i32 value. case Intrinsic::x86_mmx_pslli_w: @@ -9776,8 +9957,9 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const if (isa<ConstantSDNode>(ShAmt)) return SDValue(); - unsigned NewIntNo = 0; + unsigned NewIntNo; switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::x86_mmx_pslli_w: NewIntNo = Intrinsic::x86_mmx_psll_w; break; @@ -9802,7 +9984,6 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_mmx_psrai_d: NewIntNo = Intrinsic::x86_mmx_psra_d; break; - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
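A usage sketch for the immediate-shift intrinsics consolidated above; they now all funnel through one getTargetVShiftNode call after the opcode switch (SSE2):

    #include <emmintrin.h>

    // Immediate left shift of four i32 lanes, lowered via X86ISD::VSHLI.
    __m128i shiftLeft3(__m128i v) {
      return _mm_slli_epi32(v, 3);
    }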
} // The vector shift intrinsics with scalars uses 32b shift amounts but @@ -9818,6 +9999,84 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(NewIntNo, MVT::i32), Op.getOperand(1), ShAmt); } + case Intrinsic::x86_sse42_pcmpistria128: + case Intrinsic::x86_sse42_pcmpestria128: + case Intrinsic::x86_sse42_pcmpistric128: + case Intrinsic::x86_sse42_pcmpestric128: + case Intrinsic::x86_sse42_pcmpistrio128: + case Intrinsic::x86_sse42_pcmpestrio128: + case Intrinsic::x86_sse42_pcmpistris128: + case Intrinsic::x86_sse42_pcmpestris128: + case Intrinsic::x86_sse42_pcmpistriz128: + case Intrinsic::x86_sse42_pcmpestriz128: { + unsigned Opcode; + unsigned X86CC; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse42_pcmpistria128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_A; + break; + case Intrinsic::x86_sse42_pcmpestria128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_A; + break; + case Intrinsic::x86_sse42_pcmpistric128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_B; + break; + case Intrinsic::x86_sse42_pcmpestric128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_B; + break; + case Intrinsic::x86_sse42_pcmpistrio128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_O; + break; + case Intrinsic::x86_sse42_pcmpestrio128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_O; + break; + case Intrinsic::x86_sse42_pcmpistris128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_S; + break; + case Intrinsic::x86_sse42_pcmpestris128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_S; + break; + case Intrinsic::x86_sse42_pcmpistriz128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_E; + break; + case Intrinsic::x86_sse42_pcmpestriz128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_E; + break; + } + SmallVector<SDValue, 5> NewOps; + NewOps.append(Op->op_begin()+1, Op->op_end()); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size()); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), + SDValue(PCMP.getNode(), 1)); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); + } + + case Intrinsic::x86_sse42_pcmpistri128: + case Intrinsic::x86_sse42_pcmpestri128: { + unsigned Opcode; + if (IntNo == Intrinsic::x86_sse42_pcmpistri128) + Opcode = X86ISD::PCMPISTRI; + else + Opcode = X86ISD::PCMPESTRI; + + SmallVector<SDValue, 5> NewOps; + NewOps.append(Op->op_begin()+1, Op->op_end()); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size()); + } } } @@ -10231,7 +10490,7 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - assert(VT.getSizeInBits() == 256 && VT.isInteger() && + assert(VT.is256BitVector() && VT.isInteger() && "Unsupported value type for operation"); unsigned NumElems = VT.getVectorNumElements(); @@ -10256,14 +10515,14 @@ static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const { - assert(Op.getValueType().getSizeInBits() == 256 && + assert(Op.getValueType().is256BitVector() && Op.getValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return Lower256IntArith(Op, DAG); } SDValue X86TargetLowering::LowerSUB(SDValue Op, SelectionDAG 
&DAG) const { - assert(Op.getValueType().getSizeInBits() == 256 && + assert(Op.getValueType().is256BitVector() && Op.getValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return Lower256IntArith(Op, DAG); @@ -10273,7 +10532,7 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); // Decompose 256-bit ops into smaller 128-bit ops. - if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()) + if (VT.is256BitVector() && !Subtarget->hasAVX2()) return Lower256IntArith(Op, DAG); assert((VT == MVT::v2i64 || VT == MVT::v4i64) && @@ -10503,7 +10762,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { } // Decompose 256-bit shifts into smaller 128-bit shifts. - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { unsigned NumElems = VT.getVectorNumElements(); MVT EltVT = VT.getVectorElementType().getSimpleVT(); EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); @@ -10992,9 +11251,9 @@ static void ReplaceATOMIC_LOAD(SDNode *Node, Results.push_back(Swap.getValue(1)); } -void X86TargetLowering:: +static void ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG, unsigned NewOp) const { + SelectionDAG &DAG, unsigned NewOp) { DebugLoc dl = Node->getDebugLoc(); assert (Node->getValueType(0) == MVT::i64 && "Only know how to expand i64 atomics"); @@ -11092,7 +11351,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Regs64bit ? X86::RBX : X86::EBX, swapInL, cpInH.getValue(1)); swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, - Regs64bit ? X86::RCX : X86::ECX, + Regs64bit ? X86::RCX : X86::ECX, swapInH, swapInL.getValue(1)); SDValue Ops[] = { swapInH.getValue(0), N->getOperand(1), @@ -11115,26 +11374,40 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::ATOMIC_LOAD_ADD: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG); - return; case ISD::ATOMIC_LOAD_AND: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG); - return; case ISD::ATOMIC_LOAD_NAND: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG); - return; case ISD::ATOMIC_LOAD_OR: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG); - return; case ISD::ATOMIC_LOAD_SUB: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG); - return; case ISD::ATOMIC_LOAD_XOR: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG); - return; - case ISD::ATOMIC_SWAP: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG); + case ISD::ATOMIC_SWAP: { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected opcode"); + case ISD::ATOMIC_LOAD_ADD: + Opc = X86ISD::ATOMADD64_DAG; + break; + case ISD::ATOMIC_LOAD_AND: + Opc = X86ISD::ATOMAND64_DAG; + break; + case ISD::ATOMIC_LOAD_NAND: + Opc = X86ISD::ATOMNAND64_DAG; + break; + case ISD::ATOMIC_LOAD_OR: + Opc = X86ISD::ATOMOR64_DAG; + break; + case ISD::ATOMIC_LOAD_SUB: + Opc = X86ISD::ATOMSUB64_DAG; + break; + case ISD::ATOMIC_LOAD_XOR: + Opc = X86ISD::ATOMXOR64_DAG; + break; + case ISD::ATOMIC_SWAP: + Opc = X86ISD::ATOMSWAP64_DAG; + break; + } + ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc); return; + } case ISD::ATOMIC_LOAD: ReplaceATOMIC_LOAD(N, Results, DAG); } @@ -11194,6 +11467,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FHSUB: return "X86ISD::FHSUB"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; + case X86ISD::FMAXC: return 
"X86ISD::FMAXC"; + case X86ISD::FMINC: return "X86ISD::FMINC"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; @@ -11212,7 +11487,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG"; case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG"; case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; + case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL"; case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; + case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ"; case X86ISD::VSHL: return "X86ISD::VSHL"; @@ -11273,6 +11550,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL"; case X86ISD::SAHF: return "X86ISD::SAHF"; case X86ISD::RDRAND: return "X86ISD::RDRAND"; + case X86ISD::FMADD: return "X86ISD::FMADD"; + case X86ISD::FMSUB: return "X86ISD::FMSUB"; + case X86ISD::FNMADD: return "X86ISD::FNMADD"; + case X86ISD::FNMSUB: return "X86ISD::FNMSUB"; + case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB"; + case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD"; } } @@ -11408,7 +11691,7 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, // FIXME: This collection of masks seems suspect. if (NumElts == 2) return true; - if (NumElts == 4 && VT.getSizeInBits() == 128) { + if (NumElts == 4 && VT.is128BitVector()) { return (isMOVLMask(Mask, VT) || isCommutedMOVLMask(Mask, VT, true) || isSHUFPMask(Mask, VT, Subtarget->hasAVX()) || @@ -11834,8 +12117,7 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, MIB.addOperand(Op); } BuildMI(*BB, MI, dl, - TII->get(Subtarget->hasAVX() ? X86::VMOVAPSrr : X86::MOVAPSrr), - MI->getOperand(0).getReg()) + TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) .addReg(X86::XMM0); MI->eraseFromParent(); @@ -11868,24 +12150,6 @@ X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const { } MachineBasicBlock * -X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const { - DebugLoc dl = MI->getDebugLoc(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - // First arg in ECX, the second in EAX. - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX) - .addReg(MI->getOperand(0).getReg()); - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX) - .addReg(MI->getOperand(1).getReg()); - - // The instruction doesn't actually take any operands though. - BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr)); - - MI->eraseFromParent(); // The pseudo is gone now. - return BB; -} - -MachineBasicBlock * X86TargetLowering::EmitVAARG64WithCustomInserter( MachineInstr *MI, MachineBasicBlock *MBB) const { @@ -12675,185 +12939,208 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // String/text processing lowering. 
case X86::PCMPISTRM128REG: case X86::VPCMPISTRM128REG: - return EmitPCMP(MI, BB, 3, false /* in-mem */); case X86::PCMPISTRM128MEM: case X86::VPCMPISTRM128MEM: - return EmitPCMP(MI, BB, 3, true /* in-mem */); case X86::PCMPESTRM128REG: case X86::VPCMPESTRM128REG: - return EmitPCMP(MI, BB, 5, false /* in mem */); case X86::PCMPESTRM128MEM: - case X86::VPCMPESTRM128MEM: - return EmitPCMP(MI, BB, 5, true /* in mem */); + case X86::VPCMPESTRM128MEM: { + unsigned NumArgs; + bool MemArg; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::PCMPISTRM128REG: + case X86::VPCMPISTRM128REG: + NumArgs = 3; MemArg = false; break; + case X86::PCMPISTRM128MEM: + case X86::VPCMPISTRM128MEM: + NumArgs = 3; MemArg = true; break; + case X86::PCMPESTRM128REG: + case X86::VPCMPESTRM128REG: + NumArgs = 5; MemArg = false; break; + case X86::PCMPESTRM128MEM: + case X86::VPCMPESTRM128MEM: + NumArgs = 5; MemArg = true; break; + } + return EmitPCMP(MI, BB, NumArgs, MemArg); + } // Thread synchronization. case X86::MONITOR: return EmitMonitor(MI, BB); - case X86::MWAIT: - return EmitMwait(MI, BB); // Atomic Lowering. - case X86::ATOMAND32: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, - X86::AND32ri, X86::MOV32rm, - X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass); - case X86::ATOMOR32: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr, - X86::OR32ri, X86::MOV32rm, - X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass); - case X86::ATOMXOR32: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr, - X86::XOR32ri, X86::MOV32rm, - X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass); - case X86::ATOMNAND32: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, - X86::AND32ri, X86::MOV32rm, - X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass, true); case X86::ATOMMIN32: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr); case X86::ATOMMAX32: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr); case X86::ATOMUMIN32: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr); case X86::ATOMUMAX32: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr); + case X86::ATOMMIN16: + case X86::ATOMMAX16: + case X86::ATOMUMIN16: + case X86::ATOMUMAX16: + case X86::ATOMMIN64: + case X86::ATOMMAX64: + case X86::ATOMUMIN64: + case X86::ATOMUMAX64: { + unsigned Opc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMMIN32: Opc = X86::CMOVL32rr; break; + case X86::ATOMMAX32: Opc = X86::CMOVG32rr; break; + case X86::ATOMUMIN32: Opc = X86::CMOVB32rr; break; + case X86::ATOMUMAX32: Opc = X86::CMOVA32rr; break; + case X86::ATOMMIN16: Opc = X86::CMOVL16rr; break; + case X86::ATOMMAX16: Opc = X86::CMOVG16rr; break; + case X86::ATOMUMIN16: Opc = X86::CMOVB16rr; break; + case X86::ATOMUMAX16: Opc = X86::CMOVA16rr; break; + case X86::ATOMMIN64: Opc = X86::CMOVL64rr; break; + case X86::ATOMMAX64: Opc = X86::CMOVG64rr; break; + case X86::ATOMUMIN64: Opc = X86::CMOVB64rr; break; + case X86::ATOMUMAX64: Opc = X86::CMOVA64rr; break; + // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. 
+ } + return EmitAtomicMinMaxWithCustomInserter(MI, BB, Opc); + } + + case X86::ATOMAND32: + case X86::ATOMOR32: + case X86::ATOMXOR32: + case X86::ATOMNAND32: { + bool Invert = false; + unsigned RegOpc, ImmOpc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMAND32: + RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; break; + case X86::ATOMOR32: + RegOpc = X86::OR32rr; ImmOpc = X86::OR32ri; break; + case X86::ATOMXOR32: + RegOpc = X86::XOR32rr; ImmOpc = X86::XOR32ri; break; + case X86::ATOMNAND32: + RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; Invert = true; break; + } + return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc, + X86::MOV32rm, X86::LCMPXCHG32, + X86::NOT32r, X86::EAX, + &X86::GR32RegClass, Invert); + } case X86::ATOMAND16: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, - X86::AND16ri, X86::MOV16rm, - X86::LCMPXCHG16, - X86::NOT16r, X86::AX, - &X86::GR16RegClass); case X86::ATOMOR16: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr, - X86::OR16ri, X86::MOV16rm, - X86::LCMPXCHG16, - X86::NOT16r, X86::AX, - &X86::GR16RegClass); case X86::ATOMXOR16: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr, - X86::XOR16ri, X86::MOV16rm, - X86::LCMPXCHG16, - X86::NOT16r, X86::AX, - &X86::GR16RegClass); - case X86::ATOMNAND16: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, - X86::AND16ri, X86::MOV16rm, - X86::LCMPXCHG16, + case X86::ATOMNAND16: { + bool Invert = false; + unsigned RegOpc, ImmOpc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMAND16: + RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; break; + case X86::ATOMOR16: + RegOpc = X86::OR16rr; ImmOpc = X86::OR16ri; break; + case X86::ATOMXOR16: + RegOpc = X86::XOR16rr; ImmOpc = X86::XOR16ri; break; + case X86::ATOMNAND16: + RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; Invert = true; break; + } + return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc, + X86::MOV16rm, X86::LCMPXCHG16, X86::NOT16r, X86::AX, - &X86::GR16RegClass, true); - case X86::ATOMMIN16: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr); - case X86::ATOMMAX16: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr); - case X86::ATOMUMIN16: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr); - case X86::ATOMUMAX16: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr); + &X86::GR16RegClass, Invert); + } case X86::ATOMAND8: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, - X86::AND8ri, X86::MOV8rm, - X86::LCMPXCHG8, - X86::NOT8r, X86::AL, - &X86::GR8RegClass); case X86::ATOMOR8: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr, - X86::OR8ri, X86::MOV8rm, - X86::LCMPXCHG8, - X86::NOT8r, X86::AL, - &X86::GR8RegClass); case X86::ATOMXOR8: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr, - X86::XOR8ri, X86::MOV8rm, - X86::LCMPXCHG8, - X86::NOT8r, X86::AL, - &X86::GR8RegClass); - case X86::ATOMNAND8: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, - X86::AND8ri, X86::MOV8rm, - X86::LCMPXCHG8, + case X86::ATOMNAND8: { + bool Invert = false; + unsigned RegOpc, ImmOpc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMAND8: + RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; break; + case X86::ATOMOR8: + RegOpc = X86::OR8rr; ImmOpc = X86::OR8ri; break; + case X86::ATOMXOR8: + RegOpc = X86::XOR8rr; ImmOpc = X86::XOR8ri; break; + case 
X86::ATOMNAND8: + RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; Invert = true; break; + } + return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc, + X86::MOV8rm, X86::LCMPXCHG8, X86::NOT8r, X86::AL, - &X86::GR8RegClass, true); - // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. + &X86::GR8RegClass, Invert); + } + // This group is for 64-bit host. case X86::ATOMAND64: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, - X86::AND64ri32, X86::MOV64rm, - X86::LCMPXCHG64, - X86::NOT64r, X86::RAX, - &X86::GR64RegClass); case X86::ATOMOR64: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr, - X86::OR64ri32, X86::MOV64rm, - X86::LCMPXCHG64, - X86::NOT64r, X86::RAX, - &X86::GR64RegClass); case X86::ATOMXOR64: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr, - X86::XOR64ri32, X86::MOV64rm, - X86::LCMPXCHG64, - X86::NOT64r, X86::RAX, - &X86::GR64RegClass); - case X86::ATOMNAND64: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, - X86::AND64ri32, X86::MOV64rm, - X86::LCMPXCHG64, + case X86::ATOMNAND64: { + bool Invert = false; + unsigned RegOpc, ImmOpc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMAND64: + RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; break; + case X86::ATOMOR64: + RegOpc = X86::OR64rr; ImmOpc = X86::OR64ri32; break; + case X86::ATOMXOR64: + RegOpc = X86::XOR64rr; ImmOpc = X86::XOR64ri32; break; + case X86::ATOMNAND64: + RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; Invert = true; break; + } + return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc, + X86::MOV64rm, X86::LCMPXCHG64, X86::NOT64r, X86::RAX, - &X86::GR64RegClass, true); - case X86::ATOMMIN64: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr); - case X86::ATOMMAX64: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr); - case X86::ATOMUMIN64: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr); - case X86::ATOMUMAX64: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr); + &X86::GR64RegClass, Invert); + } // This group does 64-bit operations on a 32-bit host. 
case X86::ATOMAND6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::AND32rr, X86::AND32rr, - X86::AND32ri, X86::AND32ri, - false); case X86::ATOMOR6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::OR32rr, X86::OR32rr, - X86::OR32ri, X86::OR32ri, - false); case X86::ATOMXOR6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::XOR32rr, X86::XOR32rr, - X86::XOR32ri, X86::XOR32ri, - false); case X86::ATOMNAND6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::AND32rr, X86::AND32rr, - X86::AND32ri, X86::AND32ri, - true); case X86::ATOMADD6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::ADD32rr, X86::ADC32rr, - X86::ADD32ri, X86::ADC32ri, - false); case X86::ATOMSUB6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::SUB32rr, X86::SBB32rr, - X86::SUB32ri, X86::SBB32ri, - false); - case X86::ATOMSWAP6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::MOV32rr, X86::MOV32rr, - X86::MOV32ri, X86::MOV32ri, - false); + case X86::ATOMSWAP6432: { + bool Invert = false; + unsigned RegOpcL, RegOpcH, ImmOpcL, ImmOpcH; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMAND6432: + RegOpcL = RegOpcH = X86::AND32rr; + ImmOpcL = ImmOpcH = X86::AND32ri; + break; + case X86::ATOMOR6432: + RegOpcL = RegOpcH = X86::OR32rr; + ImmOpcL = ImmOpcH = X86::OR32ri; + break; + case X86::ATOMXOR6432: + RegOpcL = RegOpcH = X86::XOR32rr; + ImmOpcL = ImmOpcH = X86::XOR32ri; + break; + case X86::ATOMNAND6432: + RegOpcL = RegOpcH = X86::AND32rr; + ImmOpcL = ImmOpcH = X86::AND32ri; + Invert = true; + break; + case X86::ATOMADD6432: + RegOpcL = X86::ADD32rr; RegOpcH = X86::ADC32rr; + ImmOpcL = X86::ADD32ri; ImmOpcH = X86::ADC32ri; + break; + case X86::ATOMSUB6432: + RegOpcL = X86::SUB32rr; RegOpcH = X86::SBB32rr; + ImmOpcL = X86::SUB32ri; ImmOpcH = X86::SBB32ri; + break; + case X86::ATOMSWAP6432: + RegOpcL = RegOpcH = X86::MOV32rr; + ImmOpcL = ImmOpcH = X86::MOV32ri; + break; + } + return EmitAtomicBit6432WithCustomInserter(MI, BB, RegOpcL, RegOpcH, + ImmOpcL, ImmOpcH, Invert); + } + case X86::VASTART_SAVE_XMM_REGS: return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); @@ -13043,7 +13330,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, false/*WriteMem*/); return DAG.getNode(ISD::BITCAST, dl, VT, ResNode); } - } + } // Emit a zeroed vector and insert the desired subvector on its // first half. @@ -13086,12 +13373,12 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); // Combine 256-bit vector shuffles. This is only profitable when in AVX mode - if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 && + if (Subtarget->hasAVX() && VT.is256BitVector() && N->getOpcode() == ISD::VECTOR_SHUFFLE) return PerformShuffleCombine256(N, DAG, DCI, Subtarget); // Only handle 128 wide vector from here on. - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return SDValue(); // Combine a vector_shuffle that is equal to build_vector load1, load2, load3, @@ -13109,7 +13396,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, /// a sequence of vector shuffle operations. 
/// It is possible when we truncate 256-bit vector to 128-bit vector
-SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, 
+SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
                                                   DAGCombinerInfo &DCI) const {
   if (!DCI.isBeforeLegalizeOps())
     return SDValue();
@@ -13151,8 +13438,9 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
 
     // PSHUFD
     static const int ShufMask1[] = {0, 2, 0, 0};
-    OpLo = DAG.getVectorShuffle(VT, dl, OpLo, DAG.getUNDEF(VT), ShufMask1);
-    OpHi = DAG.getVectorShuffle(VT, dl, OpHi, DAG.getUNDEF(VT), ShufMask1);
+    SDValue Undef = DAG.getUNDEF(VT);
+    OpLo = DAG.getVectorShuffle(VT, dl, OpLo, Undef, ShufMask1);
+    OpHi = DAG.getVectorShuffle(VT, dl, OpHi, Undef, ShufMask1);
 
     // MOVLHPS
     static const int ShufMask2[] = {0, 1, 4, 5};
@@ -13210,10 +13498,9 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
     static const int ShufMask1[] = {0,  1,  4,  5,  8,  9, 12, 13,
                                     -1, -1, -1, -1, -1, -1, -1, -1};
-    OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, DAG.getUNDEF(MVT::v16i8),
-                                ShufMask1);
-    OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, DAG.getUNDEF(MVT::v16i8),
-                                ShufMask1);
+    SDValue Undef = DAG.getUNDEF(MVT::v16i8);
+    OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, Undef, ShufMask1);
+    OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, Undef, ShufMask1);
 
     OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
     OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
@@ -13718,6 +14005,88 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+// Check whether a boolean test is testing a boolean value generated by
+// X86ISD::SETCC. If so, return the operand of that SETCC and the proper
+// condition code.
+//
+// Simplify the following patterns:
+// (Op (CMP (SETCC Cond EFLAGS) 1) EQ) or
+// (Op (CMP (SETCC Cond EFLAGS) 0) NEQ)
+// to (Op EFLAGS Cond)
+//
+// (Op (CMP (SETCC Cond EFLAGS) 0) EQ) or
+// (Op (CMP (SETCC Cond EFLAGS) 1) NEQ)
+// to (Op EFLAGS !Cond)
+//
+// where Op could be BRCOND or CMOV. For example,
+//   (BRCOND (CMP (SETCC COND_B EFLAGS) 1) EQ)
+// simplifies to (BRCOND EFLAGS COND_B), eliding the materialized boolean.
+//
+static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
+  // Quit unless this is a CMP, or a SUB whose value result is unused.
+  if (Cmp.getOpcode() != X86ISD::CMP &&
+      (Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0)))
+    return SDValue();
+
+  // Quit if not used as a boolean value.
+  if (CC != X86::COND_E && CC != X86::COND_NE)
+    return SDValue();
+
+  // Check CMP operands. One of them should be 0 or 1 and the other should be
+  // a SETCC or extended from it.
+  SDValue Op1 = Cmp.getOperand(0);
+  SDValue Op2 = Cmp.getOperand(1);
+
+  SDValue SetCC;
+  const ConstantSDNode* C = 0;
+  bool needOppositeCond = (CC == X86::COND_E);
+
+  if ((C = dyn_cast<ConstantSDNode>(Op1)))
+    SetCC = Op2;
+  else if ((C = dyn_cast<ConstantSDNode>(Op2)))
+    SetCC = Op1;
+  else // Quit if neither operand is a constant.
+    return SDValue();
+
+  if (C->getZExtValue() == 1)
+    needOppositeCond = !needOppositeCond;
+  else if (C->getZExtValue() != 0)
+    // Quit if the constant is neither 0 nor 1.
+    return SDValue();
+
+  // Skip the 'zext' node.
+  if (SetCC.getOpcode() == ISD::ZERO_EXTEND)
+    SetCC = SetCC.getOperand(0);
+
+  // Quit if not SETCC.
+  // FIXME: So far we only handle the boolean value generated from SETCC. If
+  // there are other ways to generate boolean values, we need to handle them
+  // here as well.
+  if (SetCC.getOpcode() != X86ISD::SETCC)
+    return SDValue();
+
+  // Set the condition code, or the opposite one if necessary.
+  CC = X86::CondCode(SetCC.getConstantOperandVal(0));
+  if (needOppositeCond)
+    CC = X86::GetOppositeBranchCondition(CC);
+
+  return SetCC.getOperand(1);
+}
+
+static bool IsValidFCMOVCondition(X86::CondCode CC) {
+  switch (CC) {
+  default:
+    return false;
+  case X86::COND_B:
+  case X86::COND_BE:
+  case X86::COND_E:
+  case X86::COND_P:
+  case X86::COND_AE:
+  case X86::COND_A:
+  case X86::COND_NE:
+  case X86::COND_NP:
+    return true;
+  }
+}
+
 /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
 static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI) {
@@ -13731,6 +14100,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
   SDValue TrueOp = N->getOperand(1);
   X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);
   SDValue Cond = N->getOperand(3);
+
   if (CC == X86::COND_E || CC == X86::COND_NE) {
     switch (Cond.getOpcode()) {
     default: break;
@@ -13742,6 +14112,18 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  SDValue Flags;
+
+  Flags = BoolTestSetCCCombine(Cond, CC);
+  if (Flags.getNode() &&
+      // Extra check as FCMOV only supports a subset of X86 conditions.
+      (FalseOp.getValueType() != MVT::f80 || IsValidFCMOVCondition(CC))) {
+    SDValue Ops[] = { FalseOp, TrueOp,
+                      DAG.getConstant(CC, MVT::i8), Flags };
+    return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
+                       Ops, array_lengthof(Ops));
+  }
+
   // If this is a select between two integer constants, try to do some
   // optimizations.  Note that the operands are ordered the opposite of SELECT
   // operands.
@@ -14164,7 +14546,7 @@ static bool CanFoldXORWithAllOnes(const SDNode *N) {
 
   // Sometimes the operand may come from an insert_subvector building a
   // 256-bit allones vector
-  if (VT.getSizeInBits() == 256 &&
+  if (VT.is256BitVector() &&
       N->getOpcode() == ISD::INSERT_SUBVECTOR) {
     SDValue V1 = N->getOperand(0);
     SDValue V2 = N->getOperand(1);
@@ -14609,7 +14991,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
   // On Sandy Bridge, 256-bit memory operations are executed by two
   // 128-bit ports. However, on Haswell it is better to issue a single 256-bit
   // memory operation.
-  if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2() &&
+  if (VT.is256BitVector() && !Subtarget->hasAVX2() &&
      StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
      StoredVal.getNumOperands() == 2) {
     SDValue Value0 = StoredVal.getOperand(0);
@@ -14992,6 +15374,29 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
+/// PerformFMinFMaxCombine - Do target-specific dag combines on X86ISD::FMIN
+/// and X86ISD::FMAX nodes.
+static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
+  assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX);
+
+  // Only perform optimizations if unsafe math is enabled.
+  if (!DAG.getTarget().Options.UnsafeFPMath)
+    return SDValue();
+
+  // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
+  // into FMINC and FMAXC, which are commutative operations. (FMIN/FMAX keep
+  // the SSE minps/maxps semantics of returning the second operand when the
+  // inputs are unordered, so they are not commutative in general.)
+  unsigned NewOp = 0;
+  switch (N->getOpcode()) {
+  default: llvm_unreachable("unknown opcode");
+  case X86ISD::FMIN: NewOp = X86ISD::FMINC; break;
+  case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break;
+  }
+
+  return DAG.getNode(NewOp, N->getDebugLoc(), N->getValueType(0),
+                     N->getOperand(0), N->getOperand(1));
+}
+
+
 /// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { // FAND(0.0, x) -> 0.0 @@ -15067,19 +15472,19 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, // concat the vectors to original VT unsigned NumElems = OpVT.getVectorNumElements(); + SDValue Undef = DAG.getUNDEF(OpVT); + SmallVector<int,8> ShufMask1(NumElems, -1); for (unsigned i = 0; i != NumElems/2; ++i) ShufMask1[i] = i; - SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT), - &ShufMask1[0]); + SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask1[0]); SmallVector<int,8> ShufMask2(NumElems, -1); for (unsigned i = 0; i != NumElems/2; ++i) ShufMask2[i] = i + NumElems/2; - SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT), - &ShufMask2[0]); + SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask2[0]); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), VT.getVectorNumElements()/2); @@ -15092,6 +15497,40 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget* Subtarget) { + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + EVT ScalarVT = VT.getScalarType(); + if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasFMA()) + return SDValue(); + + SDValue A = N->getOperand(0); + SDValue B = N->getOperand(1); + SDValue C = N->getOperand(2); + + bool NegA = (A.getOpcode() == ISD::FNEG); + bool NegB = (B.getOpcode() == ISD::FNEG); + bool NegC = (C.getOpcode() == ISD::FNEG); + + // Negative multiplication when NegA xor NegB + bool NegMul = (NegA != NegB); + if (NegA) + A = A.getOperand(0); + if (NegB) + B = B.getOperand(0); + if (NegC) + C = C.getOperand(0); + + unsigned Opcode; + if (!NegMul) + Opcode = (!NegC)? X86ISD::FMADD : X86ISD::FMSUB; + else + Opcode = (!NegC)? X86ISD::FNMADD : X86ISD::FNMSUB; + return DAG.getNode(Opcode, dl, VT, A, B, C); +} + static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { @@ -15164,7 +15603,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); + SDValue RHS = N->getOperand(1); if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB) if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0))) @@ -15187,19 +15626,50 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { // Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) { - unsigned X86CC = N->getConstantOperandVal(0); - SDValue EFLAG = N->getOperand(1); DebugLoc DL = N->getDebugLoc(); + X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); + SDValue EFLAGS = N->getOperand(1); // Materialize "setb reg" as "sbb reg,reg", since it can be extended without // a zext and produces an all-ones bit which is more useful than 0/1 in some // cases. 
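Stepping back to PerformFMACombine above: all of the FNEG bookkeeping reduces to two bits, whether the product is negated (NegA != NegB) and whether the addend is negated. A small sketch of that mapping in plain C++, with a hypothetical enum standing in for the X86ISD opcodes:

    enum FmaKind { FMADD, FMSUB, FNMADD, FNMSUB };

    // (-a)*b + c and a*(-b) + c negate the product equally, so only the
    // XOR of the two multiplicand signs matters.
    static FmaKind classifyFMA(bool NegA, bool NegB, bool NegC) {
      bool NegMul = (NegA != NegB);
      if (!NegMul)
        return NegC ? FMSUB : FMADD;   //  a*b + c   /   a*b - c
      return NegC ? FNMSUB : FNMADD;   // -(a*b) + c / -(a*b) - c
    }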
- if (X86CC == X86::COND_B) + if (CC == X86::COND_B) return DAG.getNode(ISD::AND, DL, MVT::i8, DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), EFLAG), + DAG.getConstant(CC, MVT::i8), EFLAGS), DAG.getConstant(1, MVT::i8)); + SDValue Flags; + + Flags = BoolTestSetCCCombine(EFLAGS, CC); + if (Flags.getNode()) { + SDValue Cond = DAG.getConstant(CC, MVT::i8); + return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags); + } + + return SDValue(); +} + +// Optimize branch condition evaluation. +// +static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { + DebugLoc DL = N->getDebugLoc(); + SDValue Chain = N->getOperand(0); + SDValue Dest = N->getOperand(1); + SDValue EFLAGS = N->getOperand(3); + X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2)); + + SDValue Flags; + + Flags = BoolTestSetCCCombine(EFLAGS, CC); + if (Flags.getNode()) { + SDValue Cond = DAG.getConstant(CC, MVT::i8); + return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond, + Flags); + } + return SDValue(); } @@ -15408,6 +15878,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget); case X86ISD::FXOR: case X86ISD::FOR: return PerformFORCombine(N, DAG); + case X86ISD::FMIN: + case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG); case X86ISD::FAND: return PerformFANDCombine(N, DAG); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); @@ -15417,6 +15889,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI); case ISD::SETCC: return PerformISDSETCCCombine(N, DAG); case X86ISD::SETCC: return PerformSETCCCombine(N, DAG); + case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::PALIGN: case X86ISD::UNPCKH: @@ -15431,6 +15904,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::VPERMILP: case X86ISD::VPERM2X128: case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget); + case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget); } return SDValue(); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 78e4d75..74f5167 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -137,10 +137,6 @@ namespace llvm { /// relative displacements. WrapperRIP, - /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word - /// of an XMM vector, with the high word zero filled. - MOVQ2DQ, - /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector /// to an MMX vector. If you think this is too close to the previous /// mnemonic, so do I; blame Intel. @@ -199,6 +195,9 @@ namespace llvm { /// FMAX, FMIN, + /// FMAXC, FMINC - Commutative FMIN and FMAX. + FMAXC, FMINC, + /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal /// approximation. Note that these typically require refinement /// in order to obtain suitable precision. @@ -231,6 +230,9 @@ namespace llvm { // VSEXT_MOVL - Vector move low and sign extend. VSEXT_MOVL, + // VFPEXT - Vector FP extend. 
+ VFPEXT, + // VSHL, VSRL - 128-bit vector logical left / right shift VSHLDQ, VSRLDQ, @@ -294,6 +296,14 @@ namespace llvm { // PMULUDQ - Vector multiply packed unsigned doubleword integers PMULUDQ, + // FMA nodes + FMADD, + FNMADD, + FMSUB, + FNMSUB, + FMADDSUB, + FMSUBADD, + // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, // according to %al. An operator is needed so that this can be expanded // with control flow. @@ -325,6 +335,10 @@ namespace llvm { // RDRAND - Get a random integer and indicate whether it is valid in CF. RDRAND, + // PCMP*STRI + PCMPISTRI, + PCMPESTRI, + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - // Atomic 64-bit binary operations. @@ -597,6 +611,12 @@ namespace llvm { virtual bool isZExtFree(Type *Ty1, Type *Ty2) const; virtual bool isZExtFree(EVT VT1, EVT VT2) const; + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than + /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to + /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd + /// is expanded to mul + add. + virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; } + /// isNarrowingProfitable - Return true if it's profitable to narrow /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow /// from i32 to i8 but not from i32 to i16. @@ -656,7 +676,8 @@ namespace llvm { /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. - virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const; + virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const; /// getStackCookieLocation - Return true if the target stores stack /// protector cookies at a fixed offset in some non-standard address @@ -813,6 +834,8 @@ namespace llvm { SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const; SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const; + virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -844,9 +867,6 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const; - void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG, unsigned NewOp) const; - /// Utility function to emit string processing sse4.2 instructions /// that return in xmm0. /// This takes the instruction to expand, the associated machine basic @@ -933,7 +953,8 @@ namespace llvm { }; namespace X86 { - FastISel *createFastISel(FunctionLoweringInfo &funcInfo); + FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo); } } diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index b6ba68f..f790611 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1132,8 +1132,10 @@ defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m, X86xor_flag, xor, 1, 0>; defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m, X86add_flag, add, 1, 1>; +let isCompare = 1 in { defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m, X86sub_flag, sub, 0, 0>; +} // Arithmetic. 
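A note on the isFMAFasterThanMulAndAdd hook introduced a few hunks above: it tells the legalizer whether an @llvm.fmuladd intrinsic should contract to a single FMA node or expand to separate multiply and add. The numerical difference is one rounding step versus two; roughly, in C++:

    #include <cmath>

    // Expanded lowering: rounds after the multiply and again after the add.
    static double mulThenAdd(double a, double b, double c) { return a * b + c; }

    // Contracted lowering: one fused operation, a single rounding.
    static double contracted(double a, double b, double c) {
      return std::fma(a, b, c);
    }

The x86 override simply returns true; per its own comment, the contraction still only happens when the FMA nodes are actually legal on the subtarget.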
let Uses = [EFLAGS] in { diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index 0d5490a..2eb454d 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -39,12 +39,15 @@ let neverHasSideEffects = 1 in { // Sign/Zero extenders +let neverHasSideEffects = 1 in { def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>, TB, OpSize; +let mayLoad = 1 in def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>, TB, OpSize; +} // neverHasSideEffects = 1 def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; @@ -59,12 +62,15 @@ def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), [(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>, TB; +let neverHasSideEffects = 1 in { def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>, TB, OpSize; +let mayLoad = 1 in def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>, TB, OpSize; +} // neverHasSideEffects = 1 def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; @@ -82,6 +88,7 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), // These are the same as the regular MOVZX32rr8 and MOVZX32rm8 // except that they use GR32_NOREX for the output operand register class // instead of GR32. This allows them to operate on h registers on x86-64. 
+let neverHasSideEffects = 1, isCodeGenOnly = 1 in { def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", @@ -91,6 +98,7 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, TB; +} // MOVSX64rr8 always has a REX prefix and it has an 8-bit register // operand, which makes it a rare instruction with an 8-bit register diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 8802a2e..95ee7e5 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -16,159 +16,307 @@ //===----------------------------------------------------------------------===// let Constraints = "$src1 = $dst" in { -multiclass fma3p_rm<bits<8> opc, string OpcodeStr> { -let neverHasSideEffects = 1 in { - def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; - let mayLoad = 1 in - def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, f128mem:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; - def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, VR256:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; - let mayLoad = 1 in - def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, f256mem:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; -} // neverHasSideEffects = 1 -} +multiclass fma3p_rm<bits<8> opc, string OpcodeStr, + PatFrag MemFrag128, PatFrag MemFrag256, + ValueType OpVT128, ValueType OpVT256, + SDPatternOperator Op = null_frag, bit MayLoad = 1> { + def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, (OpVT128 (Op VR128:$src2, + VR128:$src1, VR128:$src3)))]>; -// Intrinsic for 132 pattern -multiclass fma3p_rm_int<bits<8> opc, string OpcodeStr, - PatFrag MemFrag128, PatFrag MemFrag256, - Intrinsic Int128, Intrinsic Int256> { - def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src3, VR128:$src2))]>; - def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, f128mem:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (Int128 VR128:$src1, (MemFrag128 addr:$src3), VR128:$src2))]>; - def rY_Int : FMA3<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, VR256:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src3, VR256:$src2))]>; - def mY_Int : FMA3<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, f256mem:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR256:$dst, - (Int256 VR256:$src1, (MemFrag256 addr:$src3), VR256:$src2))]>; + let mayLoad = MayLoad in + def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, f128mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, + (MemFrag128 
addr:$src3))))]>; + + def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, VR256:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1, + VR256:$src3)))]>; + + let mayLoad = MayLoad in + def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, f256mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR256:$dst, + (OpVT256 (Op VR256:$src2, VR256:$src1, + (MemFrag256 addr:$src3))))]>; } } // Constraints = "$src1 = $dst" multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, string OpcodeStr, string PackTy, PatFrag MemFrag128, PatFrag MemFrag256, - Intrinsic Int128, Intrinsic Int256> { - defm r132 : fma3p_rm_int <opc132, !strconcat(OpcodeStr, - !strconcat("132", PackTy)), MemFrag128, MemFrag256, - Int128, Int256>; - defm r132 : fma3p_rm <opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>; - defm r213 : fma3p_rm <opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>; - defm r231 : fma3p_rm <opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>; + SDNode Op, ValueType OpTy128, ValueType OpTy256> { + defm r213 : fma3p_rm<opc213, + !strconcat(OpcodeStr, !strconcat("213", PackTy)), + MemFrag128, MemFrag256, OpTy128, OpTy256, Op, 0>; +let neverHasSideEffects = 1 in { + defm r132 : fma3p_rm<opc132, + !strconcat(OpcodeStr, !strconcat("132", PackTy)), + MemFrag128, MemFrag256, OpTy128, OpTy256>; + defm r231 : fma3p_rm<opc231, + !strconcat(OpcodeStr, !strconcat("231", PackTy)), + MemFrag128, MemFrag256, OpTy128, OpTy256>; +} // neverHasSideEffects = 1 } // Fused Multiply-Add let ExeDomain = SSEPackedSingle in { defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", memopv4f32, - memopv8f32, int_x86_fma_vfmadd_ps, int_x86_fma_vfmadd_ps_256>; - defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32, - memopv8f32, int_x86_fma_vfmsub_ps, int_x86_fma_vfmsub_ps_256>; + memopv8f32, X86Fmadd, v4f32, v8f32>; + defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32, + memopv8f32, X86Fmsub, v4f32, v8f32>; defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", - memopv4f32, memopv8f32, int_x86_fma_vfmaddsub_ps, - int_x86_fma_vfmaddsub_ps_256>; + memopv4f32, memopv8f32, X86Fmaddsub, + v4f32, v8f32>; defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", - memopv4f32, memopv8f32, int_x86_fma_vfmsubadd_ps, - int_x86_fma_vfmaddsub_ps_256>; + memopv4f32, memopv8f32, X86Fmsubadd, + v4f32, v8f32>; } let ExeDomain = SSEPackedDouble in { defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfmadd_pd, int_x86_fma_vfmadd_pd_256>, VEX_W; + memopv4f64, X86Fmadd, v2f64, v4f64>, VEX_W; defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfmsub_pd, int_x86_fma_vfmsub_pd_256>, VEX_W; - defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfmaddsub_pd, int_x86_fma_vfmaddsub_pd_256>, VEX_W; - defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfmsubadd_pd, int_x86_fma_vfmsubadd_pd_256>, VEX_W; + memopv4f64, X86Fmsub, v2f64, v4f64>, VEX_W; + defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", + memopv2f64, memopv4f64, X86Fmaddsub, + v2f64, v4f64>, VEX_W; + defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", + memopv2f64, 
memopv4f64, X86Fmsubadd, + v2f64, v4f64>, VEX_W; } // Fused Negative Multiply-Add let ExeDomain = SSEPackedSingle in { defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", memopv4f32, - memopv8f32, int_x86_fma_vfnmadd_ps, int_x86_fma_vfnmadd_ps_256>; + memopv8f32, X86Fnmadd, v4f32, v8f32>; defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", memopv4f32, - memopv8f32, int_x86_fma_vfnmsub_ps, int_x86_fma_vfnmsub_ps_256>; + memopv8f32, X86Fnmsub, v4f32, v8f32>; } let ExeDomain = SSEPackedDouble in { defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfnmadd_pd, int_x86_fma_vfnmadd_pd_256>, VEX_W; - defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfnmsub_pd, int_x86_fma_vfnmsub_pd_256>, VEX_W; + memopv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W; + defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", + memopv2f64, memopv4f64, X86Fnmsub, v2f64, + v4f64>, VEX_W; } +let Predicates = [HasFMA] in { + def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFMADDSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFMADDSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFMSUBADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFMSUBADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMADDSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFMADDSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMSUBADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFMSUBADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, + 
(memopv2f64 addr:$src3)), + (VFMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, + (memopv2f64 addr:$src3)), + (VFMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFMADDSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, + (memopv2f64 addr:$src3)), + (VFMADDSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFMSUBADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, + (memopv2f64 addr:$src3)), + (VFMSUBADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMADDSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFMADDSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMSUBADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFMSUBADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFNMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFNMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFNMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFNMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFNMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFNMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFNMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFNMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFNMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, + (memopv2f64 addr:$src3)), + (VFNMADDPDr213m VR128:$src1, VR128:$src2, 
addr:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFNMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, + (memopv2f64 addr:$src3)), + (VFNMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFNMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFNMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFNMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFNMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + +} // Predicates = [HasFMA] let Constraints = "$src1 = $dst" in { multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop, - RegisterClass RC> { -let neverHasSideEffects = 1 in { - def r : FMA3<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2, RC:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; - let mayLoad = 1 in - def m : FMA3<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, RC:$src2, x86memop:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; -} // neverHasSideEffects = 1 + RegisterClass RC, ValueType OpVT, PatFrag mem_frag, + SDPatternOperator OpNode = null_frag, bit MayLoad = 1> { + def r : FMA3<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, RC:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set RC:$dst, + (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>; + let mayLoad = MayLoad in + def m : FMA3<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, RC:$src2, x86memop:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set RC:$dst, + (OpVT (OpNode RC:$src2, RC:$src1, + (mem_frag addr:$src3))))]>; } multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop, - ComplexPattern mem_cpat, Intrinsic IntId> { + ComplexPattern mem_cpat, Intrinsic IntId, + RegisterClass RC> { def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, (IntId VR128:$src1, VR128:$src3, VR128:$src2))]>; + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, (IntId VR128:$src2, VR128:$src1, + VR128:$src3))]>; def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, memop:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (IntId VR128:$src1, mem_cpat:$src3, VR128:$src2))]>; + (ins VR128:$src1, VR128:$src2, memop:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, + (IntId VR128:$src2, VR128:$src1, mem_cpat:$src3))]>; } } // Constraints = "$src1 = $dst" multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, - string OpStr, Intrinsic IntF32, Intrinsic IntF64> { - defm SSr132 : fma3s_rm<opc132, !strconcat(OpStr, "132ss"), f32mem, FR32>; - defm SSr213 : fma3s_rm<opc213, !strconcat(OpStr, "213ss"), f32mem, FR32>; - defm SSr231 : fma3s_rm<opc231, !strconcat(OpStr, "231ss"), f32mem, FR32>; - defm SDr132 : fma3s_rm<opc132, 
!strconcat(OpStr, "132sd"), f64mem, FR64>, VEX_W; - defm SDr213 : fma3s_rm<opc213, !strconcat(OpStr, "213sd"), f64mem, FR64>, VEX_W; - defm SDr231 : fma3s_rm<opc231, !strconcat(OpStr, "231sd"), f64mem, FR64>, VEX_W; - defm SSr132 : fma3s_rm_int <opc132, !strconcat(OpStr, "132ss"), ssmem, - sse_load_f32, IntF32>; - defm SDr132 : fma3s_rm_int <opc132, !strconcat(OpStr, "132sd"), sdmem, - sse_load_f64, IntF64>; + string OpStr, string PackTy, Intrinsic Int, + SDNode OpNode, RegisterClass RC, ValueType OpVT, + X86MemOperand x86memop, Operand memop, PatFrag mem_frag, + ComplexPattern mem_cpat> { +let neverHasSideEffects = 1 in { + defm r132 : fma3s_rm<opc132, !strconcat(OpStr, !strconcat("132", PackTy)), + x86memop, RC, OpVT, mem_frag>; + defm r231 : fma3s_rm<opc231, !strconcat(OpStr, !strconcat("231", PackTy)), + x86memop, RC, OpVT, mem_frag>; +} + +defm r213 : fma3s_rm<opc213, !strconcat(OpStr, !strconcat("213", PackTy)), + x86memop, RC, OpVT, mem_frag, OpNode, 0>, + fma3s_rm_int<opc213, !strconcat(OpStr, !strconcat("213", PackTy)), + memop, mem_cpat, Int, RC>; +} + +multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231, + string OpStr, Intrinsic IntF32, Intrinsic IntF64, + SDNode OpNode> { + defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", IntF32, OpNode, + FR32, f32, f32mem, ssmem, loadf32, sse_load_f32>; + defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", IntF64, OpNode, + FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W; } -defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, - int_x86_fma_vfmadd_sd>, VEX_LIG; -defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss, - int_x86_fma_vfmsub_sd>, VEX_LIG; +defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, + int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG; +defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss, + int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG; -defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss, - int_x86_fma_vfnmadd_sd>, VEX_LIG; -defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, - int_x86_fma_vfnmsub_sd>, VEX_LIG; +defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss, + int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG; +defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, + int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index a115ab4..81b4f81 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -366,7 +366,7 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, // // SDI - SSE2 instructions with XD prefix. // SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix. -// SSDI - SSE2 instructions with XS prefix. +// S2SI - SSE2 instructions with XS prefix. // SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix. // PDI - SSE2 instructions with TB and OpSize prefixes. // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes. 
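On the 132/213/231 suffixes that the X86InstrFMA.td multiclasses above stamp out: the digits name which operands feed the multiply and which is the addend, with operand 1 doubling as the destination; the patch attaches selection patterns only to the 213 forms and leaves 132/231 pattern-less (neverHasSideEffects) for the assembler and later folding. The published semantics, sketched as scalar C++ with illustrative helper names:

    // Operand 1 is both a source and the destination register.
    static float vfmadd132ss(float op1, float op2, float op3) { return op1 * op3 + op2; }
    static float vfmadd213ss(float op1, float op2, float op3) { return op2 * op1 + op3; }
    static float vfmadd231ss(float op1, float op2, float op3) { return op2 * op3 + op1; }

The 213 line matches the r213 TableGen pattern above, (set $dst, (Op $src2, $src1, $src3)).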
@@ -379,10 +379,10 @@ class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; -class SSDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> +class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE2]>; -class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, +class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>; class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, @@ -397,6 +397,10 @@ class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD, Requires<[HasAVX]>; +class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS, + Requires<[HasAVX]>; class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index ec030dd..ee2d3c4 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -29,6 +29,13 @@ def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; + +// Commutative and Associative FMIN and FMAX. 
+def X86fminc    : SDNode<"X86ISD::FMINC", SDTFPBinOp,
+    [SDNPCommutative, SDNPAssociative]>;
+def X86fmaxc    : SDNode<"X86ISD::FMAXC", SDTFPBinOp,
+    [SDNPCommutative, SDNPAssociative]>;
+
 def X86fand  : SDNode<"X86ISD::FAND", SDTFPBinOp,
                       [SDNPCommutative, SDNPAssociative]>;
 def X86for   : SDNode<"X86ISD::FOR", SDTFPBinOp,
@@ -73,14 +80,20 @@ def X86vzmovl  : SDNode<"X86ISD::VZEXT_MOVL",
                  SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
 def X86vzmovly  : SDNode<"X86ISD::VZEXT_MOVL",
-                 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, 
+                 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
                                       SDTCisOpSmallerThanOp<1, 0> ]>>;
 def X86vsmovl  : SDNode<"X86ISD::VSEXT_MOVL",
-                 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
+                 SDTypeProfile<1, 1,
+                 [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
 
 def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def X86vfpext  : SDNode<"X86ISD::VFPEXT",
+                 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                                      SDTCisFP<0>, SDTCisFP<1>]>>;
+
 def X86vshldq  : SDNode<"X86ISD::VSHLDQ",    SDTIntShiftOp>;
 def X86vshrdq  : SDNode<"X86ISD::VSRLDQ",    SDTIntShiftOp>;
 def X86cmpp    : SDNode<"X86ISD::CMPP",      SDTX86VFCMP>;
@@ -125,7 +138,10 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
 def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
 
 def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
-SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
+                             SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
+
+def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
+                           SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
 
 def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
 
@@ -160,9 +176,26 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
 
 def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
 
-def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
-def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
-def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+def X86Blendpw  : SDNode<"X86ISD::BLENDPW",  SDTBlend>;
+def X86Blendps  : SDNode<"X86ISD::BLENDPS",  SDTBlend>;
+def X86Blendpd  : SDNode<"X86ISD::BLENDPD",  SDTBlend>;
+def X86Fmadd    : SDNode<"X86ISD::FMADD",    SDTFma>;
+def X86Fnmadd   : SDNode<"X86ISD::FNMADD",   SDTFma>;
+def X86Fmsub    : SDNode<"X86ISD::FMSUB",    SDTFma>;
+def X86Fnmsub   : SDNode<"X86ISD::FNMSUB",   SDTFma>;
+def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>;
+def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
+
+def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+                                         SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
+                                         SDTCisVT<4, i8>]>;
+def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+                                         SDTCisVT<2, v16i8>, SDTCisVT<3, i32>,
+                                         SDTCisVT<4, v16i8>, SDTCisVT<5, i32>,
+                                         SDTCisVT<6, i8>]>;
+
+def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>;
+def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>;
 
 //===----------------------------------------------------------------------===//
 // SSE Complex Patterns
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 69493bc..459f01a 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -414,12 +414,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::CVTSD2SIrr,      X86::CVTSD2SIrm,          0 },
     { X86::CVTSS2SI64rr,    X86::CVTSS2SI64rm,        0 },
     { X86::CVTSS2SIrr,      X86::CVTSS2SIrm,          0 },
-    { X86::Int_CVTSD2SSrr,  X86::Int_CVTSD2SSrm,      0 },
-    { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm,    0 },
-    { X86::Int_CVTSI2SDrr,  X86::Int_CVTSI2SDrm,      0 },
-    { 
X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, - { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, - { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 }, { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 }, { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, @@ -680,6 +674,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL64rr, X86::IMUL64rm, 0 }, { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, + { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, + { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, + { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, + { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, + { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, + { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 }, { X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 }, { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 }, @@ -1130,8 +1130,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 }, { X86::VFMADDSSr213r, X86::VFMADDSSr213m, 0 }, { X86::VFMADDSDr213r, X86::VFMADDSDr213m, 0 }, - { X86::VFMADDSSr132r_Int, X86::VFMADDSSr132m_Int, 0 }, - { X86::VFMADDSDr132r_Int, X86::VFMADDSDr132m_Int, 0 }, + { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, 0 }, + { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, 0 }, { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 }, { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 }, @@ -1145,10 +1145,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 }, { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 }, { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 }, - { X86::VFMADDPSr132r_Int, X86::VFMADDPSr132m_Int, TB_ALIGN_16 }, - { X86::VFMADDPDr132r_Int, X86::VFMADDPDr132m_Int, TB_ALIGN_16 }, - { X86::VFMADDPSr132rY_Int, X86::VFMADDPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFMADDPDr132rY_Int, X86::VFMADDPDr132mY_Int, TB_ALIGN_32 }, { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 }, { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 }, @@ -1156,8 +1152,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 }, { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, 0 }, { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, 0 }, - { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr132m_Int, 0 }, - { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr132m_Int, 0 }, + { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, 0 }, + { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, 0 }, { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 }, { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 }, @@ -1171,10 +1167,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 }, { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 }, { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 }, - { X86::VFNMADDPSr132r_Int, X86::VFNMADDPSr132m_Int, TB_ALIGN_16 }, - { X86::VFNMADDPDr132r_Int, X86::VFNMADDPDr132m_Int, TB_ALIGN_16 }, - { X86::VFNMADDPSr132rY_Int, X86::VFNMADDPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFNMADDPDr132rY_Int, X86::VFNMADDPDr132mY_Int, TB_ALIGN_32 }, { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 }, { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 }, @@ -1182,8 +1174,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 }, { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, 0 }, { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, 0 }, - { 
X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr132m_Int, 0 }, - { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr132m_Int, 0 }, + { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, 0 }, + { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, 0 }, { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 }, { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 }, @@ -1197,10 +1189,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 }, { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 }, { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 }, - { X86::VFMSUBPSr132r_Int, X86::VFMSUBPSr132m_Int, TB_ALIGN_16 }, - { X86::VFMSUBPDr132r_Int, X86::VFMSUBPDr132m_Int, TB_ALIGN_16 }, - { X86::VFMSUBPSr132rY_Int, X86::VFMSUBPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFMSUBPDr132rY_Int, X86::VFMSUBPDr132mY_Int, TB_ALIGN_32 }, { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 }, { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 }, @@ -1208,8 +1196,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 }, { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, 0 }, { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, 0 }, - { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr132m_Int, 0 }, - { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr132m_Int, 0 }, + { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, 0 }, + { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, 0 }, { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 }, { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 }, @@ -1223,10 +1211,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 }, { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 }, { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 }, - { X86::VFNMSUBPSr132r_Int, X86::VFNMSUBPSr132m_Int, TB_ALIGN_16 }, - { X86::VFNMSUBPDr132r_Int, X86::VFNMSUBPDr132m_Int, TB_ALIGN_16 }, - { X86::VFNMSUBPSr132rY_Int, X86::VFNMSUBPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFNMSUBPDr132rY_Int, X86::VFNMSUBPDr132mY_Int, TB_ALIGN_32 }, { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_16 }, { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_16 }, @@ -1240,10 +1224,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_32 }, { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_32 }, { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_32 }, - { X86::VFMADDSUBPSr132r_Int, X86::VFMADDSUBPSr132m_Int, TB_ALIGN_16 }, - { X86::VFMADDSUBPDr132r_Int, X86::VFMADDSUBPDr132m_Int, TB_ALIGN_16 }, - { X86::VFMADDSUBPSr132rY_Int, X86::VFMADDSUBPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFMADDSUBPDr132rY_Int, X86::VFMADDSUBPDr132mY_Int, TB_ALIGN_32 }, { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_16 }, { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_16 }, @@ -1257,10 +1237,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 }, { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 }, { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 }, - { X86::VFMSUBADDPSr132r_Int, X86::VFMSUBADDPSr132m_Int, TB_ALIGN_16 }, - { X86::VFMSUBADDPDr132r_Int, X86::VFMSUBADDPDr132m_Int, TB_ALIGN_16 }, - { X86::VFMSUBADDPSr132rY_Int, X86::VFMSUBADDPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFMSUBADDPDr132rY_Int, X86::VFMSUBADDPDr132mY_Int, TB_ALIGN_32 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) { @@ -1318,8 +1294,7 @@ 
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); switch (MI.getOpcode()) { - default: - llvm_unreachable(0); + default: llvm_unreachable("Unreachable!"); case X86::MOVSX16rr8: case X86::MOVZX16rr8: case X86::MOVSX32rr8: @@ -1463,6 +1438,9 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, /// regIsPICBase - Return true if register is PIC base (i.e.g defined by /// X86::MOVPC32r. static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { + // Don't waste compile time scanning use-def chains of physregs. + if (!TargetRegisterInfo::isVirtualRegister(BaseReg)) + return false; bool isPICBase = false; for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), E = MRI.def_end(); I != E; ++I) { @@ -1480,78 +1458,69 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, AliasAnalysis *AA) const { switch (MI->getOpcode()) { default: break; - case X86::MOV8rm: - case X86::MOV16rm: - case X86::MOV32rm: - case X86::MOV64rm: - case X86::LD_Fp64m: - case X86::MOVSSrm: - case X86::MOVSDrm: - case X86::MOVAPSrm: - case X86::MOVUPSrm: - case X86::MOVAPDrm: - case X86::MOVDQArm: - case X86::VMOVSSrm: - case X86::VMOVSDrm: - case X86::VMOVAPSrm: - case X86::VMOVUPSrm: - case X86::VMOVAPDrm: - case X86::VMOVDQArm: - case X86::VMOVAPSYrm: - case X86::VMOVUPSYrm: - case X86::VMOVAPDYrm: - case X86::VMOVDQAYrm: - case X86::MMX_MOVD64rm: - case X86::MMX_MOVQ64rm: - case X86::FsVMOVAPSrm: - case X86::FsVMOVAPDrm: - case X86::FsMOVAPSrm: - case X86::FsMOVAPDrm: { - // Loads from constant pools are trivially rematerializable. - if (MI->getOperand(1).isReg() && - MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && - MI->isInvariantLoad(AA)) { - unsigned BaseReg = MI->getOperand(1).getReg(); - if (BaseReg == 0 || BaseReg == X86::RIP) - return true; - // Allow re-materialization of PIC load. - if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) - return false; - const MachineFunction &MF = *MI->getParent()->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - bool isPICBase = false; - for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), - E = MRI.def_end(); I != E; ++I) { - MachineInstr *DefMI = I.getOperand().getParent(); - if (DefMI->getOpcode() != X86::MOVPC32r) - return false; - assert(!isPICBase && "More than one PIC base?"); - isPICBase = true; - } - return isPICBase; - } - return false; + case X86::MOV8rm: + case X86::MOV16rm: + case X86::MOV32rm: + case X86::MOV64rm: + case X86::LD_Fp64m: + case X86::MOVSSrm: + case X86::MOVSDrm: + case X86::MOVAPSrm: + case X86::MOVUPSrm: + case X86::MOVAPDrm: + case X86::MOVDQArm: + case X86::VMOVSSrm: + case X86::VMOVSDrm: + case X86::VMOVAPSrm: + case X86::VMOVUPSrm: + case X86::VMOVAPDrm: + case X86::VMOVDQArm: + case X86::VMOVAPSYrm: + case X86::VMOVUPSYrm: + case X86::VMOVAPDYrm: + case X86::VMOVDQAYrm: + case X86::MMX_MOVD64rm: + case X86::MMX_MOVQ64rm: + case X86::FsVMOVAPSrm: + case X86::FsVMOVAPDrm: + case X86::FsMOVAPSrm: + case X86::FsMOVAPDrm: { + // Loads from constant pools are trivially rematerializable. + if (MI->getOperand(1).isReg() && + MI->getOperand(2).isImm() && + MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && + MI->isInvariantLoad(AA)) { + unsigned BaseReg = MI->getOperand(1).getReg(); + if (BaseReg == 0 || BaseReg == X86::RIP) + return true; + // Allow re-materialization of PIC load. 
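The OpTbl3 hunks above move every scalar FMA _Int fold-table entry from the 132 form to the 213 form and drop the packed _Int 132 rows outright. The numeric suffix encodes which source is multiplied and which is added, so a fold-table entry that names the wrong form folds the memory operand into the wrong slot. A standalone sketch of the three operand orders (plain C++ with illustrative names, not LLVM code):

    #include <cstdio>

    // vfmadd132 xmm1,xmm2,xmm3:  xmm1 = xmm1*xmm3 + xmm2
    // vfmadd213 xmm1,xmm2,xmm3:  xmm1 = xmm2*xmm1 + xmm3
    // vfmadd231 xmm1,xmm2,xmm3:  xmm1 = xmm2*xmm3 + xmm1
    static double fma132(double a, double b, double c) { return a * c + b; }
    static double fma213(double a, double b, double c) { return b * a + c; }
    static double fma231(double a, double b, double c) { return b * c + a; }

    int main() {
      // Same operands, three different results: a table entry that names the
      // 132 form when the 213 form was meant feeds the load to the wrong slot.
      printf("%g %g %g\n", fma132(2, 3, 4), fma213(2, 3, 4), fma231(2, 3, 4));
      // prints: 11 10 14
    }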
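regIsPICBase, just above, now refuses to look at physical registers before walking def chains, and the rematerialization hunk in progress here reuses it for the PIC-load case instead of open-coding the same walk. Only virtual registers are in SSA form with a bounded def list worth scanning; a physical register may be redefined throughout the function. A toy model of the guard (invented data structures, not the LLVM API):

    #include <cstdio>
    #include <unordered_map>
    #include <vector>

    enum { MOVPC32r = 1, OTHER = 2 };

    // Defs maps a register to the opcodes that define it. Virtual registers
    // have one def (SSA); a physical register may be redefined constantly,
    // so the helper bails on those instead of walking an unbounded chain.
    static bool regIsPICBase(unsigned Reg,
                             const std::unordered_map<unsigned,
                                                      std::vector<int>> &Defs) {
      const unsigned FirstVirtual = 1u << 16;  // stand-in for the real test
      if (Reg < FirstVirtual)                  // "physical": don't even look
        return false;
      auto It = Defs.find(Reg);
      if (It == Defs.end() || It->second.empty())
        return false;
      for (int Opc : It->second)               // every def must be MOVPC32r
        if (Opc != MOVPC32r)
          return false;
      return true;
    }

    int main() {
      std::unordered_map<unsigned, std::vector<int>> Defs = {
          {1u << 16, {MOVPC32r}}, {7, {MOVPC32r}}};
      printf("%d %d\n", regIsPICBase(1u << 16, Defs),
             regIsPICBase(7, Defs));           // 1 0
    }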
+ if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) + return false; + const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + return regIsPICBase(BaseReg, MRI); } + return false; + } - case X86::LEA32r: - case X86::LEA64r: { - if (MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && - !MI->getOperand(4).isReg()) { - // lea fi#, lea GV, etc. are all rematerializable. - if (!MI->getOperand(1).isReg()) - return true; - unsigned BaseReg = MI->getOperand(1).getReg(); - if (BaseReg == 0) - return true; - // Allow re-materialization of lea PICBase + x. - const MachineFunction &MF = *MI->getParent()->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - return regIsPICBase(BaseReg, MRI); - } - return false; - } + case X86::LEA32r: + case X86::LEA64r: { + if (MI->getOperand(2).isImm() && + MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && + !MI->getOperand(4).isReg()) { + // lea fi#, lea GV, etc. are all rematerializable. + if (!MI->getOperand(1).isReg()) + return true; + unsigned BaseReg = MI->getOperand(1).getReg(); + if (BaseReg == 0) + return true; + // Allow re-materialization of lea PICBase + x. + const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + return regIsPICBase(BaseReg, MRI); + } + return false; + } } // All other instructions marked M_REMATERIALIZABLE are always trivially @@ -1660,7 +1629,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, case X86::MOV64r0: { if (!isSafeToClobberEFLAGS(MBB, I)) { switch (Opc) { - default: break; + default: llvm_unreachable("Unreachable!"); case X86::MOV8r0: Opc = X86::MOV8ri; break; case X86::MOV16r0: Opc = X86::MOV16ri; break; case X86::MOV32r0: Opc = X86::MOV32ri; break; @@ -1733,8 +1702,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg); switch (MIOpc) { - default: - llvm_unreachable(0); + default: llvm_unreachable("Unreachable!"); case X86::SHL16ri: { unsigned ShAmt = MI->getOperand(2).getImm(); MIB.addReg(0).addImm(1 << ShAmt) @@ -2126,57 +2094,25 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { MI->getOperand(3).setImm(Size-Amt); return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); } - case X86::CMOVB16rr: - case X86::CMOVB32rr: - case X86::CMOVB64rr: - case X86::CMOVAE16rr: - case X86::CMOVAE32rr: - case X86::CMOVAE64rr: - case X86::CMOVE16rr: - case X86::CMOVE32rr: - case X86::CMOVE64rr: - case X86::CMOVNE16rr: - case X86::CMOVNE32rr: - case X86::CMOVNE64rr: - case X86::CMOVBE16rr: - case X86::CMOVBE32rr: - case X86::CMOVBE64rr: - case X86::CMOVA16rr: - case X86::CMOVA32rr: - case X86::CMOVA64rr: - case X86::CMOVL16rr: - case X86::CMOVL32rr: - case X86::CMOVL64rr: - case X86::CMOVGE16rr: - case X86::CMOVGE32rr: - case X86::CMOVGE64rr: - case X86::CMOVLE16rr: - case X86::CMOVLE32rr: - case X86::CMOVLE64rr: - case X86::CMOVG16rr: - case X86::CMOVG32rr: - case X86::CMOVG64rr: - case X86::CMOVS16rr: - case X86::CMOVS32rr: - case X86::CMOVS64rr: - case X86::CMOVNS16rr: - case X86::CMOVNS32rr: - case X86::CMOVNS64rr: - case X86::CMOVP16rr: - case X86::CMOVP32rr: - case X86::CMOVP64rr: - case X86::CMOVNP16rr: - case X86::CMOVNP32rr: - case X86::CMOVNP64rr: - case X86::CMOVO16rr: - case X86::CMOVO32rr: - case X86::CMOVO64rr: - case X86::CMOVNO16rr: - case X86::CMOVNO32rr: - case X86::CMOVNO64rr: { - unsigned Opc = 0; + case 
X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr: + case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr: + case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr: + case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr: + case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr: + case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr: + case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr: + case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr: + case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr: + case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr: + case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr: + case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr: + case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr: + case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr: + case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr: + case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: { + unsigned Opc; switch (MI->getOpcode()) { - default: break; + default: llvm_unreachable("Unreachable!"); case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break; case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break; case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break; @@ -2408,7 +2344,7 @@ static X86::CondCode getSwappedCondition(X86::CondCode CC) { /// whether it has memory operand. static unsigned getSETFromCond(X86::CondCode CC, bool HasMemoryOperand) { - static const unsigned Opc[16][2] = { + static const uint16_t Opc[16][2] = { { X86::SETAr, X86::SETAm }, { X86::SETAEr, X86::SETAEm }, { X86::SETBr, X86::SETBm }, @@ -2435,7 +2371,7 @@ static unsigned getSETFromCond(X86::CondCode CC, /// register size in bytes, and operand type. static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes, bool HasMemoryOperand) { - static const unsigned Opc[32][3] = { + static const uint16_t Opc[32][3] = { { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr }, { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr }, { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr }, @@ -2768,19 +2704,18 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, // SrcReg(GR64) -> DestReg(VR64) if (X86::GR64RegClass.contains(DestReg)) { - if (X86::VR128RegClass.contains(SrcReg)) { + if (X86::VR128RegClass.contains(SrcReg)) // Copy from a VR128 register to a GR64 register. return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr; - } else if (X86::VR64RegClass.contains(SrcReg)) { + if (X86::VR64RegClass.contains(SrcReg)) // Copy from a VR64 register to a GR64 register. return X86::MOVSDto64rr; - } } else if (X86::GR64RegClass.contains(SrcReg)) { // Copy from a GR64 register to a VR128 register. if (X86::VR128RegClass.contains(DestReg)) return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr; // Copy from a GR64 register to a VR64 register. - else if (X86::VR64RegClass.contains(DestReg)) + if (X86::VR64RegClass.contains(DestReg)) return X86::MOV64toSDrr; } @@ -2788,12 +2723,12 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, // SrcReg(GR32) -> DestReg(FR32) if (X86::GR32RegClass.contains(DestReg) && X86::FR32RegClass.contains(SrcReg)) - // Copy from a FR32 register to a GR32 register. - return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr; + // Copy from a FR32 register to a GR32 register. + return HasAVX ? 
X86::VMOVSS2DIrr : X86::MOVSS2DIrr; if (X86::FR32RegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg)) - // Copy from a GR32 register to a FR32 register. - return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr; + // Copy from a GR32 register to a FR32 register. + return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr; return 0; } @@ -2804,7 +2739,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool KillSrc) const { // First deal with the normal symmetric copies. bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); - unsigned Opc = 0; + unsigned Opc; if (X86::GR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MOV64rr; else if (X86::GR32RegClass.contains(DestReg, SrcReg)) @@ -2843,7 +2778,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(X86::PUSHF64)); BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); return; - } else if (X86::GR32RegClass.contains(DestReg)) { + } + if (X86::GR32RegClass.contains(DestReg)) { BuildMI(MBB, MI, DL, get(X86::PUSHF32)); BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); return; @@ -2855,7 +2791,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, MI, DL, get(X86::POPF64)); return; - } else if (X86::GR32RegClass.contains(SrcReg)) { + } + if (X86::GR32RegClass.contains(SrcReg)) { BuildMI(MBB, MI, DL, get(X86::PUSH32r)) .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, MI, DL, get(X86::POPF32)); @@ -3037,6 +2974,37 @@ analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2, CmpMask = ~0; CmpValue = MI->getOperand(1).getImm(); return true; + // A SUB can be used to perform comparison. + case X86::SUB64rm: + case X86::SUB32rm: + case X86::SUB16rm: + case X86::SUB8rm: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = 0; + CmpMask = ~0; + CmpValue = 0; + return true; + case X86::SUB64rr: + case X86::SUB32rr: + case X86::SUB16rr: + case X86::SUB8rr: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = MI->getOperand(2).getReg(); + CmpMask = ~0; + CmpValue = 0; + return true; + case X86::SUB64ri32: + case X86::SUB64ri8: + case X86::SUB32ri: + case X86::SUB32ri8: + case X86::SUB16ri: + case X86::SUB16ri8: + case X86::SUB8ri: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = 0; + CmpMask = ~0; + CmpValue = MI->getOperand(2).getImm(); + return true; case X86::CMP64rr: case X86::CMP32rr: case X86::CMP16rr: @@ -3145,6 +3113,55 @@ bool X86InstrInfo:: optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const { + // Check whether we can replace SUB with CMP. + unsigned NewOpcode = 0; + switch (CmpInstr->getOpcode()) { + default: break; + case X86::SUB64ri32: + case X86::SUB64ri8: + case X86::SUB32ri: + case X86::SUB32ri8: + case X86::SUB16ri: + case X86::SUB16ri8: + case X86::SUB8ri: + case X86::SUB64rm: + case X86::SUB32rm: + case X86::SUB16rm: + case X86::SUB8rm: + case X86::SUB64rr: + case X86::SUB32rr: + case X86::SUB16rr: + case X86::SUB8rr: { + if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg())) + return false; + // There is no use of the destination register, we can replace SUB with CMP. 
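analyzeCompare now also recognizes SUB, and optimizeCompareInstr rewrites a SUB whose destination has no remaining non-debug uses into the matching CMP: the two instructions define exactly the same EFLAGS, and CMP merely discards the arithmetic result. The opcode switch that follows picks the CMP twin (the memory forms then return early, since the generic compare optimization only handles the rr/ri forms). A standalone model of the flag equivalence (a sketch over a subset of the flags, not the LLVM code):

    #include <cstdint>
    #include <cstdio>

    struct Flags { bool zf, sf, cf; };

    // x86 SUB and CMP compute identical flags; SUB additionally writes the
    // difference. If nothing reads the difference, the SUB is a CMP.
    static Flags subOrCmp(uint32_t a, uint32_t b, uint32_t *result) {
      uint32_t r = a - b;
      if (result)
        *result = r;                          // SUB keeps this; CMP drops it.
      return {r == 0, (int32_t)r < 0, a < b};
    }

    int main() {
      uint32_t diff;
      Flags viaSub = subOrCmp(5, 9, &diff);    // sub eax, ...
      Flags viaCmp = subOrCmp(5, 9, nullptr);  // cmp eax, ...
      printf("%d%d%d == %d%d%d\n", viaSub.zf, viaSub.sf, viaSub.cf,
             viaCmp.zf, viaCmp.sf, viaCmp.cf); // same flags either way
    }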
+ switch (CmpInstr->getOpcode()) { + default: llvm_unreachable("Unreachable!"); + case X86::SUB64rm: NewOpcode = X86::CMP64rm; break; + case X86::SUB32rm: NewOpcode = X86::CMP32rm; break; + case X86::SUB16rm: NewOpcode = X86::CMP16rm; break; + case X86::SUB8rm: NewOpcode = X86::CMP8rm; break; + case X86::SUB64rr: NewOpcode = X86::CMP64rr; break; + case X86::SUB32rr: NewOpcode = X86::CMP32rr; break; + case X86::SUB16rr: NewOpcode = X86::CMP16rr; break; + case X86::SUB8rr: NewOpcode = X86::CMP8rr; break; + case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break; + case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break; + case X86::SUB32ri: NewOpcode = X86::CMP32ri; break; + case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break; + case X86::SUB16ri: NewOpcode = X86::CMP16ri; break; + case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break; + case X86::SUB8ri: NewOpcode = X86::CMP8ri; break; + } + CmpInstr->setDesc(get(NewOpcode)); + CmpInstr->RemoveOperand(0); + // Fall through to optimize Cmp if Cmp is CMPrr or CMPri. + if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm || + NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm) + return false; + } + } + // Get the unique definition of SrcReg. MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); if (!MI) return false; @@ -3221,12 +3238,15 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, MachineBasicBlock::iterator E = CmpInstr->getParent()->end(); for (++I; I != E; ++I) { const MachineInstr &Instr = *I; - if (Instr.modifiesRegister(X86::EFLAGS, TRI)) { + bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI); + bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI); + // We should check the usage if this instruction uses and updates EFLAGS. + if (!UseEFLAGS && ModifyEFLAGS) { // It is safe to remove CmpInstr if EFLAGS is updated again. IsSafe = true; break; } - if (!Instr.readsRegister(X86::EFLAGS, TRI)) + if (!UseEFLAGS && !ModifyEFLAGS) continue; // EFLAGS is used by this instruction. @@ -3281,7 +3301,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // instructions will be modified. OpsToUpdate.push_back(std::make_pair(&*I, NewOpc)); } - if (Instr.killsRegister(X86::EFLAGS, TRI)) { + if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) { + // It is safe to remove CmpInstr if EFLAGS is updated again or killed. IsSafe = true; break; } @@ -3319,6 +3340,81 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, return true; } +/// optimizeLoadInstr - Try to remove the load by folding it to a register +/// operand at the use. We fold the load instructions if load defines a virtual +/// register, the virtual register is used once in the same BB, and the +/// instructions in-between do not load or store, and have no side effects. +MachineInstr* X86InstrInfo:: +optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI, + unsigned &FoldAsLoadDefReg, + MachineInstr *&DefMI) const { + if (FoldAsLoadDefReg == 0) + return 0; + // To be conservative, if there exists another load, clear the load candidate. + if (MI->mayLoad()) { + FoldAsLoadDefReg = 0; + return 0; + } + + // Check whether we can move DefMI here. + DefMI = MRI->getVRegDef(FoldAsLoadDefReg); + assert(DefMI); + bool SawStore = false; + if (!DefMI->isSafeToMove(this, 0, SawStore)) + return 0; + + // We try to commute MI if possible. + unsigned IdxEnd = (MI->isCommutable()) ? 
2 : 1; + for (unsigned Idx = 0; Idx < IdxEnd; Idx++) { + // Collect information about virtual register operands of MI. + unsigned SrcOperandId = 0; + bool FoundSrcOperand = false; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (Reg != FoldAsLoadDefReg) + continue; + // Do not fold if we have a subreg use or a def or multiple uses. + if (MO.getSubReg() || MO.isDef() || FoundSrcOperand) + return 0; + + SrcOperandId = i; + FoundSrcOperand = true; + } + if (!FoundSrcOperand) return 0; + + // Check whether we can fold the def into SrcOperandId. + SmallVector<unsigned, 8> Ops; + Ops.push_back(SrcOperandId); + MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI); + if (FoldMI) { + FoldAsLoadDefReg = 0; + return FoldMI; + } + + if (Idx == 1) { + // MI was changed but it didn't help, commute it back! + commuteInstruction(MI, false); + return 0; + } + + // Check whether we can commute MI and enable folding. + if (MI->isCommutable()) { + MachineInstr *NewMI = commuteInstruction(MI, false); + // Unable to commute. + if (!NewMI) return 0; + if (NewMI != MI) { + // New instruction. It doesn't need to be kept. + NewMI->eraseFromParent(); + return 0; + } + } + } + return 0; +} + /// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr /// instruction with two undef reads of the register being defined. This is /// used for mapping: @@ -3477,6 +3573,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, OpcodeTablePtr = &RegOp2MemOpTable1; } else if (i == 2) { OpcodeTablePtr = &RegOp2MemOpTable2; + } else if (i == 3) { + OpcodeTablePtr = &RegOp2MemOpTable3; } // If table selected... @@ -3947,7 +4045,6 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, getUndefRegState(MO.isUndef())); } // Change CMP32ri r, 0 back to TEST32rr r, r, etc. - unsigned NewOpc = 0; switch (DataMI->getOpcode()) { default: break; case X86::CMP64ri32: @@ -3960,8 +4057,9 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, MachineOperand &MO0 = DataMI->getOperand(0); MachineOperand &MO1 = DataMI->getOperand(1); if (MO1.getImm() == 0) { + unsigned NewOpc; switch (DataMI->getOpcode()) { - default: break; + default: llvm_unreachable("Unreachable!"); case X86::CMP64ri8: case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; case X86::CMP32ri8: diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index ec9b2e6..b6f69af 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -387,6 +387,18 @@ public: unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const; + /// optimizeLoadInstr - Try to remove the load by folding it to a register + /// operand at the use. We fold the load instructions if and only if the + /// def and use are in the same BB. We only look at one load and see + /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register + /// defined by the load we are trying to fold. DefMI returns the machine + /// instruction that defines FoldAsLoadDefReg, and the function returns + /// the machine instruction generated due to folding. 
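The new optimizeLoadInstr hook (body earlier in this hunk, declaration next) folds a single-use load into its use; when folding fails on a commutable instruction it commutes the operands and tries once more, commuting back if the second attempt fails too so the instruction is left as it was found. The control flow, reduced to a standalone skeleton with invented stand-in types (not the LLVM API, which locates the operand defined by the load rather than a fixed index):

    #include <cstdio>
    #include <utility>

    enum { LOAD = 42 };

    struct Instr {
      bool Commutable;
      int Ops[2];                 // operand registers; Ops[i]==LOAD is foldable
    };

    static bool foldAt(Instr &MI) { return MI.Ops[0] == LOAD; }

    // Attempt 0 tries the operands as written; attempt 1 (commutable
    // instructions only) tries them swapped, restoring the original order
    // when the swapped form does not fold either.
    static bool tryFoldLoad(Instr &MI) {
      unsigned Attempts = MI.Commutable ? 2 : 1;
      for (unsigned Idx = 0; Idx != Attempts; ++Idx) {
        if (foldAt(MI))
          return true;
        if (Idx + 1 != Attempts)
          std::swap(MI.Ops[0], MI.Ops[1]);   // commute and retry
        else if (Idx == 1)
          std::swap(MI.Ops[0], MI.Ops[1]);   // undo the commute: never folded
      }
      return false;
    }

    int main() {
      Instr A{true, {7, LOAD}};              // foldable only after commuting
      printf("%d\n", tryFoldLoad(A));        // 1
    }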
+ virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI, + const MachineRegisterInfo *MRI, + unsigned &FoldAsLoadDefReg, + MachineInstr *&DefMI) const; + private: MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, MachineFunction::iterator &MFI, diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index e4edd36..c8f40bb 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -251,7 +251,7 @@ def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (iPTR 0))))))], IIC_MMX_MOVQ_RR>; -def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), +def MMX_MOVQ2DQrr : S2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector @@ -259,7 +259,7 @@ def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), IIC_MMX_MOVQ_RR>; let neverHasSideEffects = 1 in -def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), +def MMX_MOVQ2FR64rr: S2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOVQ_RR>; @@ -554,20 +554,6 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src), (int_x86_mmx_pmovmskb VR64:$src))]>; -// MMX to XMM for vector types -def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1, - [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>; - -def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)), - (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; - -def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))), - (v2i64 (MOVQI2PQIrm addr:$src))>; - -def : Pat<(v2i64 (MMX_X86movq2dq - (x86mmx (scalar_to_vector (loadi32 addr:$src))))), - (v2i64 (MOVDI2PDIrm addr:$src))>; - // Low word of XMM to MMX. def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1, [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index c2d169a..220c06d 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -245,9 +245,9 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC, // A vector extract of the first f32/f64 position is a subregister copy def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>; def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>; // A 128-bit subvector extract from the first 256-bit vector position // is a subregister copy that needs no instruction. @@ -283,14 +283,14 @@ def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)), // Implicitly promote a 32-bit scalar to a vector. def : Pat<(v4f32 (scalar_to_vector FR32:$src)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; + (COPY_TO_REGCLASS FR32:$src, VR128)>; def : Pat<(v8f32 (scalar_to_vector FR32:$src)), - (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; + (COPY_TO_REGCLASS FR32:$src, VR128)>; // Implicitly promote a 64-bit scalar to a vector. def : Pat<(v2f64 (scalar_to_vector FR64:$src)), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; + (COPY_TO_REGCLASS FR64:$src, VR128)>; def : Pat<(v4f64 (scalar_to_vector FR64:$src)), - (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; + (COPY_TO_REGCLASS FR64:$src, VR128)>; // Bitcasts between 128-bit vector types. 
Return the original type since // no instruction is needed for the conversion @@ -562,59 +562,57 @@ let Predicates = [HasAVX] in { def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (VMOVSSrr (v4f32 (V_SET0)), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; + (VMOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (VMOVSSrr (v4i32 (V_SET0)), - (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; + (VMOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>; // Move low f32 and clear high bits. def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSSrr (v4f32 (V_SET0)), - (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>; + (VMOVSSrr (v4f32 (V_SET0)), + (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)), sub_xmm)>; def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSSrr (v4i32 (V_SET0)), - (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>; + (VMOVSSrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)), sub_xmm)>; } let AddedComplexity = 20 in { // MOVSSrm zeros the high parts of the register; represent this // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; // MOVSDrm zeros the high parts of the register; represent this // with SUBREG_TO_REG. 
The AVX versions also write: DST[255:128] <- 0 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; // Represent the same patterns above but in the form they appear for // 256-bit types def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))), - (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>; + (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>; } def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))), @@ -628,70 +626,68 @@ let Predicates = [HasAVX] in { sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>; // Move low f64 and clear high bits. def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSDrr (v2f64 (V_SET0)), - (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>; + (VMOVSDrr (v2f64 (V_SET0)), + (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSDrr (v2i64 (V_SET0)), - (EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>; + (VMOVSDrr (v2i64 (V_SET0)), + (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)), sub_xmm)>; // Extract and store. 
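Throughout this hunk the old (EXTRACT_SUBREG ..., sub_ss) and sub_sd patterns become COPY_TO_REGCLASS, and the 128-bit-lane extractions from 256-bit registers become sub_xmm: the scalar-within-vector subregister indices are on their way out, and what these patterns really express is a register-class change over the same bits rather than a subregister operation. The extract-and-store patterns that follow show the new form. A loose standalone analogy in plain C++ (not the pattern semantics themselves):

    #include <cstdio>
    #include <cstring>

    int main() {
      // The low f32 lane of an xmm register and an FR32 value are the same
      // bits; "extracting" element 0 needs no instruction, only a register-
      // class change.
      float v4f32[4] = {1.25f, 2, 3, 4};   // "VR128" view
      float f32;                            // "FR32" view of the low lane
      std::memcpy(&f32, &v4f32[0], sizeof f32);
      printf("%f\n", f32);                  // 1.250000
    }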
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), - (VMOVSSmr addr:$dst, - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>; def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst), - (VMOVSDmr addr:$dst, - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + (VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>; // Shuffle with VMOVSS def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), (VMOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + (COPY_TO_REGCLASS (v4i32 VR128:$src2), FR32))>; def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), (VMOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + (COPY_TO_REGCLASS (v4f32 VR128:$src2), FR32))>; // 256-bit variants def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), - (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss), - (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>; + (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_xmm), + (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_xmm)), + sub_xmm)>; def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), - (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss), - (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>; + (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_xmm), + (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_xmm)), + sub_xmm)>; // Shuffle with VMOVSD def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; // 256-bit variants def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), - (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd), - (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>; + (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_xmm), + (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_xmm)), + sub_xmm)>; def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), - (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd), - (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>; + (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_xmm), + (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_xmm)), + sub_xmm)>; // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem @@ -699,17 +695,13 @@ let Predicates = [HasAVX] in { // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. 
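The FIXME above concerns pattern choice, not correctness: with a register source, MOVSD already has the movlpd semantics, which is why the X86Movlpd patterns below can safely select VMOVSDrr. Both replace only the low 64-bit lane and preserve the high lane, as in this standalone sketch:

    #include <cstdio>

    int main() {
      double dst[2] = {1.0, 2.0};          // xmm1 = {lo, hi}
      double src[2] = {9.0, 8.0};          // xmm2
      dst[0] = src[0];                     // movsd xmm1, xmm2 (register form)
      printf("%g %g\n", dst[0], dst[1]);   // 9 2 -- high lane untouched
    }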
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; } let Predicates = [HasSSE1] in { @@ -719,37 +711,31 @@ let Predicates = [HasSSE1] in { def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (MOVSSrr (v4f32 (V_SET0)), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; + (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (MOVSSrr (v4i32 (V_SET0)), - (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; + (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; } let AddedComplexity = 20 in { - // MOVSSrm zeros the high parts of the register; represent this - // with SUBREG_TO_REG. + // MOVSSrm already zeros the high parts of the register. def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; } // Extract and store. def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), - (MOVSSmr addr:$dst, - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>; // Shuffle with MOVSS def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; } let Predicates = [HasSSE2] in { @@ -761,50 +747,46 @@ let Predicates = [HasSSE2] in { } let AddedComplexity = 20 in { - // MOVSDrm zeros the high parts of the register; represent this - // with SUBREG_TO_REG. + // MOVSDrm already zeros the high parts of the register. 
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; } // Extract and store. def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst), - (MOVSDmr addr:$dst, - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + (MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>; // Shuffle with MOVSD def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem // is during lowering, where it's not possible to recognize the fold cause // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. 
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; } //===----------------------------------------------------------------------===// @@ -1416,14 +1398,15 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, } multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, - SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, - string asm, Domain d, OpndItins itins> { + X86MemOperand x86memop, string asm, Domain d, + OpndItins itins> { +let neverHasSideEffects = 1 in { def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (OpNode SrcRC:$src))], - itins.rr, d>; + [], itins.rr, d>; + let mayLoad = 1 in def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], - itins.rm, d>; + [], itins.rm, d>; +} } multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -1443,7 +1426,7 @@ defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, - "cvttss2si\t{$src, $dst|$dst, $src}", + "cvttss2si{q}\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, @@ -1451,7 +1434,7 @@ defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, - "cvttsd2si\t{$src, $dst|$dst, $src}", + "cvttsd2si{q}\t{$src, $dst|$dst, $src}", SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; @@ -1465,11 +1448,14 @@ defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS, VEX_4V, VEX_W, VEX_LIG; defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD, VEX_4V, VEX_LIG; -defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, - XD, VEX_4V, VEX_LIG; defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, VEX_4V, VEX_W, VEX_LIG; +def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}", + (VCVTSI2SDrr FR64:$dst, FR64:$src1, GR32:$src)>; +def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}", + (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>; + let Predicates = [HasAVX], AddedComplexity = 1 in { def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; @@ -1519,14 +1505,14 @@ defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, // and/or XMM operand(s). 
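The multiclass below switches the intrinsic conversions from f128mem with a plain load fragment to ssmem/sdmem with the sse_load_f32/sse_load_f64 complex patterns: the intrinsic only consumes the low scalar, so its memory form should be a 4- or 8-byte load rather than a full 16-byte vector load with its alignment requirement. The difference, stated as a standalone plain-C++ sketch rather than the TableGen semantics:

    #include <cstdio>
    #include <cstring>

    int main() {
      double only8bytes = 41.5;      // a buffer holding exactly one double
      double lo;
      // What sse_load_f64 models: read just the 8 bytes the intrinsic uses.
      // A 16-byte vector load here could read past the buffer or trip an
      // alignment check.
      std::memcpy(&lo, &only8bytes, sizeof lo);
      printf("%d\n", (int)lo);       // 41
    }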
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, - Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, + Intrinsic Int, Operand memop, ComplexPattern mem_cpat, string asm, OpndItins itins> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>; - def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm>; + [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>; } multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, @@ -1548,30 +1534,31 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, itins.rm>; } -defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - f128mem, load, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; +defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, + int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si{l}", + SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, - int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si", - SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; + int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si{q}", + SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - f128mem, load, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD; + sdmem, sse_load_f64, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, - f128mem, load, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; + sdmem, sse_load_f64, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", SSE_CVT_Scalar, 0>, XS, VEX_4V; defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", + int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}", SSE_CVT_Scalar, 0>, XS, VEX_4V, VEX_W; defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", SSE_CVT_Scalar, 0>, XD, VEX_4V; defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", + int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", SSE_CVT_Scalar, 0>, XD, VEX_4V, VEX_W; @@ -1587,96 +1574,71 @@ let Constraints = "$src1 = $dst" in { "cvtsi2sd", SSE_CVT_Scalar>, XD; defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse2_cvtsi642sd, i64mem, loadi64, - "cvtsi2sd", SSE_CVT_Scalar>, XD, REX_W; + "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W; } /// SSE 1 Only // Aliases for intrinsics defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, - f32mem, load, "cvttss2si", + ssmem, sse_load_f32, "cvttss2si", SSE_CVT_SS2SI_32>, XS, VEX; defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse_cvttss2si64, f32mem, load, - "cvttss2si", SSE_CVT_SS2SI_64>, - XS, VEX, VEX_W; + int_x86_sse_cvttss2si64, ssmem, sse_load_f32, + "cvttss2si{q}", SSE_CVT_SS2SI_64>, + XS, VEX, VEX_W; defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, - f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>, - XD, VEX; + sdmem, sse_load_f64, "cvttsd2si", + SSE_CVT_SD2SI>, XD, VEX; defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse2_cvttsd2si64, f128mem, 
load, - "cvttsd2si", SSE_CVT_SD2SI>, - XD, VEX, VEX_W; + int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, + "cvttsd2si{q}", SSE_CVT_SD2SI>, + XD, VEX, VEX_W; defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, - f32mem, load, "cvttss2si", + ssmem, sse_load_f32, "cvttss2si", SSE_CVT_SS2SI_32>, XS; defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse_cvttss2si64, f32mem, load, - "cvttss2si{q}", SSE_CVT_SS2SI_64>, - XS, REX_W; + int_x86_sse_cvttss2si64, ssmem, sse_load_f32, + "cvttss2si{q}", SSE_CVT_SS2SI_64>, XS, REX_W; defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, - f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>, - XD; + sdmem, sse_load_f64, "cvttsd2si", + SSE_CVT_SD2SI>, XD; defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse2_cvttsd2si64, f128mem, load, - "cvttsd2si{q}", SSE_CVT_SD2SI>, - XD, REX_W; - -let Pattern = []<dag>, neverHasSideEffects = 1 in { -defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, - "cvtss2si{l}\t{$src, $dst|$dst, $src}", - SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; -defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load, - "cvtss2si\t{$src, $dst|$dst, $src}", - SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; -defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load, + int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, + "cvttsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; + +defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, + ssmem, sse_load_f32, "cvtss2si{l}", + SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; +defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, + ssmem, sse_load_f32, "cvtss2si{q}", + SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; + +defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, + ssmem, sse_load_f32, "cvtss2si{l}", + SSE_CVT_SS2SI_32>, XS; +defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, + ssmem, sse_load_f32, "cvtss2si{q}", + SSE_CVT_SS2SI_64>, XS, REX_W; + +defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, "vcvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, VEX, - Requires<[HasAVX]>; -defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load, + SSEPackedSingle, SSE_CVT_PS>, + TB, VEX, Requires<[HasAVX]>; +defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem, "vcvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, VEX, - Requires<[HasAVX]>; -} - -let Pattern = []<dag>, neverHasSideEffects = 1 in { -defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/, - "cvtss2si{l}\t{$src, $dst|$dst, $src}", - SSE_CVT_SS2SI_32>, XS; -defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/, - "cvtss2si{q}\t{$src, $dst|$dst, $src}", - SSE_CVT_SS2SI_64>, XS, REX_W; -defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/, - "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, - Requires<[HasSSE2]>; -} + SSEPackedSingle, SSE_CVT_PS>, + TB, VEX, Requires<[HasAVX]>; -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse_cvtss2si VR128:$src), - (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - def : Pat<(int_x86_sse_cvtss2si (load addr:$src)), - (VCVTSS2SIrm addr:$src)>; - def : Pat<(int_x86_sse_cvtss2si64 VR128:$src), - (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)), - (VCVTSS2SI64rm addr:$src)>; -} - -let Predicates = [HasSSE1] in { - def 
: Pat<(int_x86_sse_cvtss2si VR128:$src), - (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - def : Pat<(int_x86_sse_cvtss2si (load addr:$src)), - (CVTSS2SIrm addr:$src)>; - def : Pat<(int_x86_sse_cvtss2si64 VR128:$src), - (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)), - (CVTSS2SI64rm addr:$src)>; -} +defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, + "cvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle, SSE_CVT_PS>, + TB, Requires<[HasSSE2]>; /// SSE 2 Only // Convert scalar double to scalar single +let neverHasSideEffects = 1 in { def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], @@ -1687,6 +1649,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG; +} def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, Requires<[HasAVX]>; @@ -1702,17 +1665,37 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), XD, Requires<[HasSSE2, OptForSize]>; -defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, - int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", - SSE_CVT_Scalar, 0>, - XS, VEX_4V; -let Constraints = "$src1 = $dst" in -defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, - int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", - SSE_CVT_Scalar>, XS; +def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>; +def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), + "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (int_x86_sse2_cvtsd2ss + VR128:$src1, sse_load_f64:$src2))], + IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>; + +let Constraints = "$src1 = $dst" in { +def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XD, Requires<[HasSSE2]>; +def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), + "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (int_x86_sse2_cvtsd2ss + VR128:$src1, sse_load_f64:$src2))], + IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasSSE2]>; +} // Convert scalar single to scalar double // SSE2 instructions with XS prefix +let neverHasSideEffects = 1 in { def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1724,19 +1707,21 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; +} -let Predicates = [HasAVX] in { +let AddedComplexity = 1 in { // give AVX priority def : Pat<(f64 (fextend FR32:$src)), - (VCVTSS2SDrr FR32:$src, FR32:$src)>; + (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>; def : Pat<(fextend (loadf32 addr:$src)), - (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>; - def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), 
addr:$src)>; -} + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>; -def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (MOVSSrm addr:$src))>, - Requires<[HasAVX, OptForSpeed]>; + def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; + def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>, + Requires<[HasAVX, OptForSpeed]>; +} // AddedComplexity = 1 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", @@ -1762,67 +1747,60 @@ def : Pat<(extloadf32 addr:$src), def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, - Requires<[HasAVX]>; + [(set VR128:$dst, + (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>; def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), + (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - (load addr:$src2)))], - IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, - Requires<[HasAVX]>; + [(set VR128:$dst, + (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], + IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, - Requires<[HasSSE2]>; + [(set VR128:$dst, + (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasSSE2]>; def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), + (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - (load addr:$src2)))], - IIC_SSE_CVT_Scalar_RM>, XS, - Requires<[HasSSE2]>; + [(set VR128:$dst, + (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], + IIC_SSE_CVT_Scalar_RM>, XS, Requires<[HasSSE2]>; } // Convert packed single/double fp to doubleword def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], IIC_SSE_CVT_PS_RR>, VEX; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], IIC_SSE_CVT_PS_RM>, VEX; def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvt_ps2dq_256 VR256:$src))], IIC_SSE_CVT_PS_RR>, VEX; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))], IIC_SSE_CVT_PS_RM>, VEX; def CVTPS2DQrr : PDI<0x5B, 
MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], IIC_SSE_CVT_PS_RR>; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], IIC_SSE_CVT_PS_RM>; -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse2_cvtps2dq VR128:$src), - (VCVTPS2DQrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)), - (VCVTPS2DQrm addr:$src)>; -} - -let Predicates = [HasSSE2] in { - def : Pat<(int_x86_sse2_cvtps2dq VR128:$src), - (CVTPS2DQrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)), - (CVTPS2DQrm addr:$src)>; -} // Convert Packed Double FP to Packed DW Integers let Predicates = [HasAVX] in { @@ -1830,77 +1808,74 @@ let Predicates = [HasAVX] in { // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, + VEX; // XMM only def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", (VCVTPD2DQrr VR128:$dst, VR128:$src)>; def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtpd2dqx\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX; // YMM only def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX; def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; + "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>, + VEX, VEX_L; def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", (VCVTPD2DQYrr VR128:$dst, VR256:$src)>; } def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", [], + "cvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>; def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", [], + "cvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], IIC_SSE_CVT_PD_RR>; -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src), - (VCVTPD2DQrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)), - (VCVTPD2DQXrm addr:$src)>; -} - -let Predicates = [HasSSE2] in { - def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src), - (CVTPD2DQrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)), - (CVTPD2DQrm addr:$src)>; -} - // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix -def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX; 
-def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttps2dq - (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; -def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), +def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR256:$dst, - (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX; -def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq VR128:$src))], + IIC_SSE_CVT_PS_RR>, VEX; +def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 - (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; - -def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; -def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>; + [(set VR128:$dst, (int_x86_sse2_cvttps2dq + (memopv4f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>, VEX; +def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], + IIC_SSE_CVT_PS_RR>, VEX; +def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 + (memopv8f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>, VEX; + +def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], + IIC_SSE_CVT_PS_RR>; +def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>; let Predicates = [HasAVX] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), @@ -1952,16 +1927,6 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), (int_x86_sse2_cvttpd2dq VR128:$src))], IIC_SSE_CVT_PD_RR>, VEX; -def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>; -def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; - // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. 
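For readers comparing the cvt and cvtt families in the hunks above: the extra "t" means truncation. cvttps2dq and cvttpd2dq always round toward zero, while cvtps2dq and cvtpd2dq honor the current MXCSR rounding mode (round-to-nearest-even by default). Truncation is exactly C++'s float-to-int cast semantics, which is why ordinary casts select the cvtt forms. A minimal standalone sketch of the difference, using only the standard library:

#include <cmath>
#include <cstdio>

int main() {
  double D = 2.7;
  // cvttpd2dq semantics: truncate toward zero, i.e. an ordinary cast.
  int Trunc = static_cast<int>(D);                   // 2
  // cvtpd2dq semantics: convert in the current rounding mode
  // (round-to-nearest-even unless MXCSR has been changed).
  int Rounded = static_cast<int>(std::nearbyint(D)); // 3
  std::printf("trunc=%d rounded=%d\n", Trunc, Rounded);
  return 0;
}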
@@ -1977,10 +1942,14 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // YMM only def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [], + "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvtt_pd2dq_256 VR256:$src))], IIC_SSE_CVT_PD_RR>, VEX; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [], + "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX, VEX_L; def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>; @@ -1992,82 +1961,82 @@ let Predicates = [HasAVX] in { (VCVTTPD2DQYrm addr:$src)>; } // Predicates = [HasAVX] +def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], + IIC_SSE_CVT_PD_RR>; +def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq + (memopv2f64 addr:$src)))], + IIC_SSE_CVT_PD_RM>; + // Convert packed single to packed double let Predicates = [HasAVX] in { // SSE2 instructions without OpSize prefix def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", [], + "vcvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], IIC_SSE_CVT_PD_RR>, TB, VEX; +let neverHasSideEffects = 1, mayLoad = 1 in def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, TB, VEX; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", [], + "vcvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvt_ps2_pd_256 VR128:$src))], IIC_SSE_CVT_PD_RR>, TB, VEX; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", [], + "vcvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))], IIC_SSE_CVT_PD_RM>, TB, VEX; } let Predicates = [HasSSE2] in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", [], + "cvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], IIC_SSE_CVT_PD_RR>, TB; +let neverHasSideEffects = 1, mayLoad = 1 in def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, TB; } -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse2_cvtps2pd VR128:$src), - (VCVTPS2PDrr VR128:$src)>; -} - -let Predicates = [HasSSE2] in { - def : Pat<(int_x86_sse2_cvtps2pd VR128:$src), - (CVTPS2PDrr VR128:$src)>; -} - // Convert Packed DW Integers to Packed Double FP let Predicates = [HasAVX] in { -def VCVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDYrm : SSDI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDYrr : SSDI<0xE6, MRMSrcReg, (outs 
VR256:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +let neverHasSideEffects = 1, mayLoad = 1 in +def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + []>, VEX; +def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX; +def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvtdq2_pd_256 + (bitconvert (memopv2i64 addr:$src))))]>, VEX; +def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX; } -def CVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), +let neverHasSideEffects = 1, mayLoad = 1 in +def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RR>; -def CVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", [], +def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], IIC_SSE_CVT_PD_RM>; -// 128 bit register conversion intrinsics -let Predicates = [HasAVX] in -def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src), - (VCVTDQ2PDrr VR128:$src)>; - -let Predicates = [HasSSE2] in -def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src), - (CVTDQ2PDrr VR128:$src)>; - // AVX 256-bit register conversion intrinsics let Predicates = [HasAVX] in { - def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src), - (VCVTDQ2PDYrr VR128:$src)>; - def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))), - (VCVTDQ2PDYrm addr:$src)>; - - def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src), - (VCVTPD2DQYrr VR256:$src)>; - def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), - (VCVTPD2DQYrm addr:$src)>; - def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))), (VCVTDQ2PDYrr VR128:$src)>; def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), @@ -2079,48 +2048,44 @@ let Predicates = [HasAVX] in { // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. 
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], IIC_SSE_CVT_PD_RR>, VEX; // XMM only def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", (VCVTPD2PSrr VR128:$dst, VR128:$src)>; def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2psx\t{$src, $dst|$dst, $src}", [], + "cvtpd2psx\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX; // YMM only def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvt_pd2_ps_256 VR256:$src))], IIC_SSE_CVT_PD_RR>, VEX; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX, VEX_L; def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}", (VCVTPD2PSYrr VR128:$dst, VR256:$src)>; def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], IIC_SSE_CVT_PD_RR>; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>; -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src), - (VCVTPD2PSrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)), - (VCVTPD2PSXrm addr:$src)>; -} - -let Predicates = [HasSSE2] in { - def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src), - (CVTPD2PSrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)), - (CVTPD2PSrm addr:$src)>; -} - // AVX 256-bit register conversion intrinsics // FIXME: Migrate SSE conversion intrinsics matching to use patterns as below // whenever possible to avoid declaring two versions of each one. 
@@ -2130,38 +2095,26 @@ let Predicates = [HasAVX] in { def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))), (VCVTDQ2PSYrm addr:$src)>; - def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src), - (VCVTPD2PSYrr VR256:$src)>; - def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)), - (VCVTPD2PSYrm addr:$src)>; - - def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src), - (VCVTPS2DQYrr VR256:$src)>; - def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)), - (VCVTPS2DQYrm addr:$src)>; - - def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src), - (VCVTPS2PDYrr VR128:$src)>; - def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)), - (VCVTPS2PDYrm addr:$src)>; - - def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src), - (VCVTTPD2DQYrr VR256:$src)>; - def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)), - (VCVTTPD2DQYrm addr:$src)>; - // Match fround and fextend for 128/256-bit conversions def : Pat<(v4f32 (fround (v4f64 VR256:$src))), (VCVTPD2PSYrr VR256:$src)>; def : Pat<(v4f32 (fround (loadv4f64 addr:$src))), (VCVTPD2PSYrm addr:$src)>; + def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))), + (VCVTPS2PDrr VR128:$src)>; def : Pat<(v4f64 (fextend (v4f32 VR128:$src))), (VCVTPS2PDYrr VR128:$src)>; def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))), (VCVTPS2PDYrm addr:$src)>; } +let Predicates = [HasSSE2] in { + // Match fextend for 128 conversions + def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))), + (CVTPS2PDrr VR128:$src)>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Compare Instructions //===----------------------------------------------------------------------===// @@ -2593,17 +2546,13 @@ let Predicates = [HasAVX] in { OpSize, VEX; def : Pat<(i32 (X86fgetsign FR32:$src)), - (VMOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>; + (VMOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>; def : Pat<(i64 (X86fgetsign FR32:$src)), - (VMOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>; + (VMOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>; def : Pat<(i32 (X86fgetsign FR64:$src)), - (VMOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>; + (VMOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>; def : Pat<(i64 (X86fgetsign FR64:$src)), - (VMOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>; + (VMOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>; // Assembler Only def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), @@ -2628,17 +2577,17 @@ defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", SSEPackedDouble>, TB, OpSize; def : Pat<(i32 (X86fgetsign FR32:$src)), - (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>, Requires<[HasSSE1]>; + (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>, + Requires<[HasSSE1]>; def : Pat<(i64 (X86fgetsign FR32:$src)), - (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>, Requires<[HasSSE1]>; + (MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>, + Requires<[HasSSE1]>; def : Pat<(i32 (X86fgetsign FR64:$src)), - (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>, Requires<[HasSSE2]>; + (MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>, + Requires<[HasSSE2]>; def : Pat<(i64 (X86fgetsign FR64:$src)), - (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>, Requires<[HasSSE2]>; + (MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>, + Requires<[HasSSE2]>; 
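The X86fgetsign patterns rewritten just above extract a scalar float's sign bit with MOVMSKPS or MOVMSKPD. The change is mechanical: the FR32/FR64 value is retagged into VR128 with COPY_TO_REGCLASS rather than inserted into an IMPLICIT_DEF vector through the sub_ss/sub_sd subregister indices, which this patch deletes from X86RegisterInfo.td further down. Functionally the node computes nothing more than the top bit of the value's IEEE-754 encoding; a standalone sketch of that operation, not the backend's code:

#include <cstdint>
#include <cstring>

// What an X86fgetsign node computes: the IEEE-754 sign bit, which
// MOVMSKPS/MOVMSKPD read from the high bit of each vector lane.
static int fgetsign32(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  return static_cast<int>(Bits >> 31);
}

static int fgetsign64(double D) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  return static_cast<int>(Bits >> 63);
}

int main() {
  return (fgetsign32(-1.0f) == 1 && fgetsign64(2.0) == 0) ? 0 : 1;
}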
//===---------------------------------------------------------------------===// // SSE2 - Packed Integer Logical Instructions @@ -2923,7 +2872,8 @@ let isCommutable = 0 in { basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, VEX_4V; + basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, + VEX_4V; defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>, VEX_4V, VEX_LIG; @@ -2974,6 +2924,23 @@ let Constraints = "$src1 = $dst" in { } } +let isCommutable = 1, isCodeGenOnly = 1 in { + defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>, + VEX_4V, VEX_LIG; + defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>, + basic_sse12_fp_binop_p_y<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, VEX_4V; + defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>, + VEX_4V, VEX_LIG; + defm VMINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P, 0>, + basic_sse12_fp_binop_p_y<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, VEX_4V; + let Constraints = "$src1 = $dst" in { + defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>, + basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>; + defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>, + basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>; + } +} + /// Unop Arithmetic /// In addition, we also have a special variant of the scalar form here to /// represent the associated intrinsic operation. This form is unlike the @@ -3236,34 +3203,30 @@ def : Pat<(f32 (X86frcp (load addr:$src))), let Predicates = [HasAVX], AddedComplexity = 1 in { def : Pat<(int_x86_sse_sqrt_ss VR128:$src), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), - (VSQRTSSr (f32 (IMPLICIT_DEF)), - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), - sub_ss)>; + (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS VR128:$src, FR32)), + VR128)>; def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src), (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; def : Pat<(int_x86_sse2_sqrt_sd VR128:$src), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), - (VSQRTSDr (f64 (IMPLICIT_DEF)), - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd)), - sub_sd)>; + (COPY_TO_REGCLASS (VSQRTSDr (f64 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS VR128:$src, FR64)), + VR128)>; def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src), (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>; def : Pat<(int_x86_sse_rsqrt_ss VR128:$src), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), - (VRSQRTSSr (f32 (IMPLICIT_DEF)), - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), - sub_ss)>; + (COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS VR128:$src, FR32)), + VR128)>; def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src), (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; def : Pat<(int_x86_sse_rcp_ss VR128:$src), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), - (VRCPSSr (f32 (IMPLICIT_DEF)), - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), - sub_ss)>; + (COPY_TO_REGCLASS (VRCPSSr (f32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS VR128:$src, FR32)), + VR128)>; def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src), (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; } @@ -4609,7 +4572,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), // 
Bitcast FR64 <-> GR64 // let Predicates = [HasAVX] in -def VMOV64toSDrm : SSDI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), +def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, VEX; @@ -4622,7 +4585,7 @@ def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), [(store (i64 (bitconvert FR64:$src)), addr:$dst)], IIC_SSE_MOVDQ>, VEX; -def MOV64toSDrm : SSDI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), +def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], IIC_SSE_MOVDQ>; @@ -5505,16 +5468,14 @@ let usesCustomInserter = 1 in { def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>, Requires<[HasSSE3]>; -def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2), - [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>, - Requires<[HasSSE3]>; } let Uses = [EAX, ECX, EDX] in def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>, TB, Requires<[HasSSE3]>; let Uses = [ECX, EAX] in -def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", [], IIC_SSE_MWAIT>, +def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", + [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>, TB, Requires<[HasSSE3]>; def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; @@ -6906,81 +6867,42 @@ let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { } // Packed Compare Implicit Length Strings, Return Index -let Defs = [ECX, EFLAGS] in { - multiclass SS42AI_pcmpistri<Intrinsic IntId128, string asm = "pcmpistri"> { +let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in { + multiclass SS42AI_pcmpistri<string asm> { def rr : SS42AI<0x63, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; + let mayLoad = 1 in def rm : SS42AI<0x63, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; } } -let Predicates = [HasAVX] in { -defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">, - VEX; -defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">, - VEX; -defm VPCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128, "vpcmpistri">, - VEX; -defm VPCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128, "vpcmpistri">, - VEX; -defm VPCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128, "vpcmpistri">, - VEX; -defm VPCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128, "vpcmpistri">, - VEX; -} - -defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>; -defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>; -defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>; -defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>; -defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>; -defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>; +let Predicates = [HasAVX] in +defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX; +defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">; // Packed Compare 
Explicit Length Strings, Return Index -let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in { - multiclass SS42AI_pcmpestri<Intrinsic IntId128, string asm = "pcmpestri"> { +let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { + multiclass SS42AI_pcmpestri<string asm> { def rr : SS42AI<0x61, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; + let mayLoad = 1 in def rm : SS42AI<0x61, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - [(set ECX, - (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; } } -let Predicates = [HasAVX] in { -defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">, - VEX; -defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">, - VEX; -defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">, - VEX; -defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">, - VEX; -defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">, - VEX; -defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">, - VEX; -} - -defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>; -defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>; -defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>; -defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>; -defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>; -defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>; +let Predicates = [HasAVX] in +defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX; +defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">; //===----------------------------------------------------------------------===// // SSE4.2 - CRC Instructions @@ -7727,24 +7649,18 @@ let Predicates = [HasAVX2] in { // is used by additional users, which prevents the pattern selection. 
let AddedComplexity = 20 in { def : Pat<(v4f32 (X86VBroadcast FR32:$src)), - (VBROADCASTSSrr - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>; + (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>; def : Pat<(v8f32 (X86VBroadcast FR32:$src)), - (VBROADCASTSSYrr - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>; + (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>; def : Pat<(v4f64 (X86VBroadcast FR64:$src)), - (VBROADCASTSDYrr - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>; + (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>; def : Pat<(v4i32 (X86VBroadcast GR32:$src)), - (VBROADCASTSSrr - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>; + (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>; def : Pat<(v8i32 (X86VBroadcast GR32:$src)), - (VBROADCASTSSYrr - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>; + (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>; def : Pat<(v4i64 (X86VBroadcast GR64:$src)), - (VBROADCASTSDYrr - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd))>; + (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>; } } @@ -7768,46 +7684,26 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), let AddedComplexity = 20 in { // 128bit broadcasts: def : Pat<(v4f32 (X86VBroadcast FR32:$src)), - (VPSHUFDri - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0)>; + (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>; def : Pat<(v8f32 (X86VBroadcast FR32:$src)), (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), - (VPSHUFDri - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0), - sub_xmm), - (VPSHUFDri - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), - 0), 1)>; + (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm), + (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>; def : Pat<(v4f64 (X86VBroadcast FR64:$src)), (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), - (VPSHUFDri - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd), - 0x44), - sub_xmm), - (VPSHUFDri - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd), - 0x44), 1)>; + (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm), + (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>; def : Pat<(v4i32 (X86VBroadcast GR32:$src)), - (VPSHUFDri - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), 0)>; + (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>; def : Pat<(v8i32 (X86VBroadcast GR32:$src)), (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), - (VPSHUFDri - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), 0), - sub_xmm), - (VPSHUFDri - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), - 0), 1)>; + (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm), + (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>; def : Pat<(v4i64 (X86VBroadcast GR64:$src)), (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), - (VPSHUFDri - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd), - 0x44), - sub_xmm), - (VPSHUFDri - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd), - 0x44), 1)>; + (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm), + (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>; } } @@ -8052,7 +7948,7 @@ multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256, []>, VEX_4VOp3, VEX_L; } -let Constraints = "$src1 = $dst, $mask = $mask_wb" in { +let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in { defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, 
vx64mem, vx64mem>, VEX_W; defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W; defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>; diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 0168d12..7ac4cec 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -532,6 +532,15 @@ uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) { #endif } +template<typename T> void addUnaligned(void *Pos, T Delta) { + T Value; + std::memcpy(reinterpret_cast<char*>(&Value), reinterpret_cast<char*>(Pos), + sizeof(T)); + Value += Delta; + std::memcpy(reinterpret_cast<char*>(Pos), reinterpret_cast<char*>(&Value), + sizeof(T)); +} + /// relocate - Before the JIT can run a block of code that has been emitted, /// it must rewrite the code to contain the actual addresses of any /// referenced global symbols. @@ -545,24 +554,24 @@ void X86JITInfo::relocate(void *Function, MachineRelocation *MR, // PC relative relocation, add the relocated value to the value already in // memory, after we adjust it for where the PC is. ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal(); - *((unsigned*)RelocPos) += (unsigned)ResultPtr; + addUnaligned<unsigned>(RelocPos, ResultPtr); break; } case X86::reloc_picrel_word: { // PIC base relative relocation, add the relocated value to the value // already in memory, after we adjust it for where the PIC base is. ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal()); - *((unsigned*)RelocPos) += (unsigned)ResultPtr; + addUnaligned<unsigned>(RelocPos, ResultPtr); break; } case X86::reloc_absolute_word: case X86::reloc_absolute_word_sext: // Absolute relocation, just add the relocated value to the value already // in memory. - *((unsigned*)RelocPos) += (unsigned)ResultPtr; + addUnaligned<unsigned>(RelocPos, ResultPtr); break; case X86::reloc_absolute_dword: - *((intptr_t*)RelocPos) += ResultPtr; + addUnaligned<intptr_t>(RelocPos, ResultPtr); break; } } diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h index c76d3cc..d7c08df 100644 --- a/lib/Target/X86/X86JITInfo.h +++ b/lib/Target/X86/X86JITInfo.h @@ -65,7 +65,7 @@ namespace llvm { /// referenced global symbols. virtual void relocate(void *Function, MachineRelocation *MR, unsigned NumRelocs, unsigned char* GOTBase); - + /// allocateThreadLocalMemory - Each target has its own way of /// handling thread local variables. This method returns a value only /// meaningful to the target. 
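The addUnaligned helper introduced in the X86JITInfo.cpp hunk above replaces direct writes like *((unsigned*)RelocPos) += (unsigned)ResultPtr, which misbehave under strict aliasing and fault on targets that trap on misaligned access; relocation sites inside emitted code carry no alignment guarantee. A self-contained sketch of the same memcpy-based read-modify-write idiom (the buffer and offset below are illustrative, not from the patch):

#include <cassert>
#include <cstdint>
#include <cstring>

// Read a T from a possibly misaligned address, add Delta, write it back.
// Going through memcpy keeps the access well defined regardless of the
// pointer's alignment or declared type.
template <typename T> void addUnaligned(void *Pos, T Delta) {
  T Value;
  std::memcpy(&Value, Pos, sizeof(T));
  Value += Delta;
  std::memcpy(Pos, &Value, sizeof(T));
}

int main() {
  unsigned char Buf[8] = {};
  // Patch a 32-bit word at offset 1, deliberately misaligned for uint32_t.
  addUnaligned<uint32_t>(Buf + 1, 0x1000u);
  uint32_t Out;
  std::memcpy(&Out, Buf + 1, sizeof(Out));
  assert(Out == 0x1000u);
  return 0;
}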
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index df7507c..9c0ce4e 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -46,12 +46,12 @@ GetSymbolFromOperand(const MachineOperand &MO) const { assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference"); SmallString<128> Name; - + if (!MO.isGlobal()) { assert(MO.isSymbol()); Name += MAI.getGlobalPrefix(); Name += MO.getSymbolName(); - } else { + } else { const GlobalValue *GV = MO.getGlobal(); bool isImplicitlyPrivate = false; if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB || @@ -59,7 +59,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE || MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE) isImplicitlyPrivate = true; - + Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); } @@ -110,7 +110,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { getMachOMMI().getFnStubEntry(Sym); if (StubSym.getPointer()) return Sym; - + if (MO.isGlobal()) { StubSym = MachineModuleInfoImpl:: @@ -135,7 +135,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, // lot of extra uniquing. const MCExpr *Expr = 0; MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; - + switch (MO.getTargetFlags()) { default: llvm_unreachable("Unknown target flag on GV operand"); case X86II::MO_NO_FLAG: // No flag. @@ -144,7 +144,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_DLLIMPORT: case X86II::MO_DARWIN_STUB: break; - + case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break; case X86II::MO_TLVP_PIC_BASE: Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); @@ -173,7 +173,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: Expr = MCSymbolRefExpr::Create(Sym, Ctx); // Subtract the pic base. - Expr = MCBinaryExpr::CreateSub(Expr, + Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx), Ctx); if (MO.isJTI() && MAI.hasSetDirective()) { @@ -187,10 +187,10 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, } break; } - + if (Expr == 0) Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); - + if (!MO.isJTI() && MO.getOffset()) Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), @@ -211,10 +211,10 @@ static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) { // Convert registers in the addr mode according to subreg64. for (unsigned i = 0; i != 4; ++i) { if (!MI->getOperand(OpNo+i).isReg()) continue; - + unsigned Reg = MI->getOperand(OpNo+i).getReg(); if (Reg == 0) continue; - + MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64)); } } @@ -280,7 +280,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, return; // Check whether this is an absolute address. - // FIXME: We know TLVP symbol refs aren't, but there should be a better way + // FIXME: We know TLVP symbol refs aren't, but there should be a better way // to do this here. 
bool Absolute = true; if (Inst.getOperand(AddrOp).isExpr()) { @@ -289,7 +289,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP) Absolute = false; } - + if (Absolute && (Inst.getOperand(AddrBase + 0).getReg() != 0 || Inst.getOperand(AddrBase + 2).getReg() != 0 || @@ -306,10 +306,10 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); - + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - + MCOperand MCOp; switch (MO.getType()) { default: @@ -345,10 +345,10 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { // Ignore call clobbers. continue; } - + OutMI.addOperand(MCOp); } - + // Handle a few special cases to eliminate operand modifiers. ReSimplify: switch (OutMI.getOpcode()) { @@ -425,7 +425,7 @@ ReSimplify: case X86::TAILJMPd: case X86::TAILJMPd64: Opcode = X86::JMP_1; break; } - + MCOperand Saved = OutMI.getOperand(0); OutMI = MCInst(); OutMI.setOpcode(Opcode); @@ -445,7 +445,7 @@ ReSimplify: case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify; case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify; case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify; - + // The assembler backend wants to see branches in their small form and relax // them to their large form. The JIT can only handle the large form because // it does not do relaxation. For now, translate the large form to the @@ -688,7 +688,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // call "L1$pb" // "L1$pb": // popl %esi - + // Emit the call. MCSymbol *PICBase = MF->getPICBaseSymbol(); TmpInst.setOpcode(X86::CALLpcrel32); @@ -697,43 +697,43 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); OutStreamer.EmitInstruction(TmpInst); - + // Emit the label. OutStreamer.EmitLabel(PICBase); - + // popl $reg TmpInst.setOpcode(X86::POP32r); TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg()); OutStreamer.EmitInstruction(TmpInst); return; } - + case X86::ADD32ri: { // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) break; - + // Okay, we have something like: // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) - + // For this, we want to print something like: // MYGLOBAL + (. - PICBASE) // However, we can't generate a ".", so just emit a new label here and refer // to it. MCSymbol *DotSym = OutContext.CreateTempSymbol(); OutStreamer.EmitLabel(DotSym); - + // Now that we have emitted the label, lower the complex operand expression. 
MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2)); - + const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); const MCExpr *PICBase = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext); DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext); - - DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext), + + DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext), DotExpr, OutContext); - + MCInst TmpInst; TmpInst.setOpcode(X86::ADD32ri); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); @@ -743,7 +743,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } } - + MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); OutStreamer.EmitInstruction(TmpInst); diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h index 40df3db..b4d4cfd 100644 --- a/lib/Target/X86/X86MCInstLower.h +++ b/lib/Target/X86/X86MCInstLower.h @@ -25,7 +25,7 @@ namespace llvm { class Mangler; class TargetMachine; class X86AsmPrinter; - + /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. class LLVM_LIBRARY_VISIBILITY X86MCInstLower { MCContext &Ctx; @@ -37,12 +37,12 @@ class LLVM_LIBRARY_VISIBILITY X86MCInstLower { public: X86MCInstLower(Mangler *mang, const MachineFunction &MF, X86AsmPrinter &asmprinter); - + void Lower(const MachineInstr *MI, MCInst &OutMI) const; MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; - + private: MachineModuleInfoMachO &getMachOMMI() const; }; diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index f83a525..78d20ce 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -24,7 +24,7 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { virtual void anchor(); /// ForceFramePointer - True if the function is required to use of frame - /// pointer for reasons other than it containing dynamic allocation or + /// pointer for reasons other than it containing dynamic allocation or /// that FP eliminatation is turned off. For example, Cygwin main function /// contains stack pointer re-alignment code which requires FP. bool ForceFramePointer; @@ -83,7 +83,7 @@ public: VarArgsFPOffset(0), ArgumentStackSize(0), NumLocalDynamics(0) {} - + explicit X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false), CalleeSavedFrameSize(0), @@ -99,7 +99,7 @@ public: ArgumentStackSize(0), NumLocalDynamics(0) {} - bool getForceFramePointer() const { return ForceFramePointer;} + bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index acf53f8..877b8f6 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -72,13 +72,15 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, SlotSize = 8; StackPtr = X86::RSP; FramePtr = X86::RBP; - BasePtr = X86::RBX; } else { SlotSize = 4; StackPtr = X86::ESP; FramePtr = X86::EBP; - BasePtr = X86::EBX; } + // Use a callee-saved register as the base pointer. These registers must + // not conflict with any ABI requirements. 
For example, in 32-bit mode PIC + // requires GOT in the EBX register before function calls via PLT GOT pointer. + BasePtr = Is64Bit ? X86::RBX : X86::ESI; } /// getCompactUnwindRegNum - This function maps the register to the number for @@ -366,7 +368,7 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { if (!EnableBasePointer) return false; - // When we need stack realignment and there are dynamic allocas, we can't + // When we need stack realignment and there are dynamic allocas, we can't // reference off of the stack pointer, so we reserve a base pointer. if (needsStackRealignment(MF) && MFI->hasVarSizedObjects()) return true; diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index ae2d4d0..edc7184 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -23,9 +23,6 @@ let Namespace = "X86" in { def sub_8bit_hi : SubRegIndex; def sub_16bit : SubRegIndex; def sub_32bit : SubRegIndex; - - def sub_ss : SubRegIndex; - def sub_sd : SubRegIndex; def sub_xmm : SubRegIndex; @@ -163,8 +160,6 @@ let Namespace = "X86" in { def FP6 : Register<"fp6">; // XMM Registers, used by the various SSE instruction set extensions. - // The sub_ss and sub_sd subregs are the same registers with another regclass. - let CompositeIndices = [(sub_ss), (sub_sd)] in { def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>; def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>; def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>; @@ -184,7 +179,7 @@ let Namespace = "X86" in { def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>; def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>; def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; - }} + } // CostPerUse // YMM Registers, used by AVX instructions let SubRegIndices = [sub_xmm] in { diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h index 857becf..0333056 100644 --- a/lib/Target/X86/X86Relocations.h +++ b/lib/Target/X86/X86Relocations.h @@ -21,7 +21,7 @@ namespace llvm { /// RelocationType - An enum for the x86 relocation codes. Note that /// the terminology here doesn't follow x86 convention - word means /// 32-bit and dword means 64-bit. The relocations will be treated - /// by JIT or ObjectCode emitters, this is transparent to the x86 code + /// by JIT or ObjectCode emitters, this is transparent to the x86 code /// emitter but JIT and ObjectCode will treat them differently enum RelocationType { /// reloc_pcrel_word - PC relative relocation, add the relocated value to diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 7c6788f..00edcbc 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -38,7 +38,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256) return SDValue(); - + // If not DWORD aligned or size is more than the threshold, call the library. // The libc version is likely to be faster for these cases. It can use the // address value and run time information about the CPU. diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index e6e9c56..9087852 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -39,10 +39,10 @@ unsigned char X86Subtarget:: ClassifyBlockAddressReference() const { if (isPICStyleGOT()) // 32-bit ELF targets. 
return X86II::MO_GOTOFF; - + if (isPICStyleStubPIC()) // Darwin/32 in PIC mode. return X86II::MO_PIC_BASE_OFFSET; - + // Direct static reference to label. return X86II::MO_NO_FLAG; } @@ -69,7 +69,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // Large model never uses stubs. if (TM.getCodeModel() == CodeModel::Large) return X86II::MO_NO_FLAG; - + if (isTargetDarwin()) { // If symbol visibility is hidden, the extra load is not needed if // target is x86-64 or the symbol is definitely defined in the current @@ -87,18 +87,18 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { return X86II::MO_NO_FLAG; } - + if (isPICStyleGOT()) { // 32-bit ELF targets. // Extra load is needed for all externally visible. if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) return X86II::MO_GOTOFF; return X86II::MO_GOT; } - + if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode. // Determine whether we have a stub reference and/or whether the reference // is relative to the PIC base or not. - + // If this is a strong reference to a definition, it is definitely not // through a stub. if (!isDecl && !GV->isWeakForLinker()) @@ -108,26 +108,26 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // normal $non_lazy_ptr stub because this symbol might be resolved late. if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. return X86II::MO_DARWIN_NONLAZY_PIC_BASE; - + // If symbol visibility is hidden, we have a stub for common symbol // references and external declarations. if (isDecl || GV->hasCommonLinkage()) { // Hidden $non_lazy_ptr reference. return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE; } - + // Otherwise, no stub. return X86II::MO_PIC_BASE_OFFSET; } - + if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode. // Determine whether we have a stub reference. - + // If this is a strong reference to a definition, it is definitely not // through a stub. if (!isDecl && !GV->isWeakForLinker()) return X86II::MO_NO_FLAG; - + // Unless we have a symbol with hidden visibility, we have to go through a // normal $non_lazy_ptr stub because this symbol might be resolved late. if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. @@ -136,7 +136,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // Otherwise, no stub. return X86II::MO_NO_FLAG; } - + // Direct static reference to global. return X86II::MO_NO_FLAG; } @@ -246,8 +246,11 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { } // If it's Nehalem, unaligned memory access is fast. - // FIXME: Nehalem is family 6. Also include Westmere and later processors? - if (Family == 15 && Model == 26) { + // Include Westmere and Sandy Bridge as well. + // FIXME: add later processors. 
+ if (IsIntel && ((Family == 6 && Model == 26) || + (Family == 6 && Model == 44) || + (Family == 6 && Model == 42))) { IsUAMemFast = true; ToggleFeature(X86::FeatureFastUAMem); } @@ -315,7 +318,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { } X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, + const std::string &FS, unsigned StackAlignOverride, bool is64Bit) : X86GenSubtargetInfo(TT, CPU, FS) , X86ProcFamily(Others) @@ -397,10 +400,10 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, } } - if (X86ProcFamily == IntelAtom) { + if (X86ProcFamily == IntelAtom) PostRAScheduler = true; - InstrItins = getInstrItineraryForCPU(CPUName); - } + + InstrItins = getInstrItineraryForCPU(CPUName); // It's important to keep the MCSubtargetInfo feature bits in sync with // target data structure which is shared with MC code emitter, etc. diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 1af585f..6841c5b 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -55,7 +55,7 @@ protected: /// X86ProcFamily - X86 processor family: Intel Atom, and others X86ProcFamilyEnum X86ProcFamily; - + /// PICStyle - Which PIC style to use /// PICStyles::Style PICStyle; @@ -149,7 +149,7 @@ protected: /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; - + /// Instruction itineraries for scheduling InstrItineraryData InstrItins; diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index e4f567f..80b75dc 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -222,7 +222,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, DebugLoc dl = I->getDebugLoc(); bool isControlFlow = MI->isCall() || MI->isReturn(); - // Shortcut: don't need to check regular instructions in dirty state. + // Shortcut: don't need to check regular instructions in dirty state. if (!isControlFlow && CurState == ST_DIRTY) continue; diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 3dbc3b9..a4e5647 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -371,8 +371,3 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, false)); } } - -void XCoreFrameLowering:: -processFunctionBeforeFrameFinalized(MachineFunction &MF) const { - -} diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h index afa2773..db1bbb6 100644 --- a/lib/Target/XCore/XCoreFrameLowering.h +++ b/lib/Target/XCore/XCoreFrameLowering.h @@ -44,8 +44,6 @@ namespace llvm { void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; - //! 
Stack slot size (4 bytes) static int stackSlotSize() { return 4; diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 60ce958..6d950d2 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -352,7 +352,8 @@ static bool IsSafeComputationToRemove(Value *V) { return true; if (!V->hasOneUse()) return false; - if (isa<LoadInst>(V) || isa<Argument>(V) || isa<GlobalValue>(V)) + if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) || + isa<GlobalValue>(V)) return false; if (isAllocationFn(V)) return true; @@ -442,12 +443,14 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV) { Dead[i].second->eraseFromParent(); Instruction *I = Dead[i].first; do { + if (isAllocationFn(I)) + break; Instruction *J = dyn_cast<Instruction>(I->getOperand(0)); if (!J) break; I->eraseFromParent(); I = J; - } while (!isAllocationFn(I)); + } while (1); I->eraseFromParent(); } } diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index d8e8cf7..80bfc1c 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -27,6 +27,7 @@ #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/TypeFinder.h" #include "llvm/ValueSymbolTable.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" @@ -175,8 +176,8 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) { // Strip any named types of their names. static void StripTypeNames(Module &M, bool PreserveDbgInfo) { - std::vector<StructType*> StructTypes; - M.findUsedStructTypes(StructTypes); + TypeFinder StructTypes; + StructTypes.run(M, false); for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { StructType *STy = StructTypes[i]; diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index c1d9d01..cbe1ca4 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -51,8 +51,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // if the size is something we can handle with a single primitive load/store. // A single load+store correctly handles overlapping memory in the memmove // case. - unsigned Size = MemOpLength->getZExtValue(); - if (Size == 0) return MI; // Delete this mem transfer. + uint64_t Size = MemOpLength->getLimitedValue(); + assert(Size && "0-sized memory transfering should be removed already."); if (Size > 8 || (Size&(Size-1))) return 0; // If not 1/2/4/8 bytes, exit. 
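The SimplifyMemTransfer hunk above now fetches the length with getLimitedValue() and asserts it is non-zero, since zero-length transfers are deleted earlier; the SimplifyMemSet hunk just below applies the same rule. What survives the remaining guard is a copy of exactly 1, 2, 4, or 8 bytes, which can be replaced by a single load plus a single store. A sketch of that power-of-two gate (the helper name is invented for illustration):

#include <cstdint>

// Mirrors the guard in SimplifyMemTransfer: a memcpy becomes one
// load/store pair only when the length is 1, 2, 4, or 8 bytes.
// "Size & (Size - 1)" is zero exactly for powers of two.
static bool isSingleLoadStoreSize(uint64_t Size) {
  return Size != 0 && Size <= 8 && (Size & (Size - 1)) == 0;
}

int main() {
  return (isSingleLoadStoreSize(4) && !isSingleLoadStoreSize(3) &&
          !isSingleLoadStoreSize(16) && !isSingleLoadStoreSize(0)) ? 0 : 1;
}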
@@ -133,11 +133,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) return 0; - uint64_t Len = LenC->getZExtValue(); + uint64_t Len = LenC->getLimitedValue(); Alignment = MI->getAlignment(); - - // If the length is zero, this is a no-op - if (Len == 0) return MI; // memset(d,c,0,a) -> noop + assert(Len && "0-sized memory setting should be removed already."); // memset(s,c,n) -> store s, c (for n=1,2,4,8) if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { @@ -795,7 +793,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { if (CI->getCalledFunction() == 0) return 0; InstCombineFortifiedLibCalls Simplifier(this); - Simplifier.fold(CI, TD); + Simplifier.fold(CI, TD, TLI); return Simplifier.NewInstruction; } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 7076d88..c3fc18c 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" @@ -2824,7 +2825,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, case ICmpInst::ICMP_UGE: // (float)int >= -4.4 --> true // (float)int >= 4.4 --> int > 4 - if (!RHS.isNegative()) + if (RHS.isNegative()) return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); Pred = ICmpInst::ICMP_UGT; break; @@ -2985,6 +2986,44 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return Res; } break; + case Instruction::Call: { + CallInst *CI = cast<CallInst>(LHSI); + LibFunc::Func Func; + // Various optimization for fabs compared with zero. 
+ if (RHSC->isNullValue() && CI->getCalledFunction() && + TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && + TLI->has(Func)) { + if (Func == LibFunc::fabs || Func == LibFunc::fabsf || + Func == LibFunc::fabsl) { + switch (I.getPredicate()) { + default: break; + // fabs(x) < 0 --> false + case FCmpInst::FCMP_OLT: + return ReplaceInstUsesWith(I, Builder->getFalse()); + // fabs(x) > 0 --> x != 0 + case FCmpInst::FCMP_OGT: + return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), + RHSC); + // fabs(x) <= 0 --> x == 0 + case FCmpInst::FCMP_OLE: + return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), + RHSC); + // fabs(x) >= 0 --> !isnan(x) + case FCmpInst::FCMP_OGE: + return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), + RHSC); + // fabs(x) == 0 --> x == 0 + // fabs(x) != 0 --> x != 0 + case FCmpInst::FCMP_OEQ: + case FCmpInst::FCMP_UEQ: + case FCmpInst::FCMP_ONE: + case FCmpInst::FCMP_UNE: + return new FCmpInst(I.getPredicate(), CI->getArgOperand(0), + RHSC); + } + } + } + } } } diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index c485844..6ecb4c5 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -20,7 +20,154 @@ #include "llvm/ADT/Statistic.h" using namespace llvm; -STATISTIC(NumDeadStore, "Number of dead stores eliminated"); +STATISTIC(NumDeadStore, "Number of dead stores eliminated"); +STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); + +/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to +/// some part of a constant global variable. This intentionally only accepts +/// constant expressions because we can't rewrite arbitrary instructions. +static bool pointsToConstantGlobal(Value *V) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + return GV->isConstant(); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::GetElementPtr) + return pointsToConstantGlobal(CE->getOperand(0)); + return false; +} + +/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) +/// pointer to an alloca. Ignore any reads of the pointer, return false if we +/// see any stores or other unknown uses. If we see pointer arithmetic, keep +/// track of whether it moves the pointer (with IsOffset) but otherwise traverse +/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to +/// the alloca, and if the source pointer is a pointer to a constant global, we +/// can optimize this. +static bool +isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, + SmallVectorImpl<Instruction *> &ToDelete, + bool IsOffset = false) { + // We track lifetime intrinsics as we encounter them. If we decide to go + // ahead and replace the value with the global, this lets the caller quickly + // eliminate the markers. + + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { + User *U = cast<Instruction>(*UI); + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + // Ignore non-volatile loads, they are always ok. + if (!LI->isSimple()) return false; + continue; + } + + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + // If uses of the bitcast are ok, we are ok. 
+      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
+        return false;
+      continue;
+    }
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+      // If the GEP has all zero indices, it doesn't offset the pointer. If it
+      // doesn't, it does.
+      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
+                                          IsOffset || !GEP->hasAllZeroIndices()))
+        return false;
+      continue;
+    }
+
+    if (CallSite CS = U) {
+      // If this is the function being called then we treat it like a load and
+      // ignore it.
+      if (CS.isCallee(UI))
+        continue;
+
+      // If this is a readonly/readnone call site, then we know it is just a
+      // load (but one that potentially returns the value itself), so we can
+      // ignore it if we know that the value isn't captured.
+      unsigned ArgNo = CS.getArgumentNo(UI);
+      if (CS.onlyReadsMemory() &&
+          (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
+        continue;
+
+      // If this is being passed as a byval argument, the caller is making a
+      // copy, so it is only a read of the alloca.
+      if (CS.isByValArgument(ArgNo))
+        continue;
+    }
+
+    // Lifetime intrinsics can be handled by the caller.
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+          II->getIntrinsicID() == Intrinsic::lifetime_end) {
+        assert(II->use_empty() && "Lifetime markers have no result to use!");
+        ToDelete.push_back(II);
+        continue;
+      }
+    }
+
+    // If this isn't our memcpy/memmove, reject it as something we can't
+    // handle.
+    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
+    if (MI == 0)
+      return false;
+
+    // If the transfer is using the alloca as a source of the transfer, then
+    // ignore it since it is a load (unless the transfer is volatile).
+    if (UI.getOperandNo() == 1) {
+      if (MI->isVolatile()) return false;
+      continue;
+    }
+
+    // If we already have seen a copy, reject the second one.
+    if (TheCopy) return false;
+
+    // If the pointer has been offset from the start of the alloca, we can't
+    // safely handle this.
+    if (IsOffset) return false;
+
+    // If the memintrinsic isn't using the alloca as the dest, reject it.
+    if (UI.getOperandNo() != 0) return false;
+
+    // If the source of the memcpy/move is not a constant global, reject it.
+    if (!pointsToConstantGlobal(MI->getSource()))
+      return false;
+
+    // Otherwise, the transform is safe.  Remember the copy instruction.
+    TheCopy = MI;
+  }
+  return true;
+}
+
+/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is
+/// only modified by a copy from a constant global.  If we can prove this, we
+/// can replace any uses of the alloca with uses of the global directly.
+static MemTransferInst *
+isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
+                               SmallVectorImpl<Instruction *> &ToDelete) {
+  MemTransferInst *TheCopy = 0;
+  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
+    return TheCopy;
+  return 0;
+}
+
+/// getPointeeAlignment - Compute the minimum alignment of the value pointed
+/// to by the given pointer.
+static unsigned getPointeeAlignment(Value *V, const TargetData &TD) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::BitCast || + (CE->getOpcode() == Instruction::GetElementPtr && + cast<GEPOperator>(CE)->hasAllZeroIndices())) + return getPointeeAlignment(CE->getOperand(0), TD); + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + if (!GV->isDeclaration()) + return TD.getPreferredAlignment(GV); + + if (PointerType *PT = dyn_cast<PointerType>(V->getType())) + return TD.getABITypeAlignment(PT->getElementType()); + + return 0; +} Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that @@ -113,6 +260,29 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { } } + // Check to see if this allocation is only modified by a memcpy/memmove from + // a constant global whose alignment is equal to or exceeds that of the + // allocation. If this is the case, we can change all users to use + // the constant global instead. This is commonly produced by the CFE by + // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' + // is only subsequently read. + SmallVector<Instruction *, 4> ToDelete; + if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { + if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { + DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + EraseInstFromFunction(*ToDelete[i]); + Constant *TheSrc = cast<Constant>(Copy->getSource()); + Instruction *NewI + = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, + AI.getType())); + EraseInstFromFunction(*Copy); + ++NumGlobalCopies; + return NewI; + } + } + // At last, use the generic allocation site handler to aggressively remove // unused allocas. 
return visitAllocSite(AI); diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index eb9945b..291e800 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -881,12 +881,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) { if (TrueSI->getCondition() == CondVal) { + if (SI.getTrueValue() == TrueSI->getTrueValue()) + return 0; SI.setOperand(1, TrueSI->getTrueValue()); return &SI; } } if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) { if (FalseSI->getCondition() == CondVal) { + if (SI.getFalseValue() == FalseSI->getFalseValue()) + return 0; SI.setOperand(2, FalseSI->getFalseValue()); return &SI; } @@ -899,5 +903,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } + if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) { + unsigned VWidth = VecTy->getNumElements(); + APInt UndefElts(VWidth, 0); + APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); + if (Value *V = SimplifyDemandedVectorElts(&SI, AllOnesEltMask, UndefElts)) { + if (V != &SI) + return ReplaceInstUsesWith(SI, V); + return &SI; + } + } + return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 125c74a..54be8ed 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -989,6 +989,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } break; } + case Instruction::Select: { + APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts); + if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) { + for (unsigned i = 0; i < VWidth; i++) { + if (CV->getAggregateElement(i)->isNullValue()) + LeftDemanded.clearBit(i); + else + RightDemanded.clearBit(i); + } + } + + TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } + + TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded, + UndefElts2, Depth+1); + if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; } + + // Output elements are undefined if both are undefined. + UndefElts &= UndefElts2; + break; + } case Instruction::BitCast: { // Vector->vector casts only. VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); @@ -1074,6 +1097,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // like undef&0. The result is known zero, not undef. 
UndefElts &= UndefElts2; break; + case Instruction::FPTrunc: + case Instruction::FPExt: + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + break; case Instruction::Call: { IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 3368026..06f4d2f 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -61,6 +61,8 @@ static const int kAsanCtorAndCtorPriority = 1; static const char *kAsanReportErrorTemplate = "__asan_report_"; static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; +static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; +static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; static const char *kAsanInitName = "__asan_init"; static const char *kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; @@ -86,8 +88,8 @@ static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes", static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics", cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, cl::init(true)); -static cl::opt<bool> ClMergeCallbacks("asan-merge-callbacks", - cl::desc("merge __asan_report_ callbacks to create fewer BBs"), +static cl::opt<bool> ClAlwaysSlowPath("asan-always-slow-path", + cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden, cl::init(false)); // This flag limits the number of instructions to be instrumented // in any given BB. Normally, this should be set to unlimited (INT_MAX), @@ -106,6 +108,8 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return", // This flag may need to be replaced with -f[no]asan-globals. static cl::opt<bool> ClGlobals("asan-globals", cl::desc("Handle global objects"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClInitializers("asan-initialization-order", + cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false)); static cl::opt<bool> ClMemIntrin("asan-memintrin", cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true)); // This flag may need to be replaced with -fasan-blacklist. @@ -145,24 +149,11 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"), namespace { -/// When the crash callbacks are merged, they receive some amount of arguments -/// that are merged in a PHI node. This struct represents arguments from one -/// call site. -struct CrashArg { - Value *Arg1; - Value *Arg2; -}; - /// An object of this type is created while instrumenting every function. struct AsanFunctionContext { - AsanFunctionContext(Function &Function) : F(Function), CrashBlock() { } + AsanFunctionContext(Function &Function) : F(Function) { } Function &F; - // These are initially zero. If we require at least one call to - // __asan_report_{read,write}{1,2,4,8,16}, an appropriate BB is created. - BasicBlock *CrashBlock[2][kNumberOfAccessSizes]; - typedef SmallVector<CrashArg, 8> CrashArgsVec; - CrashArgsVec CrashArgs[2][kNumberOfAccessSizes]; }; /// AddressSanitizer: instrument the code in module to find memory bugs. 
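Note: the kAsanReportErrorTemplate constant introduced above is combined with
the access kind and size when the pass creates its AsanErrorCallback entries
(see the runOnModule hunk later in this diff). A hedged sketch of that naming
scheme (standalone, using std::to_string in place of LLVM's itostr):

    #include <string>

    // Illustrative only: builds names such as "__asan_report_store4" or
    // "__asan_report_load8" from the write flag and log2 of the access size.
    static std::string asanReportName(bool IsWrite, unsigned AccessSizeIndex) {
      return std::string("__asan_report_") + (IsWrite ? "store" : "load") +
             std::to_string(1u << AccessSizeIndex);
    }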
@@ -175,7 +166,7 @@ struct AddressSanitizer : public ModulePass { Value *Addr, uint32_t TypeSize, bool IsWrite); Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeSize); - Instruction *generateCrashCode(BasicBlock *BB, Value *Addr, Value *PC, + Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex); bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI); void instrumentMemIntrinsicParam(AsanFunctionContext &AFC, @@ -184,6 +175,8 @@ struct AddressSanitizer : public ModulePass { Instruction *InsertBefore, bool IsWrite); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool handleFunction(Module &M, Function &F); + void createInitializerPoisonCalls(Module &M, + Value *FirstAddr, Value *LastAddr); bool maybeInsertAsanInitAtFunctionEntry(Function &F); bool poisonStackInFunction(Module &M, Function &F); virtual bool runOnModule(Module &M); @@ -191,7 +184,6 @@ struct AddressSanitizer : public ModulePass { static char ID; // Pass identification, replacement for typeid private: - uint64_t getAllocaSizeInBytes(AllocaInst *AI) { Type *Ty = AI->getAllocatedType(); uint64_t SizeInBytes = TD->getTypeAllocSize(Ty); @@ -207,9 +199,12 @@ struct AddressSanitizer : public ModulePass { } Function *checkInterfaceFunction(Constant *FuncOrBitcast); + bool ShouldInstrumentGlobal(GlobalVariable *G); void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB, Value *ShadowBase, bool DoPoison); bool LooksLikeCodeInBug11395(Instruction *I); + void FindDynamicInitializers(Module &M); + bool HasDynamicInitializer(GlobalVariable *G); LLVMContext *C; TargetData *TD; @@ -226,6 +221,7 @@ struct AddressSanitizer : public ModulePass { // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; InlineAsm *EmptyAsm; + SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals; }; } // namespace @@ -267,24 +263,24 @@ static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { // ThenBlock // Tail // -// If ThenBlock is zero, a new block is created and its terminator is returned. -// Otherwize 0 is returned. -static BranchInst *splitBlockAndInsertIfThen(Value *Cmp, - BasicBlock *ThenBlock = 0) { +// ThenBlock block is created and its terminator is returned. +// If Unreachable, ThenBlock is terminated with UnreachableInst, otherwise +// it is terminated with BranchInst to Tail. 
+static TerminatorInst *splitBlockAndInsertIfThen(Value *Cmp, bool Unreachable) { Instruction *SplitBefore = cast<Instruction>(Cmp)->getNextNode(); BasicBlock *Head = SplitBefore->getParent(); BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); TerminatorInst *HeadOldTerm = Head->getTerminator(); - BranchInst *CheckTerm = 0; - if (!ThenBlock) { - LLVMContext &C = Head->getParent()->getParent()->getContext(); - ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + LLVMContext &C = Head->getParent()->getParent()->getContext(); + BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + TerminatorInst *CheckTerm; + if (Unreachable) + CheckTerm = new UnreachableInst(C, ThenBlock); + else CheckTerm = BranchInst::Create(Tail, ThenBlock); - } BranchInst *HeadNewTerm = BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp); ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); - return CheckTerm; } @@ -336,7 +332,7 @@ bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC, Value *Cmp = IRB.CreateICmpNE(Length, Constant::getNullValue(Length->getType())); - InsertBefore = splitBlockAndInsertIfThen(Cmp); + InsertBefore = splitBlockAndInsertIfThen(Cmp, false); } instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true); @@ -371,14 +367,50 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) { return NULL; } +void AddressSanitizer::FindDynamicInitializers(Module& M) { + // Clang generates metadata identifying all dynamically initialized globals. + NamedMDNode *DynamicGlobals = + M.getNamedMetadata("llvm.asan.dynamically_initialized_globals"); + if (!DynamicGlobals) + return; + for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) { + MDNode *MDN = DynamicGlobals->getOperand(i); + assert(MDN->getNumOperands() == 1); + Value *VG = MDN->getOperand(0); + // The optimizer may optimize away a global entirely, in which case we + // cannot instrument access to it. + if (!VG) + continue; + + GlobalVariable *G = cast<GlobalVariable>(VG); + DynamicallyInitializedGlobals.insert(G); + } +} +// Returns true if a global variable is initialized dynamically in this TU. +bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) { + return DynamicallyInitializedGlobals.count(G); +} + void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) { bool IsWrite; Value *Addr = isInterestingMemoryAccess(I, &IsWrite); assert(Addr); - if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) { - // We are accessing a global scalar variable. Nothing to catch here. - return; + if (ClOpt && ClOptGlobals) { + if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) { + // If initialization order checking is disabled, a simple access to a + // dynamically initialized global is always valid. + if (!ClInitializers) + return; + // If a global variable does not have dynamic initialization we don't + // have to instrument it. However, if a global has external linkage, we + // assume it has dynamic initialization, as it may have an initializer + // in a different TU. 
+ if (G->getLinkage() != GlobalVariable::ExternalLinkage && + !HasDynamicInitializer(G)) + return; + } } + Type *OrigPtrTy = Addr->getType(); Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); @@ -407,15 +439,11 @@ Function *AddressSanitizer::checkInterfaceFunction(Constant *FuncOrBitcast) { } Instruction *AddressSanitizer::generateCrashCode( - BasicBlock *BB, Value *Addr, Value *PC, + Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex) { - IRBuilder<> IRB(BB->getFirstNonPHI()); - CallInst *Call; - if (PC) - Call = IRB.CreateCall2(AsanErrorCallback[IsWrite][AccessSizeIndex], - Addr, PC); - else - Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr); + IRBuilder<> IRB(InsertBefore); + CallInst *Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], + Addr); // We don't do Call->setDoesNotReturn() because the BB already has // UnreachableInst at the end. // This EmptyAsm is required to avoid callback merge. @@ -436,7 +464,7 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)); // (uint8_t) ((Addr & (Granularity-1)) + size - 1) LastAccessedByte = IRB.CreateIntCast( - LastAccessedByte, IRB.getInt8Ty(), false); + LastAccessedByte, ShadowValue->getType(), false); // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue); } @@ -456,112 +484,129 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); - - BasicBlock *CrashBlock = 0; - if (ClMergeCallbacks) { - size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); - BasicBlock **Cached = &AFC.CrashBlock[IsWrite][AccessSizeIndex]; - if (!*Cached) { - std::string BBName("crash_bb-"); - BBName += (IsWrite ? "w-" : "r-") + itostr(1 << AccessSizeIndex); - BasicBlock *BB = BasicBlock::Create(*C, BBName, &AFC.F); - new UnreachableInst(*C, BB); - *Cached = BB; - } - CrashBlock = *Cached; - // We need to pass the PC as the second parameter to __asan_report_*. - // There are few problems: - // - Some architectures (e.g. x86_32) don't have a cheap way to get the PC. - // - LLVM doesn't have the appropriate intrinsic. - // For now, put a random number into the PC, just to allow experiments. 
- Value *PC = ConstantInt::get(IntptrTy, rand()); - CrashArg Arg = {AddrLong, PC}; - AFC.CrashArgs[IsWrite][AccessSizeIndex].push_back(Arg); - } else { - CrashBlock = BasicBlock::Create(*C, "crash_bb", &AFC.F); - new UnreachableInst(*C, CrashBlock); - size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); - Instruction *Crash = - generateCrashCode(CrashBlock, AddrLong, 0, IsWrite, AccessSizeIndex); - Crash->setDebugLoc(OrigIns->getDebugLoc()); - } - + size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); size_t Granularity = 1 << MappingScale; - if (TypeSize < 8 * Granularity) { - BranchInst *CheckTerm = splitBlockAndInsertIfThen(Cmp); - assert(CheckTerm->isUnconditional()); + TerminatorInst *CrashTerm = 0; + + if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) { + TerminatorInst *CheckTerm = splitBlockAndInsertIfThen(Cmp, false); + assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional()); BasicBlock *NextBB = CheckTerm->getSuccessor(0); IRB.SetInsertPoint(CheckTerm); Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); + BasicBlock *CrashBlock = BasicBlock::Create(*C, "", &AFC.F, NextBB); + CrashTerm = new UnreachableInst(*C, CrashBlock); BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2); ReplaceInstWithInst(CheckTerm, NewTerm); } else { - splitBlockAndInsertIfThen(Cmp, CrashBlock); + CrashTerm = splitBlockAndInsertIfThen(Cmp, true); + } + + Instruction *Crash = + generateCrashCode(CrashTerm, AddrLong, IsWrite, AccessSizeIndex); + Crash->setDebugLoc(OrigIns->getDebugLoc()); +} + +void AddressSanitizer::createInitializerPoisonCalls(Module &M, + Value *FirstAddr, + Value *LastAddr) { + // We do all of our poisoning and unpoisoning within _GLOBAL__I_a. + Function *GlobalInit = M.getFunction("_GLOBAL__I_a"); + // If that function is not present, this TU contains no globals, or they have + // all been optimized away + if (!GlobalInit) + return; + + // Set up the arguments to our poison/unpoison functions. + IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt()); + + // Declare our poisoning and unpoisoning functions. + Function *AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( + kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + AsanPoisonGlobals->setLinkage(Function::ExternalLinkage); + Function *AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( + kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL)); + AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage); + + // Add a call to poison all external globals before the given function starts. + IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr); + + // Add calls to unpoison all globals before each return instruction. + for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end(); + I != E; ++I) { + if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) { + CallInst::Create(AsanUnpoisonGlobals, "", RI); + } } } +bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { + Type *Ty = cast<PointerType>(G->getType())->getElementType(); + DEBUG(dbgs() << "GLOBAL: " << *G); + + if (!Ty->isSized()) return false; + if (!G->hasInitializer()) return false; + // Touch only those globals that will not be defined in other modules. + // Don't handle ODR type linkages since other modules may be built w/o asan. 
+ if (G->getLinkage() != GlobalVariable::ExternalLinkage && + G->getLinkage() != GlobalVariable::PrivateLinkage && + G->getLinkage() != GlobalVariable::InternalLinkage) + return false; + // Two problems with thread-locals: + // - The address of the main thread's copy can't be computed at link-time. + // - Need to poison all copies, not just the main thread's one. + if (G->isThreadLocal()) + return false; + // For now, just ignore this Alloca if the alignment is large. + if (G->getAlignment() > RedzoneSize) return false; + + // Ignore all the globals with the names starting with "\01L_OBJC_". + // Many of those are put into the .cstring section. The linker compresses + // that section by removing the spare \0s after the string terminator, so + // our redzones get broken. + if ((G->getName().find("\01L_OBJC_") == 0) || + (G->getName().find("\01l_OBJC_") == 0)) { + DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G); + return false; + } + + if (G->hasSection()) { + StringRef Section(G->getSection()); + // Ignore the globals from the __OBJC section. The ObjC runtime assumes + // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to + // them. + if ((Section.find("__OBJC,") == 0) || + (Section.find("__DATA, __objc_") == 0)) { + DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G); + return false; + } + // See http://code.google.com/p/address-sanitizer/issues/detail?id=32 + // Constant CFString instances are compiled in the following way: + // -- the string buffer is emitted into + // __TEXT,__cstring,cstring_literals + // -- the constant NSConstantString structure referencing that buffer + // is placed into __DATA,__cfstring + // Therefore there's no point in placing redzones into __DATA,__cfstring. + // Moreover, it causes the linker to crash on OS X 10.7 + if (Section.find("__DATA,__cfstring") == 0) { + DEBUG(dbgs() << "Ignoring CFString: " << *G); + return false; + } + } + + return true; +} + // This function replaces all global variables with new variables that have // trailing redzones. It also creates a function that poisons // redzones and inserts this function into llvm.global_ctors. bool AddressSanitizer::insertGlobalRedzones(Module &M) { SmallVector<GlobalVariable *, 16> GlobalsToChange; - for (Module::GlobalListType::iterator G = M.getGlobalList().begin(), - E = M.getGlobalList().end(); G != E; ++G) { - Type *Ty = cast<PointerType>(G->getType())->getElementType(); - DEBUG(dbgs() << "GLOBAL: " << *G); - - if (!Ty->isSized()) continue; - if (!G->hasInitializer()) continue; - // Touch only those globals that will not be defined in other modules. - // Don't handle ODR type linkages since other modules may be built w/o asan. - if (G->getLinkage() != GlobalVariable::ExternalLinkage && - G->getLinkage() != GlobalVariable::PrivateLinkage && - G->getLinkage() != GlobalVariable::InternalLinkage) - continue; - // Two problems with thread-locals: - // - The address of the main thread's copy can't be computed at link-time. - // - Need to poison all copies, not just the main thread's one. - if (G->isThreadLocal()) - continue; - // For now, just ignore this Alloca if the alignment is large. - if (G->getAlignment() > RedzoneSize) continue; - - // Ignore all the globals with the names starting with "\01L_OBJC_". - // Many of those are put into the .cstring section. The linker compresses - // that section by removing the spare \0s after the string terminator, so - // our redzones get broken. 
- if ((G->getName().find("\01L_OBJC_") == 0) || - (G->getName().find("\01l_OBJC_") == 0)) { - DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G); - continue; - } - - if (G->hasSection()) { - StringRef Section(G->getSection()); - // Ignore the globals from the __OBJC section. The ObjC runtime assumes - // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to - // them. - if ((Section.find("__OBJC,") == 0) || - (Section.find("__DATA, __objc_") == 0)) { - DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G); - continue; - } - // See http://code.google.com/p/address-sanitizer/issues/detail?id=32 - // Constant CFString instances are compiled in the following way: - // -- the string buffer is emitted into - // __TEXT,__cstring,cstring_literals - // -- the constant NSConstantString structure referencing that buffer - // is placed into __DATA,__cfstring - // Therefore there's no point in placing redzones into __DATA,__cfstring. - // Moreover, it causes the linker to crash on OS X 10.7 - if (Section.find("__DATA,__cfstring") == 0) { - DEBUG(dbgs() << "Ignoring CFString: " << *G); - continue; - } - } - - GlobalsToChange.push_back(G); + for (Module::GlobalListType::iterator G = M.global_begin(), + E = M.global_end(); G != E; ++G) { + if (ShouldInstrumentGlobal(G)) + GlobalsToChange.push_back(G); } size_t n = GlobalsToChange.size(); @@ -572,13 +617,22 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { // size_t size; // size_t size_with_redzone; // const char *name; + // size_t has_dynamic_init; // We initialize an array of such structures and pass it to a run-time call. StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, - IntptrTy, IntptrTy, NULL); - SmallVector<Constant *, 16> Initializers(n); + IntptrTy, IntptrTy, + IntptrTy, NULL); + SmallVector<Constant *, 16> Initializers(n), DynamicInit; IRBuilder<> IRB(CtorInsertBefore); + if (ClInitializers) + FindDynamicInitializers(M); + + // The addresses of the first and last dynamically initialized globals in + // this TU. Used in initialization order checking. + Value *FirstDynamic = 0, *LastDynamic = 0; + for (size_t i = 0; i < n; i++) { GlobalVariable *G = GlobalsToChange[i]; PointerType *PtrTy = cast<PointerType>(G->getType()); @@ -587,6 +641,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { uint64_t RightRedzoneSize = RedzoneSize + (RedzoneSize - (SizeInBytes % RedzoneSize)); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); + // Determine whether this global should be poisoned in initialization. + bool GlobalHasDynamicInitializer = HasDynamicInitializer(G); StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL); Constant *NewInitializer = ConstantStruct::get( @@ -621,7 +677,16 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { ConstantInt::get(IntptrTy, SizeInBytes), ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), ConstantExpr::getPointerCast(Name, IntptrTy), + ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer), NULL); + + // Populate the first and last globals declared in this TU. 
+ if (ClInitializers && GlobalHasDynamicInitializer) { + LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy); + if (FirstDynamic == 0) + FirstDynamic = LastDynamic; + } + DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal); } @@ -630,8 +695,13 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage, ConstantArray::get(ArrayOfGlobalStructTy, Initializers), ""); + // Create calls for poisoning before initializers run and unpoisoning after. + if (ClInitializers && FirstDynamic && LastDynamic) + createInitializerPoisonCalls(M, FirstDynamic, LastDynamic); + Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( - kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + kAsanRegisterGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, NULL)); AsanRegisterGlobals->setLinkage(Function::ExternalLinkage); IRB.CreateCall2(AsanRegisterGlobals, @@ -694,12 +764,7 @@ bool AddressSanitizer::runOnModule(Module &M) { std::string FunctionName = std::string(kAsanReportErrorTemplate) + (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex); // If we are merging crash callbacks, they have two parameters. - if (ClMergeCallbacks) - AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>( - M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, - IntptrTy, NULL)); - else - AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>( + AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>( M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, NULL)); } } @@ -845,33 +910,6 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { NumInstrumented++; } - // Create PHI nodes and crash callbacks if we are merging crash callbacks. - if (NumInstrumented) { - for (size_t IsWrite = 0; IsWrite <= 1; IsWrite++) { - for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; - AccessSizeIndex++) { - BasicBlock *BB = AFC.CrashBlock[IsWrite][AccessSizeIndex]; - if (!BB) continue; - assert(ClMergeCallbacks); - AsanFunctionContext::CrashArgsVec &Args = - AFC.CrashArgs[IsWrite][AccessSizeIndex]; - IRBuilder<> IRB(BB->getFirstNonPHI()); - size_t n = Args.size(); - PHINode *PN1 = IRB.CreatePHI(IntptrTy, n); - PHINode *PN2 = IRB.CreatePHI(IntptrTy, n); - // We need to match crash parameters and the predecessors. - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - PI != PE; ++PI) { - n--; - PN1->addIncoming(Args[n].Arg1, *PI); - PN2->addIncoming(Args[n].Arg2, *PI); - } - assert(n == 0); - generateCrashCode(BB, PN1, PN2, IsWrite, AccessSizeIndex); - } - } - } - DEBUG(dbgs() << F); bool ChangedStack = poisonStackInFunction(M, F); diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h index f76c77e..a4bb5a6 100644 --- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -26,30 +26,6 @@ namespace llvm { /// The type parameter T determines the type of the nodes of the graph. template <typename T> class MaximumSpanningTree { - - // A comparing class for comparing weighted edges. 
-    template <typename CT>
-    struct EdgeWeightCompare {
-      bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X,
-                      typename MaximumSpanningTree<CT>::EdgeWeight Y) const {
-        if (X.second > Y.second) return true;
-        if (X.second < Y.second) return false;
-        if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) {
-          if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) {
-            if (BBX->size() > BBY->size()) return true;
-            if (BBX->size() < BBY->size()) return false;
-          }
-        }
-        if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) {
-          if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) {
-            if (BBX->size() > BBY->size()) return true;
-            if (BBX->size() < BBY->size()) return false;
-          }
-        }
-        return false;
-      }
-    };
-
   public:
     typedef std::pair<const T*, const T*> Edge;
     typedef std::pair<Edge, double> EdgeWeight;
@@ -59,6 +35,33 @@ namespace llvm {
     MaxSpanTree MST;
 
+  private:
+    // A comparing class for comparing weighted edges.
+    struct EdgeWeightCompare {
+      static size_t getBlockSize(const T *X) {
+        const BasicBlock *BB = dyn_cast_or_null<BasicBlock>(X);
+        return BB ? BB->size() : 0;
+      }
+
+      bool operator()(EdgeWeight X, EdgeWeight Y) const {
+        if (X.second > Y.second) return true;
+        if (X.second < Y.second) return false;
+
+        // Equal edge weights: break ties by comparing block sizes.
+        size_t XSizeA = getBlockSize(X.first.first);
+        size_t YSizeA = getBlockSize(Y.first.first);
+        if (XSizeA > YSizeA) return true;
+        if (XSizeA < YSizeA) return false;
+
+        size_t XSizeB = getBlockSize(X.first.second);
+        size_t YSizeB = getBlockSize(Y.first.second);
+        if (XSizeB > YSizeB) return true;
+        if (XSizeB < YSizeB) return false;
+
+        return false;
+      }
+    };
+
   public:
     static char ID; // Class identification, replacement for typeinfo
@@ -66,7 +69,7 @@ namespace llvm {
     /// spanning tree.
     MaximumSpanningTree(EdgeWeights &EdgeVector) {
 
-      std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare<T>());
+      std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare());
 
       // Create spanning tree, Forest contains a special data structure
       // that makes checking if two nodes are already in a common (sub-)tree
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 277c4d5..a8deda8 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -66,11 +66,6 @@ static cl::opt<bool> DisableBranchOpts(
   "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
   cl::desc("Disable branch optimizations in CodeGenPrepare"));
 
-// FIXME: Remove this abomination once all of the tests pass without it!
-static cl::opt<bool> DisableDeleteDeadBlocks( - "disable-cgp-delete-dead-blocks", cl::Hidden, cl::init(false), - cl::desc("Disable deleting dead blocks in CodeGenPrepare")); - static cl::opt<bool> DisableSelectToBranch( "disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion.")); @@ -116,6 +111,7 @@ namespace { } private: + bool EliminateFallThrough(Function &F); bool EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; void EliminateMostlyEmptyBlock(BasicBlock *BB); @@ -187,10 +183,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) { WorkList.insert(*II); } - if (!DisableDeleteDeadBlocks) - for (SmallPtrSet<BasicBlock*, 8>::iterator - I = WorkList.begin(), E = WorkList.end(); I != E; ++I) - DeleteDeadBlock(*I); + for (SmallPtrSet<BasicBlock*, 8>::iterator + I = WorkList.begin(), E = WorkList.end(); I != E; ++I) + DeleteDeadBlock(*I); + + // Merge pairs of basic blocks with unconditional branches, connected by + // a single edge. + if (EverMadeChange || MadeChange) + MadeChange |= EliminateFallThrough(F); if (MadeChange) ModifiedDT = true; @@ -203,6 +203,39 @@ bool CodeGenPrepare::runOnFunction(Function &F) { return EverMadeChange; } +/// EliminateFallThrough - Merge basic blocks which are connected +/// by a single edge, where one of the basic blocks has a single successor +/// pointing to the other basic block, which has a single predecessor. +bool CodeGenPrepare::EliminateFallThrough(Function &F) { + bool Changed = false; + // Scan all of the blocks in the function, except for the entry block. + for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; + // If the destination block has a single pred, then this is a trivial + // edge, just collapse it. + BasicBlock *SinglePred = BB->getSinglePredecessor(); + + if (!SinglePred || SinglePred == BB) continue; + + BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); + if (Term && !Term->isConditional()) { + Changed = true; + DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n"); + // Remember if SinglePred was the entry block of the function. + // If so, we will need to move BB back to the entry position. + bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + MergeBasicBlockIntoOnlyPred(BB, this); + + if (isEntry && BB != &BB->getParent()->getEntryBlock()) + BB->moveBefore(&BB->getParent()->getEntryBlock()); + + // We have erased a block. Update the iterator. + I = BB; + } + } + return Changed; +} + /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, /// debug info directives, and an unconditional branch. Passes before isel /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for @@ -610,7 +643,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // that have the default "don't know" as the objectsize. Anything else // should be left alone. CodeGenPrepareFortifiedLibCalls Simplifier; - return Simplifier.fold(CI, TD); + return Simplifier.fold(CI, TD, TLInfo); } /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return @@ -645,10 +678,18 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { if (!TLI) return false; + PHINode *PN = 0; + BitCastInst *BCI = 0; Value *V = RI->getReturnValue(); - PHINode *PN = V ? 
dyn_cast<PHINode>(V) : NULL; - if (V && !PN) - return false; + if (V) { + BCI = dyn_cast<BitCastInst>(V); + if (BCI) + V = BCI->getOperand(0); + + PN = dyn_cast<PHINode>(V); + if (!PN) + return false; + } BasicBlock *BB = RI->getParent(); if (PN && PN->getParent() != BB) @@ -666,6 +707,9 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { if (PN) { BasicBlock::iterator BI = BB->begin(); do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); + if (&*BI == BCI) + // Also skip over the bitcast. + ++BI; if (&*BI != RI) return false; } else { diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 5eff0e5..8b1283f 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -378,7 +378,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // // We have to be careful here as *Off is signed while *.Size is unsigned. if (EarlierOff >= LaterOff && - Later.Size > Earlier.Size && + Later.Size >= Earlier.Size && uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) return OverwriteComplete; @@ -740,12 +740,19 @@ bool DSE::handleEndBlock(BasicBlock &BB) { continue; } - if (isa<AllocaInst>(BBI) || isAllocLikeFn(BBI)) { + if (isa<AllocaInst>(BBI)) { + // Remove allocas from the list of dead stack objects; there can't be + // any references before the definition. DeadStackObjects.remove(BBI); continue; } if (CallSite CS = cast<Value>(BBI)) { + // Remove allocation function calls from the list of dead stack objects; + // there can't be any references before the definition. + if (isAllocLikeFn(BBI)) + DeadStackObjects.remove(BBI); + // If this call does not access memory, it can't be loading any of our // pointers. if (AA->doesNotAccessMemory(CS)) @@ -771,7 +778,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // If all of the allocas were clobbered by the call then we're not going // to find anything else to process. if (DeadStackObjects.empty()) - return MadeChange; + break; continue; } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 140864d..4822fd0 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -512,7 +512,7 @@ namespace { /// have that value number. Use findLeader to query it. struct LeaderTableEntry { Value *Val; - BasicBlock *BB; + const BasicBlock *BB; LeaderTableEntry *Next; }; DenseMap<uint32_t, LeaderTableEntry> LeaderTable; @@ -542,7 +542,7 @@ namespace { private: /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for /// its value number. 
- void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { + void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) { LeaderTableEntry &Curr = LeaderTable[N]; if (!Curr.Val) { Curr.Val = V; @@ -608,13 +608,13 @@ namespace { void dump(DenseMap<uint32_t, Value*> &d); bool iterateOnFunction(Function &F); bool performPRE(Function &F); - Value *findLeader(BasicBlock *BB, uint32_t num); + Value *findLeader(const BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); unsigned replaceAllDominatedUsesWith(Value *From, Value *To, - BasicBlock *Root); - bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root); + const BasicBlockEdge &Root); + bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root); }; char GVN::ID = 0; @@ -1977,7 +1977,7 @@ bool GVN::processLoad(LoadInst *L) { // and then scan the list to find one whose block dominates the block in // question. This is fast because dominator tree queries consist of only // a few comparisons of DFS numbers. -Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { +Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) { LeaderTableEntry Vals = LeaderTable[num]; if (!Vals.Val) return 0; @@ -2004,22 +2004,13 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { /// use is dominated by the given basic block. Returns the number of uses that /// were replaced. unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, - BasicBlock *Root) { + const BasicBlockEdge &Root) { unsigned Count = 0; for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); UI != UE; ) { Use &U = (UI++).getUse(); - // If From occurs as a phi node operand then the use implicitly lives in the - // corresponding incoming block. Otherwise it is the block containing the - // user that must be dominated by Root. - BasicBlock *UsingBlock; - if (PHINode *PN = dyn_cast<PHINode>(U.getUser())) - UsingBlock = PN->getIncomingBlock(U); - else - UsingBlock = cast<Instruction>(U.getUser())->getParent(); - - if (DT->dominates(Root, UsingBlock)) { + if (DT->dominates(Root, U)) { U.set(To); ++Count; } @@ -2027,13 +2018,34 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, return Count; } +/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return +/// true if every path from the entry block to 'Dst' passes via this edge. In +/// particular 'Dst' must not be reachable via another edge from 'Src'. +static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, + DominatorTree *DT) { + // While in theory it is interesting to consider the case in which Dst has + // more than one predecessor, because Dst might be part of a loop which is + // only reachable from Src, in practice it is pointless since at the time + // GVN runs all such loops have preheaders, which means that Dst will have + // been changed to have only one predecessor, namely Src. + const BasicBlock *Pred = E.getEnd()->getSinglePredecessor(); + const BasicBlock *Src = E.getStart(); + assert((!Pred || Pred == Src) && "No edge between these basic blocks!"); + (void)Src; + return Pred != 0; +} + /// propagateEquality - The given values are known to be equal in every block /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with /// 'RHS' everywhere in the scope. Returns whether a change was made. 
-bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { +bool GVN::propagateEquality(Value *LHS, Value *RHS, + const BasicBlockEdge &Root) { SmallVector<std::pair<Value*, Value*>, 4> Worklist; Worklist.push_back(std::make_pair(LHS, RHS)); bool Changed = false; + // For speed, compute a conservative fast approximation to + // DT->dominates(Root, Root.getEnd()); + bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT); while (!Worklist.empty()) { std::pair<Value*, Value*> Item = Worklist.pop_back_val(); @@ -2065,9 +2077,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { LVN = RVN; } } - assert((!isa<Instruction>(RHS) || - DT->properlyDominates(cast<Instruction>(RHS)->getParent(), Root)) && - "Instruction doesn't dominate scope!"); // If value numbering later sees that an instruction in the scope is equal // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve @@ -2076,8 +2085,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { // if RHS is an instruction (if an instruction in the scope is morphed into // LHS then it will be turned into RHS by the next GVN iteration anyway, so // using the leader table is about compiling faster, not optimizing better). - if (!isa<Instruction>(RHS)) - addToLeaderTable(LVN, RHS, Root); + // The leader table only tracks basic blocks, not edges. Only add to if we + // have the simple case where the edge dominates the end. + if (RootDominatesEnd && !isa<Instruction>(RHS)) + addToLeaderTable(LVN, RHS, Root.getEnd()); // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As // LHS always has at least one use that is not dominated by Root, this will @@ -2136,7 +2147,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { // If the number we were assigned was brand new then there is no point in // looking for an instruction realizing it: there cannot be one! if (Num < NextNum) { - Value *NotCmp = findLeader(Root, Num); + Value *NotCmp = findLeader(Root.getEnd(), Num); if (NotCmp && isa<Instruction>(NotCmp)) { unsigned NumReplacements = replaceAllDominatedUsesWith(NotCmp, NotVal, Root); @@ -2146,7 +2157,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { } // Ensure that any instruction in scope that gets the "A < B" value number // is replaced with false. - addToLeaderTable(Num, NotVal, Root); + // The leader table only tracks basic blocks, not edges. Only add to if we + // have the simple case where the edge dominates the end. + if (RootDominatesEnd) + addToLeaderTable(Num, NotVal, Root.getEnd()); continue; } @@ -2155,22 +2169,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { return Changed; } -/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return -/// true if every path from the entry block to 'Dst' passes via this edge. In -/// particular 'Dst' must not be reachable via another edge from 'Src'. -static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst, - DominatorTree *DT) { - // While in theory it is interesting to consider the case in which Dst has - // more than one predecessor, because Dst might be part of a loop which is - // only reachable from Src, in practice it is pointless since at the time - // GVN runs all such loops have preheaders, which means that Dst will have - // been changed to have only one predecessor, namely Src. 
- BasicBlock *Pred = Dst->getSinglePredecessor(); - assert((!Pred || Pred == Src) && "No edge between these basic blocks!"); - (void)Src; - return Pred != 0; -} - /// processInstruction - When calculating availability, handle an instruction /// by inserting it into the appropriate sets bool GVN::processInstruction(Instruction *I) { @@ -2210,18 +2208,20 @@ bool GVN::processInstruction(Instruction *I) { BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); + // Avoid multiple edges early. + if (TrueSucc == FalseSucc) + return false; + BasicBlock *Parent = BI->getParent(); bool Changed = false; - if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT)) - Changed |= propagateEquality(BranchCond, - ConstantInt::getTrue(TrueSucc->getContext()), - TrueSucc); + Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext()); + BasicBlockEdge TrueE(Parent, TrueSucc); + Changed |= propagateEquality(BranchCond, TrueVal, TrueE); - if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT)) - Changed |= propagateEquality(BranchCond, - ConstantInt::getFalse(FalseSucc->getContext()), - FalseSucc); + Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext()); + BasicBlockEdge FalseE(Parent, FalseSucc); + Changed |= propagateEquality(BranchCond, FalseVal, FalseE); return Changed; } @@ -2234,8 +2234,9 @@ bool GVN::processInstruction(Instruction *I) { for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { BasicBlock *Dst = i.getCaseSuccessor(); - if (isOnlyReachableViaThisEdge(Parent, Dst, DT)) - Changed |= propagateEquality(SwitchCond, i.getCaseValue(), Dst); + BasicBlockEdge E(Parent, Dst); + if (E.isSingleEdge()) + Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E); } return Changed; } diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 582948e..0192e92 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -175,7 +175,9 @@ namespace { bool canSinkOrHoistInst(Instruction &I); bool isNotUsedInLoop(Instruction &I); - void PromoteAliasSet(AliasSet &AS); + void PromoteAliasSet(AliasSet &AS, + SmallVectorImpl<BasicBlock*> &ExitBlocks, + SmallVectorImpl<Instruction*> &InsertPts); }; } @@ -256,10 +258,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { + SmallVector<BasicBlock *, 8> ExitBlocks; + SmallVector<Instruction *, 8> InsertPts; + // Loop over all of the alias sets in the tracker object. for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); I != E; ++I) - PromoteAliasSet(*I); + PromoteAliasSet(*I, ExitBlocks, InsertPts); } // Clear out loops state information for the next iteration @@ -631,6 +636,7 @@ namespace { Value *SomePtr; // Designated pointer to store to. 
SmallPtrSet<Value*, 4> &PointerMustAliases; SmallVectorImpl<BasicBlock*> &LoopExitBlocks; + SmallVectorImpl<Instruction*> &LoopInsertPts; AliasSetTracker &AST; DebugLoc DL; int Alignment; @@ -638,11 +644,12 @@ namespace { LoopPromoter(Value *SP, const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, SmallPtrSet<Value*, 4> &PMA, - SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast, - DebugLoc dl, int alignment) + SmallVectorImpl<BasicBlock*> &LEB, + SmallVectorImpl<Instruction*> &LIP, + AliasSetTracker &ast, DebugLoc dl, int alignment) : LoadAndStorePromoter(Insts, S), SomePtr(SP), - PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl), - Alignment(alignment) {} + PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), + AST(ast), DL(dl), Alignment(alignment) {} virtual bool isInstInList(Instruction *I, const SmallVectorImpl<Instruction*> &) const { @@ -662,7 +669,7 @@ namespace { for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = LoopExitBlocks[i]; Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); - Instruction *InsertPos = ExitBlock->getFirstInsertionPt(); + Instruction *InsertPos = LoopInsertPts[i]; StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); @@ -684,7 +691,9 @@ namespace { /// looping over the stores in the loop, looking for stores to Must pointers /// which are loop invariant. /// -void LICM::PromoteAliasSet(AliasSet &AS) { +void LICM::PromoteAliasSet(AliasSet &AS, + SmallVectorImpl<BasicBlock*> &ExitBlocks, + SmallVectorImpl<Instruction*> &InsertPts) { // We can promote this alias set if it has a store, if it is a "Must" alias // set, if the pointer is loop invariant, and if we are not eliminating any // volatile loads or stores. @@ -794,14 +803,20 @@ void LICM::PromoteAliasSet(AliasSet &AS) { // location is better than none. DebugLoc DL = LoopUses[0]->getDebugLoc(); - SmallVector<BasicBlock*, 8> ExitBlocks; - CurLoop->getUniqueExitBlocks(ExitBlocks); + // Figure out the loop exits and their insertion points, if this is the + // first promotion. + if (ExitBlocks.empty()) { + CurLoop->getUniqueExitBlocks(ExitBlocks); + InsertPts.resize(ExitBlocks.size()); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt(); + } // We use the SSAUpdater interface to insert phi nodes as required. SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - *CurAST, DL, Alignment); + InsertPts, *CurAST, DL, Alignment); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. 
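Note: the LICM hunks above cache the loop's unique exit blocks and store
insertion points once and reuse them for every promoted alias set. What the
promotion itself does, as a schematic C-level before/after (hand-written
illustration, not taken from this patch):

    /* Before: the must-alias location *P is stored to on every iteration. */
    for (int i = 0; i < n; ++i)
      *P += A[i];

    /* After promotion: the value lives in a register inside the loop and is
       stored back once at each exit block (the cached insertion points). */
    int t = *P;
    for (int i = 0; i < n; ++i)
      t += A[i];
    *P = t;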
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index b14a713..0ae7a51 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -738,7 +738,8 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { bool Changed = false; while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()); + Value *V = DeadInsts.pop_back_val(); + Instruction *I = dyn_cast_or_null<Instruction>(V); if (I == 0 || !isInstructionTriviallyDead(I)) continue; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index ffcf97c..09687d8 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -543,6 +543,7 @@ static bool LinearizeExprTree(BinaryOperator *I, // Update the number of paths to the leaf. IncorporateWeight(It->second, Weight, Opcode); +#if 0 // TODO: Re-enable once PR13021 is fixed. // The leaf already has one use from inside the expression. As we want // exactly one such use, drop this new use of the leaf. assert(!Op->hasOneUse() && "Only one use, but we got here twice!"); @@ -559,6 +560,7 @@ static bool LinearizeExprTree(BinaryOperator *I, Leaves.erase(It); continue; } +#endif // If we still have uses that are not accounted for by the expression // then it is not safe to modify the value. diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index ec835b1..8090fdf 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -56,7 +56,6 @@ STATISTIC(NumReplaced, "Number of allocas broken up"); STATISTIC(NumPromoted, "Number of allocas promoted"); STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion"); STATISTIC(NumConverted, "Number of aggregates converted to scalar"); -STATISTIC(NumGlobals, "Number of allocas copied from constant global"); namespace { struct SROA : public FunctionPass { @@ -183,9 +182,6 @@ namespace { void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts); bool ShouldAttemptScalarRepl(AllocaInst *AI); - - static MemTransferInst *isOnlyCopiedFromConstantGlobal( - AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete); }; // SROA_DT - SROA that uses DominatorTree. @@ -612,11 +608,16 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); - if (!GEP->hasAllConstantIndices()) - NonConstantIdx = Indices.pop_back_val(); + Value* GEPNonConstantIdx = 0; + if (!GEP->hasAllConstantIndices()) { + assert(!NonConstantIdx && + "Dynamic GEP reading from dynamic GEP unsupported"); + GEPNonConstantIdx = Indices.pop_back_val(); + } else + GEPNonConstantIdx = NonConstantIdx; uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), Indices); - ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, NonConstantIdx); + ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, GEPNonConstantIdx); GEP->eraseFromParent(); continue; } @@ -1460,26 +1461,6 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) { return false; } -/// getPointeeAlignment - Compute the minimum alignment of the value pointed -/// to by the given pointer. 
-static unsigned getPointeeAlignment(Value *V, const TargetData &TD) { - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if (CE->getOpcode() == Instruction::BitCast || - (CE->getOpcode() == Instruction::GetElementPtr && - cast<GEPOperator>(CE)->hasAllZeroIndices())) - return getPointeeAlignment(CE->getOperand(0), TD); - - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) - if (!GV->isDeclaration()) - return TD.getPreferredAlignment(GV); - - if (PointerType *PT = dyn_cast<PointerType>(V->getType())) - return TD.getABITypeAlignment(PT->getElementType()); - - return 0; -} - - // performScalarRepl - This algorithm is a simple worklist driven algorithm, // which runs on all of the alloca instructions in the function, removing them // if they are only used by getelementptr instructions. @@ -1511,29 +1492,6 @@ bool SROA::performScalarRepl(Function &F) { if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized()) continue; - // Check to see if this allocation is only modified by a memcpy/memmove from - // a constant global whose alignment is equal to or exceeds that of the - // allocation. If this is the case, we can change all users to use - // the constant global instead. This is commonly produced by the CFE by - // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' - // is only subsequently read. - SmallVector<Instruction *, 4> ToDelete; - if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) { - if (AI->getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { - DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); - DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); - for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) - ToDelete[i]->eraseFromParent(); - Constant *TheSrc = cast<Constant>(Copy->getSource()); - AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); - Copy->eraseFromParent(); // Don't mutate the global. - AI->eraseFromParent(); - ++NumGlobals; - Changed = true; - continue; - } - } - // Check to see if we can perform the core SROA transformation. We cannot // transform the allocation instruction if it is an array allocation // (allocations OF arrays are ok though), and an allocation of a scalar @@ -2651,134 +2609,3 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { return true; } - - - -/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to -/// some part of a constant global variable. This intentionally only accepts -/// constant expressions because we don't can't rewrite arbitrary instructions. -static bool PointsToConstantGlobal(Value *V) { - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) - return GV->isConstant(); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if (CE->getOpcode() == Instruction::BitCast || - CE->getOpcode() == Instruction::GetElementPtr) - return PointsToConstantGlobal(CE->getOperand(0)); - return false; -} - -/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) -/// pointer to an alloca. Ignore any reads of the pointer, return false if we -/// see any stores or other unknown uses. If we see pointer arithmetic, keep -/// track of whether it moves the pointer (with isOffset) but otherwise traverse -/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to -/// the alloca, and if the source pointer is a pointer to a constant global, we -/// can optimize this. 
-static bool -isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, - bool isOffset, - SmallVector<Instruction *, 4> &LifetimeMarkers) { - // We track lifetime intrinsics as we encounter them. If we decide to go - // ahead and replace the value with the global, this lets the caller quickly - // eliminate the markers. - - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { - User *U = cast<Instruction>(*UI); - - if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - // Ignore non-volatile loads, they are always ok. - if (!LI->isSimple()) return false; - continue; - } - - if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { - // If uses of the bitcast are ok, we are ok. - if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset, - LifetimeMarkers)) - return false; - continue; - } - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { - // If the GEP has all zero indices, it doesn't offset the pointer. If it - // doesn't, it does. - if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, - isOffset || !GEP->hasAllZeroIndices(), - LifetimeMarkers)) - return false; - continue; - } - - if (CallSite CS = U) { - // If this is the function being called then we treat it like a load and - // ignore it. - if (CS.isCallee(UI)) - continue; - - // If this is a readonly/readnone call site, then we know it is just a - // load (but one that potentially returns the value itself), so we can - // ignore it if we know that the value isn't captured. - unsigned ArgNo = CS.getArgumentNo(UI); - if (CS.onlyReadsMemory() && - (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) - continue; - - // If this is being passed as a byval argument, the caller is making a - // copy, so it is only a read of the alloca. - if (CS.isByValArgument(ArgNo)) - continue; - } - - // Lifetime intrinsics can be handled by the caller. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - assert(II->use_empty() && "Lifetime markers have no result to use!"); - LifetimeMarkers.push_back(II); - continue; - } - } - - // If this is isn't our memcpy/memmove, reject it as something we can't - // handle. - MemTransferInst *MI = dyn_cast<MemTransferInst>(U); - if (MI == 0) - return false; - - // If the transfer is using the alloca as a source of the transfer, then - // ignore it since it is a load (unless the transfer is volatile). - if (UI.getOperandNo() == 1) { - if (MI->isVolatile()) return false; - continue; - } - - // If we already have seen a copy, reject the second one. - if (TheCopy) return false; - - // If the pointer has been offset from the start of the alloca, we can't - // safely handle this. - if (isOffset) return false; - - // If the memintrinsic isn't using the alloca as the dest, reject it. - if (UI.getOperandNo() != 0) return false; - - // If the source of the memcpy/move is not a constant global, reject it. - if (!PointsToConstantGlobal(MI->getSource())) - return false; - - // Otherwise, the transform is safe. Remember the copy instruction. - TheCopy = MI; - } - return true; -} - -/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only -/// modified by a copy from a constant global. If we can prove this, we can -/// replace any uses of the alloca with uses of the global directly. 
-MemTransferInst * -SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI, - SmallVector<Instruction*, 4> &ToDelete) { - MemTransferInst *TheCopy = 0; - if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete)) - return TheCopy; - return 0; -} diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index a1a8a41..3904419 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -157,14 +157,15 @@ struct StrCatOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - EmitStrLenMemCpy(Src, Dst, Len, B); - return Dst; + return EmitStrLenMemCpy(Src, Dst, Len, B); } - void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) { + Value *EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) { // We need to find the end of the destination string. That's where the // memory is to be moved to. We just generate a call to strlen. - Value *DstLen = EmitStrLen(Dst, B, TD); + Value *DstLen = EmitStrLen(Dst, B, TD, TLI); + if (!DstLen) + return 0; // Now that we have the destination's length, we must index into the // destination's pointer to get the actual memcpy destination (end of @@ -175,6 +176,7 @@ struct StrCatOpt : public LibCallOptimization { // concatenation for us. Make a memcpy to copy the nul byte with align = 1. B.CreateMemCpy(CpyDst, Src, ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1); + return Dst; } }; @@ -221,8 +223,7 @@ struct StrNCatOpt : public StrCatOpt { // strncat(x, s, c) -> strcat(x, s) // s is constant so the strcat can be optimized further - EmitStrLenMemCpy(Src, Dst, SrcLen, B); - return Dst; + return EmitStrLenMemCpy(Src, Dst, SrcLen, B); } }; @@ -254,7 +255,7 @@ struct StrChrOpt : public LibCallOptimization { return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. ConstantInt::get(TD->getIntPtrType(*Context), Len), - B, TD); + B, TD, TLI); } // Otherwise, the character is a constant, see if the first argument is @@ -299,7 +300,7 @@ struct StrRChrOpt : public LibCallOptimization { if (!getConstantStringInfo(SrcStr, Str)) { // strrchr(s, 0) -> strchr(s, 0) if (TD && CharC->isZero()) - return EmitStrChr(SrcStr, '\0', B, TD); + return EmitStrChr(SrcStr, '\0', B, TD, TLI); return 0; } @@ -355,7 +356,7 @@ struct StrCmpOpt : public LibCallOptimization { return EmitMemCmp(Str1P, Str2P, ConstantInt::get(TD->getIntPtrType(*Context), - std::min(Len1, Len2)), B, TD); + std::min(Len1, Len2)), B, TD, TLI); } return 0; @@ -391,7 +392,7 @@ struct StrNCmpOpt : public LibCallOptimization { return ConstantInt::get(CI->getType(), 0); if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) - return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD); + return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI); StringRef Str1, Str2; bool HasStr1 = getConstantStringInfo(Str1P, Str1); @@ -447,11 +448,10 @@ struct StrCpyOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. 
- if (OptChkCall) - EmitMemCpyChk(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), - CI->getArgOperand(2), B, TD); - else + if (!OptChkCall || + !EmitMemCpyChk(Dst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len), + CI->getArgOperand(2), B, TD, TLI)) B.CreateMemCpy(Dst, Src, ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); return Dst; @@ -480,8 +480,10 @@ struct StpCpyOpt: public LibCallOptimization { if (!TD) return 0; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); - if (Dst == Src) // stpcpy(x,x) -> x+strlen(x) - return B.CreateInBoundsGEP(Dst, EmitStrLen(Src, B, TD)); + if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) + Value *StrLen = EmitStrLen(Src, B, TD, TLI); + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + } // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); @@ -494,9 +496,8 @@ struct StpCpyOpt: public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. - if (OptChkCall) - EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD); - else + if (!OptChkCall || !EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, + TD, TLI)) B.CreateMemCpy(Dst, Src, LenV, 1); return DstEnd; } @@ -609,7 +610,7 @@ struct StrPBrkOpt : public LibCallOptimization { // strpbrk(s, "a") -> strchr(s, 'a') if (TD && HasS2 && S2.size() == 1) - return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD); + return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI); return 0; } @@ -698,7 +699,7 @@ struct StrCSpnOpt : public LibCallOptimization { // strcspn(s, "") -> strlen(s) if (TD && HasS2 && S2.empty()) - return EmitStrLen(CI->getArgOperand(0), B, TD); + return EmitStrLen(CI->getArgOperand(0), B, TD, TLI); return 0; } @@ -722,9 +723,13 @@ struct StrStrOpt : public LibCallOptimization { // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { - Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD); + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI); + if (!StrLen) + return 0; Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), - StrLen, B, TD); + StrLen, B, TD, TLI); + if (!StrNCmp) + return 0; for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); UI != UE; ) { ICmpInst *Old = cast<ICmpInst>(*UI++); @@ -760,9 +765,10 @@ struct StrStrOpt : public LibCallOptimization { } // fold strstr(x, "y") -> strchr(x, 'y'). - if (HasStr2 && ToFindStr.size() == 1) - return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), - ToFindStr[0], B, TD), CI->getType()); + if (HasStr2 && ToFindStr.size() == 1) { + Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI); + return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0; + } return 0; } }; @@ -1179,8 +1185,8 @@ struct PrintFOpt : public LibCallOptimization { // printf("x") -> putchar('x'), even for '%'. if (FormatStr.size() == 1) { - Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD); - if (CI->use_empty()) return CI; + Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI); + if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } @@ -1191,26 +1197,26 @@ struct PrintFOpt : public LibCallOptimization { // pass to be run after this pass, to merge duplicate strings. 
FormatStr = FormatStr.drop_back(); Value *GV = B.CreateGlobalString(FormatStr, "str"); - EmitPutS(GV, B, TD); - return CI->use_empty() ? (Value*)CI : - ConstantInt::get(CI->getType(), FormatStr.size()+1); + Value *NewCI = EmitPutS(GV, B, TD, TLI); + return (CI->use_empty() || !NewCI) ? + NewCI : + ConstantInt::get(CI->getType(), FormatStr.size()+1); } // Optimize specific format strings. // printf("%c", chr) --> putchar(chr) if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD); + Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI); - if (CI->use_empty()) return CI; + if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isPointerTy()) { - EmitPutS(CI->getArgOperand(1), B, TD); - return CI; + return EmitPutS(CI->getArgOperand(1), B, TD, TLI); } return 0; } @@ -1297,7 +1303,9 @@ struct SPrintFOpt : public LibCallOptimization { // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; - Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD); + Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI); + if (!Len) + return 0; Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); @@ -1364,8 +1372,8 @@ struct FWriteOpt : public LibCallOptimization { // This optimisation is only valid, if the return value is unused. if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); - EmitFPutC(Char, CI->getArgOperand(3), B, TD); - return ConstantInt::get(CI->getType(), 1); + Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; } return 0; @@ -1390,10 +1398,10 @@ struct FPutsOpt : public LibCallOptimization { // fputs(s,F) --> fwrite(s,1,strlen(s),F) uint64_t Len = GetStringLength(CI->getArgOperand(0)); if (!Len) return 0; - EmitFWrite(CI->getArgOperand(0), - ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getArgOperand(1), B, TD, TLI); - return CI; // Known to have no uses (see above). + // Known to have no uses (see above). + return EmitFWrite(CI->getArgOperand(0), + ConstantInt::get(TD->getIntPtrType(*Context), Len-1), + CI->getArgOperand(1), B, TD, TLI); } }; @@ -1417,11 +1425,11 @@ struct FPrintFOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - EmitFWrite(CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()), - CI->getArgOperand(0), B, TD, TLI); - return ConstantInt::get(CI->getType(), FormatStr.size()); + Value *NewCI = EmitFWrite(CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()), + CI->getArgOperand(0), B, TD, TLI); + return NewCI ? 
ConstantInt::get(CI->getType(), FormatStr.size()) : 0; } // The remaining optimizations require the format string to be "%s" or "%c" @@ -1434,16 +1442,16 @@ struct FPrintFOpt : public LibCallOptimization { if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> fputc(chr, F) if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; - EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD); - return ConstantInt::get(CI->getType(), 1); + Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, + TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) return 0; - EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); - return CI; + return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); } return 0; } @@ -1494,8 +1502,8 @@ struct PutsOpt : public LibCallOptimization { if (Str.empty() && CI->use_empty()) { // puts("") -> putchar('\n') - Value *Res = EmitPutChar(B.getInt32('\n'), B, TD); - if (CI->use_empty()) return CI; + Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI); + if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } @@ -1633,6 +1641,8 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["llvm.exp2.f64"] = &Exp2; Optimizations["llvm.exp2.f32"] = &Exp2; + if (TLI->has(LibFunc::fabs) && TLI->has(LibFunc::fabsf)) + Optimizations["fabs"] = &UnaryDoubleFP; if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf)) Optimizations["floor"] = &UnaryDoubleFP; if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf)) @@ -1643,6 +1653,8 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["rint"] = &UnaryDoubleFP; if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf)) Optimizations["nearbyint"] = &UnaryDoubleFP; + if (TLI->has(LibFunc::trunc) && TLI->has(LibFunc::truncf)) + Optimizations["trunc"] = &UnaryDoubleFP; // Integer Optimizations Optimizations["ffs"] = &FFS; diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 5576432..2679b93 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -659,10 +659,26 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, // If the return instruction returns a value, and if the value was a // PHI node in "BB", propagate the right value into the return. for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); - i != e; ++i) - if (PHINode *PN = dyn_cast<PHINode>(*i)) - if (PN->getParent() == BB) - *i = PN->getIncomingValueForBlock(Pred); + i != e; ++i) { + Value *V = *i; + Instruction *NewBC = 0; + if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) { + // Return value might be bitcasted. Clone and insert it before the + // return instruction. + V = BCI->getOperand(0); + NewBC = BCI->clone(); + Pred->getInstList().insert(NewRet, NewBC); + *i = NewBC; + } + if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (PN->getParent() == BB) { + if (NewBC) + NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred)); + else + *i = PN->getIncomingValueForBlock(Pred); + } + } + } // Update any PHI nodes in the returning block to realize that we no // longer branch to them. 
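The SimplifyLibCalls hunks above all apply one mechanical pattern: each Emit* helper may now return null when TargetLibraryInfo reports that the target lacks the corresponding library function (see the BuildLibCalls.cpp diff below), so every fold must check the result and decline the transformation rather than assume a call was emitted. A minimal sketch of that pattern, reusing only the EmitStrLen signature visible in this patch; the fold function itself is hypothetical:

  // Hypothetical fold: bail out cleanly when the helper declines to emit.
  Value *foldUsingStrLen(Value *Src, IRBuilder<> &B, const TargetData *TD,
                         const TargetLibraryInfo *TLI) {
    Value *Len = EmitStrLen(Src, B, TD, TLI);
    if (!Len)
      return 0;  // strlen unavailable: leave the original call untouched
    // ... otherwise build the simplified replacement from Len ...
    return Len;
  }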
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 27f7724..e13fd71 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -34,7 +34,11 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { /// EmitStrLen - Emit a call to the strlen function to the builder, for the /// specified pointer. This always returns an integer value of size intptr_t. -Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strlen)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -53,11 +57,41 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) { return CI; } +/// EmitStrNLen - Emit a call to the strnlen function to the builder, for the +/// specified pointer. Ptr is required to be some pointer type, MaxLen must +/// be of size_t type, and the return value has 'intptr_t' type. +Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, + const TargetData *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strnlen)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI), + TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + NULL); + CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen"); + if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + /// EmitStrChr - Emit a call to the strchr function to the builder, for the /// specified pointer and character. Ptr is required to be some pointer type, /// and the return value has 'i8*' type. Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, - const TargetData *TD) { + const TargetData *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strchr)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); @@ -75,7 +109,11 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, /// EmitStrNCmp - Emit a call to the strncmp function to the builder. Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, - IRBuilder<> &B, const TargetData *TD) { + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strncmp)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -101,7 +139,11 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the /// specified pointer arguments. 
Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, - const TargetData *TD, StringRef Name) { + const TargetData *TD, const TargetLibraryInfo *TLI, + StringRef Name) { + if (!TLI->has(LibFunc::strcpy)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); @@ -119,7 +161,11 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, /// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, - IRBuilder<> &B, const TargetData *TD, StringRef Name) { + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI, StringRef Name) { + if (!TLI->has(LibFunc::strncpy)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); @@ -139,7 +185,11 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src /// are pointers. Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, - IRBuilder<> &B, const TargetData *TD) { + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memcpy_chk)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind); @@ -162,7 +212,11 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. Value *llvm::EmitMemChr(Value *Ptr, Value *Val, - Value *Len, IRBuilder<> &B, const TargetData *TD) { + Value *Len, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memchr)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); @@ -183,7 +237,11 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, /// EmitMemCmp - Emit a call to the memcmp function. Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, - Value *Len, IRBuilder<> &B, const TargetData *TD) { + Value *Len, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memcmp)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -236,7 +294,11 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. -Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::putchar)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), B.getInt32Ty(), NULL); @@ -254,7 +316,11 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) { /// EmitPutS - Emit a call to the puts function. This assumes that Str is /// some pointer. 
-void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::puts)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -267,13 +333,16 @@ void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) { CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); - + return CI; } /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is /// an integer and File is a pointer to FILE. -void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, - const TargetData *TD) { +Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, + const TargetData *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fputc)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); @@ -295,12 +364,16 @@ void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); + return CI; } /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. -void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, - const TargetData *TD, const TargetLibraryInfo *TLI) { +Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, + const TargetData *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fputs)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -321,13 +394,17 @@ void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); + return CI; } /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. -void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, - IRBuilder<> &B, const TargetData *TD, - const TargetLibraryInfo *TLI) { +Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fwrite)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -354,11 +431,13 @@ void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); + return CI; } SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { } -bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { +bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD, + const TargetLibraryInfo *TLI) { // We really need TargetData for later. if (!TD) return false; @@ -446,7 +525,9 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { // string lengths for varying. 
if (isFoldable(2, 1, true)) { Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD, - Name.substr(2, 6)); + TLI, Name.substr(2, 6)); + if (!Ret) + return false; replaceCall(Ret); return true; } @@ -464,7 +545,10 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { if (isFoldable(3, 2, false)) { Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TD, Name.substr(2, 7)); + CI->getArgOperand(2), B, TD, TLI, + Name.substr(2, 7)); + if (!Ret) + return false; replaceCall(Ret); return true; } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index b3f5289..72d4199 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -39,7 +39,7 @@ SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { - delete &getAvailableVals(AV); + delete static_cast<AvailableValsTy*>(AV); } /// Initialize - Reset this object to get ready for a new set of SSA @@ -214,6 +214,11 @@ void SSAUpdater::RewriteUse(Use &U) { else V = GetValueInMiddleOfBlock(User->getParent()); + // Notify the users of the existing value that it is being replaced. + Value *OldVal = U.get(); + if (OldVal != V && OldVal->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(OldVal, V); + U.set(V); } diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index aedb86b..c09c69b 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -26,6 +26,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Operator.h" #include "llvm/Module.h" +#include "llvm/TypeFinder.h" #include "llvm/ValueSymbolTable.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" @@ -145,7 +146,7 @@ class TypePrinting { public: /// NamedTypes - The named types that are used by the current module. - std::vector<StructType*> NamedTypes; + TypeFinder NamedTypes; /// NumberedTypes - The numbered types, along with their value. DenseMap<StructType*, unsigned> NumberedTypes; @@ -164,7 +165,7 @@ public: void TypePrinting::incorporateTypes(const Module &M) { - M.findUsedStructTypes(NamedTypes); + NamedTypes.run(M, false); // The list of struct types we got back includes all the struct types, split // the unnamed ones out to a numbering and remove the anonymous structs. @@ -1352,12 +1353,12 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT, case GlobalValue::LinkerPrivateWeakLinkage: Out << "linker_private_weak "; break; - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: - Out << "linker_private_weak_def_auto "; - break; case GlobalValue::InternalLinkage: Out << "internal "; break; case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break; case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break; + case GlobalValue::LinkOnceODRAutoHideLinkage: + Out << "linkonce_odr_auto_hide "; + break; case GlobalValue::WeakAnyLinkage: Out << "weak "; break; case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break; case GlobalValue::CommonLinkage: Out << "common "; break; diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index d466ac6..c8219eb 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -88,6 +88,9 @@ std::string Attribute::getAsString(Attributes Attrs) { Result += utostr(Attribute::getAlignmentFromAttrs(Attrs)); Result += " "; } + if (Attrs & Attribute::IANSDialect) + Result += "ia_nsdialect "; + // Trim the trailing space.
assert(!Result.empty() && "Unknown attribute!"); Result.erase(Result.end()-1); diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index 648ccbd..6a20be6 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -31,6 +31,7 @@ add_llvm_library(LLVMCore PassRegistry.cpp PrintModulePass.cpp Type.cpp + TypeFinder.cpp Use.cpp User.cpp Value.cpp diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 972db3c..a56f1b2 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -1084,6 +1084,8 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) { return LLVMLinkOnceAnyLinkage; case GlobalValue::LinkOnceODRLinkage: return LLVMLinkOnceODRLinkage; + case GlobalValue::LinkOnceODRAutoHideLinkage: + return LLVMLinkOnceODRAutoHideLinkage; case GlobalValue::WeakAnyLinkage: return LLVMWeakAnyLinkage; case GlobalValue::WeakODRLinkage: @@ -1098,8 +1100,6 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) { return LLVMLinkerPrivateLinkage; case GlobalValue::LinkerPrivateWeakLinkage: return LLVMLinkerPrivateWeakLinkage; - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: - return LLVMLinkerPrivateWeakDefAutoLinkage; case GlobalValue::DLLImportLinkage: return LLVMDLLImportLinkage; case GlobalValue::DLLExportLinkage: @@ -1129,6 +1129,9 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) { case LLVMLinkOnceODRLinkage: GV->setLinkage(GlobalValue::LinkOnceODRLinkage); break; + case LLVMLinkOnceODRAutoHideLinkage: + GV->setLinkage(GlobalValue::LinkOnceODRAutoHideLinkage); + break; case LLVMWeakAnyLinkage: GV->setLinkage(GlobalValue::WeakAnyLinkage); break; @@ -1150,9 +1153,6 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) { case LLVMLinkerPrivateWeakLinkage: GV->setLinkage(GlobalValue::LinkerPrivateWeakLinkage); break; - case LLVMLinkerPrivateWeakDefAutoLinkage: - GV->setLinkage(GlobalValue::LinkerPrivateWeakDefAutoLinkage); - break; case LLVMDLLImportLinkage: GV->setLinkage(GlobalValue::DLLImportLinkage); break; diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp index 219e631..77b2403 100644 --- a/lib/VMCore/Dominators.cpp +++ b/lib/VMCore/Dominators.cpp @@ -39,6 +39,19 @@ static cl::opt<bool,true> VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo), cl::desc("Verify dominator info (time consuming)")); +bool BasicBlockEdge::isSingleEdge() const { + const TerminatorInst *TI = Start->getTerminator(); + unsigned NumEdgesToEnd = 0; + for (unsigned int i = 0, n = TI->getNumSuccessors(); i < n; ++i) { + if (TI->getSuccessor(i) == End) + ++NumEdgesToEnd; + if (NumEdgesToEnd >= 2) + return false; + } + assert(NumEdgesToEnd == 1); + return true; +} + //===----------------------------------------------------------------------===// // DominatorTree Implementation //===----------------------------------------------------------------------===// @@ -142,12 +155,27 @@ bool DominatorTree::dominates(const Instruction *Def, // Invoke results are only usable in the normal destination, not in the // exceptional destination. BasicBlock *NormalDest = II->getNormalDest(); - if (!dominates(NormalDest, UseBB)) + BasicBlockEdge E(DefBB, NormalDest); + return dominates(E, UseBB); +} + +bool DominatorTree::dominates(const BasicBlockEdge &BBE, + const BasicBlock *UseBB) const { + // Assert that we have a single edge. We could handle them by simply + // returning false, but since isSingleEdge is linear on the number of + // edges, the callers can normally handle them more efficiently. 
+ assert(BBE.isSingleEdge()); + + // If the BB the edge ends in doesn't dominate the use BB, then the + // edge also doesn't. + const BasicBlock *Start = BBE.getStart(); + const BasicBlock *End = BBE.getEnd(); + if (!dominates(End, UseBB)) return false; - // Simple case: if the normal destination has a single predecessor, the - // fact that it dominates the use block implies that we also do. - if (NormalDest->getSinglePredecessor()) + // Simple case: if the end BB has a single predecessor, the fact that it + // dominates the use block implies that the edge also does. + if (End->getSinglePredecessor()) return true; // The normal edge from the invoke is critical. Conceptually, what we would @@ -170,29 +198,45 @@ bool DominatorTree::dominates(const Instruction *Def, // trivially dominates itself, so we only have to find if it dominates the // other predecessors. Since the only way out of X is via NormalDest, X can // only properly dominate a node if NormalDest dominates that node too. - for (pred_iterator PI = pred_begin(NormalDest), - E = pred_end(NormalDest); PI != E; ++PI) { + for (const_pred_iterator PI = pred_begin(End), E = pred_end(End); + PI != E; ++PI) { const BasicBlock *BB = *PI; - if (BB == DefBB) + if (BB == Start) continue; - if (!DT->isReachableFromEntry(BB)) - continue; - - if (!dominates(NormalDest, BB)) + if (!dominates(End, BB)) return false; } return true; } -bool DominatorTree::dominates(const Instruction *Def, +bool DominatorTree::dominates(const BasicBlockEdge &BBE, const Use &U) const { - Instruction *UserInst = dyn_cast<Instruction>(U.getUser()); + // Assert that we have a single edge. We could handle them by simply + // returning false, but since isSingleEdge is linear on the number of + // edges, the callers can normally handle them more efficiently. + assert(BBE.isSingleEdge()); + + Instruction *UserInst = cast<Instruction>(U.getUser()); + // A PHI in the end of the edge is dominated by it. + PHINode *PN = dyn_cast<PHINode>(UserInst); + if (PN && PN->getParent() == BBE.getEnd() && + PN->getIncomingBlock(U) == BBE.getStart()) + return true; - // Instructions do not dominate non-instructions. - if (!UserInst) - return false; + // Otherwise use the edge-dominates-block query, which + // handles the crazy critical edge cases properly. + const BasicBlock *UseBB; + if (PN) + UseBB = PN->getIncomingBlock(U); + else + UseBB = UserInst->getParent(); + return dominates(BBE, UseBB); +} +bool DominatorTree::dominates(const Instruction *Def, + const Use &U) const { + Instruction *UserInst = cast<Instruction>(U.getUser()); const BasicBlock *DefBB = Def->getParent(); // Determine the block in which the use happens. PHI nodes use @@ -218,17 +262,9 @@ bool DominatorTree::dominates(const Instruction *Def, // their own block, except possibly a phi, so we don't need to // walk the block in any case. if (const InvokeInst *II = dyn_cast<InvokeInst>(Def)) { - // A PHI in the normal successor using the invoke's return value is - // dominated by the invoke's return value. - if (isa<PHINode>(UserInst) && - UserInst->getParent() == II->getNormalDest() && - cast<PHINode>(UserInst)->getIncomingBlock(U) == DefBB) - return true; - - // Otherwise use the instruction-dominates-block query, which - // handles the crazy case of an invoke with a critical edge - // properly. 
- return dominates(Def, UseBB); + BasicBlock *NormalDest = II->getNormalDest(); + BasicBlockEdge E(DefBB, NormalDest); + return dominates(E, U); } // If the def and use are in different blocks, do a simple CFG dominator diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index ede4626..95e5a8b 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -200,7 +200,7 @@ const Function *MDNode::getFunction() const { // destroy - Delete this node. Only when there are no uses. void MDNode::destroy() { setValueSubclassData(getSubclassDataFromValue() | DestroyFlag); - // Placement delete, the free the memory. + // Placement delete, then free the memory. this->~MDNode(); free(this); } diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp index 8ea3665..5b5176b 100644 --- a/lib/VMCore/Module.cpp +++ b/lib/VMCore/Module.cpp @@ -467,143 +467,3 @@ void Module::removeLibrary(StringRef Lib) { return; } } - -//===----------------------------------------------------------------------===// -// Type finding functionality. -//===----------------------------------------------------------------------===// - -namespace { - /// TypeFinder - Walk over a module, identifying all of the types that are - /// used by the module. - class TypeFinder { - // To avoid walking constant expressions multiple times and other IR - // objects, we keep several helper maps. - DenseSet<const Value*> VisitedConstants; - DenseSet<Type*> VisitedTypes; - - std::vector<StructType*> &StructTypes; - bool OnlyNamed; - public: - TypeFinder(std::vector<StructType*> &structTypes, bool onlyNamed) - : StructTypes(structTypes), OnlyNamed(onlyNamed) {} - - void run(const Module &M) { - // Get types from global variables. - for (Module::const_global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) { - incorporateType(I->getType()); - if (I->hasInitializer()) - incorporateValue(I->getInitializer()); - } - - // Get types from aliases. - for (Module::const_alias_iterator I = M.alias_begin(), - E = M.alias_end(); I != E; ++I) { - incorporateType(I->getType()); - if (const Value *Aliasee = I->getAliasee()) - incorporateValue(Aliasee); - } - - // Get types from functions. - SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst; - for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) { - incorporateType(FI->getType()); - - // First incorporate the arguments. - for (Function::const_arg_iterator AI = FI->arg_begin(), - AE = FI->arg_end(); AI != AE; ++AI) - incorporateValue(AI); - - for (Function::const_iterator BB = FI->begin(), E = FI->end(); - BB != E;++BB) - for (BasicBlock::const_iterator II = BB->begin(), - E = BB->end(); II != E; ++II) { - const Instruction &I = *II; - // Incorporate the type of the instruction. - incorporateType(I.getType()); - - // Incorporate non-instruction operand types. (We are incorporating - // all instructions with this loop.) - for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end(); - OI != OE; ++OI) - if (!isa<Instruction>(OI)) - incorporateValue(*OI); - - // Incorporate types hiding in metadata. 
- I.getAllMetadataOtherThanDebugLoc(MDForInst); - for (unsigned i = 0, e = MDForInst.size(); i != e; ++i) - incorporateMDNode(MDForInst[i].second); - MDForInst.clear(); - } - } - - for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), - E = M.named_metadata_end(); I != E; ++I) { - const NamedMDNode *NMD = I; - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - incorporateMDNode(NMD->getOperand(i)); - } - } - - private: - void incorporateType(Type *Ty) { - // Check to see if we're already visited this type. - if (!VisitedTypes.insert(Ty).second) - return; - - // If this is a structure or opaque type, add a name for the type. - if (StructType *STy = dyn_cast<StructType>(Ty)) - if (!OnlyNamed || STy->hasName()) - StructTypes.push_back(STy); - - // Recursively walk all contained types. - for (Type::subtype_iterator I = Ty->subtype_begin(), - E = Ty->subtype_end(); I != E; ++I) - incorporateType(*I); - } - - /// incorporateValue - This method is used to walk operand lists finding - /// types hiding in constant expressions and other operands that won't be - /// walked in other ways. GlobalValues, basic blocks, instructions, and - /// inst operands are all explicitly enumerated. - void incorporateValue(const Value *V) { - if (const MDNode *M = dyn_cast<MDNode>(V)) - return incorporateMDNode(M); - if (!isa<Constant>(V) || isa<GlobalValue>(V)) return; - - // Already visited? - if (!VisitedConstants.insert(V).second) - return; - - // Check this type. - incorporateType(V->getType()); - - // If this is an instruction, we incorporate it separately. - if (isa<Instruction>(V)) - return; - - // Look in operands for types. - const User *U = cast<User>(V); - for (Constant::const_op_iterator I = U->op_begin(), - E = U->op_end(); I != E;++I) - incorporateValue(*I); - } - - void incorporateMDNode(const MDNode *V) { - - // Already visited? - if (!VisitedConstants.insert(V).second) - return; - - // Look in operands for types. - for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i) - if (Value *Op = V->getOperand(i)) - incorporateValue(Op); - } - }; -} // end anonymous namespace - -void Module::findUsedStructTypes(std::vector<StructType*> &StructTypes, - bool OnlyNamed) const { - TypeFinder(StructTypes, OnlyNamed).run(*this); -} diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index c6f3558..5e9a00f 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -464,19 +464,26 @@ void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) { void StructType::setName(StringRef Name) { if (Name == getName()) return; - // If this struct already had a name, remove its symbol table entry. - if (SymbolTableEntry) { - getContext().pImpl->NamedStructTypes.erase(getName()); - SymbolTableEntry = 0; - } - + StringMap<StructType *> &SymbolTable = getContext().pImpl->NamedStructTypes; + typedef StringMap<StructType *>::MapEntryTy EntryTy; + + // If this struct already had a name, remove its symbol table entry. Don't + // delete the data yet because it may be part of the new name. + if (SymbolTableEntry) + SymbolTable.remove((EntryTy *)SymbolTableEntry); + // If this is just removing the name, we're done. - if (Name.empty()) + if (Name.empty()) { + if (SymbolTableEntry) { + // Delete the old string data. + ((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator()); + SymbolTableEntry = 0; + } return; + } // Look up the entry for the name. 
- StringMapEntry<StructType*> *Entry = - &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name); + EntryTy *Entry = &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name); // While we have a name collision, try a random rename. if (Entry->getValue()) { @@ -497,7 +504,10 @@ void StructType::setName(StringRef Name) { // Okay, we found an entry that isn't used. It's us! Entry->setValue(this); - + + // Delete the old string data. + if (SymbolTableEntry) + ((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator()); SymbolTableEntry = Entry; } diff --git a/lib/VMCore/TypeFinder.cpp b/lib/VMCore/TypeFinder.cpp new file mode 100644 index 0000000..4de649f --- /dev/null +++ b/lib/VMCore/TypeFinder.cpp @@ -0,0 +1,148 @@ +//===-- TypeFinder.cpp - Implement the TypeFinder class -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TypeFinder class for the VMCore library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/TypeFinder.h" +#include "llvm/BasicBlock.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Metadata.h" +#include "llvm/Module.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +void TypeFinder::run(const Module &M, bool onlyNamed) { + OnlyNamed = onlyNamed; + + // Get types from global variables. + for (Module::const_global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) { + incorporateType(I->getType()); + if (I->hasInitializer()) + incorporateValue(I->getInitializer()); + } + + // Get types from aliases. + for (Module::const_alias_iterator I = M.alias_begin(), + E = M.alias_end(); I != E; ++I) { + incorporateType(I->getType()); + if (const Value *Aliasee = I->getAliasee()) + incorporateValue(Aliasee); + } + + // Get types from functions. + SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst; + for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) { + incorporateType(FI->getType()); + + // First incorporate the arguments. + for (Function::const_arg_iterator AI = FI->arg_begin(), + AE = FI->arg_end(); AI != AE; ++AI) + incorporateValue(AI); + + for (Function::const_iterator BB = FI->begin(), E = FI->end(); + BB != E;++BB) + for (BasicBlock::const_iterator II = BB->begin(), + E = BB->end(); II != E; ++II) { + const Instruction &I = *II; + + // Incorporate the type of the instruction. + incorporateType(I.getType()); + + // Incorporate non-instruction operand types. (We are incorporating all + // instructions with this loop.) + for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end(); + OI != OE; ++OI) + if (!isa<Instruction>(OI)) + incorporateValue(*OI); + + // Incorporate types hiding in metadata. 
+ I.getAllMetadataOtherThanDebugLoc(MDForInst); + for (unsigned i = 0, e = MDForInst.size(); i != e; ++i) + incorporateMDNode(MDForInst[i].second); + + MDForInst.clear(); + } + } + + for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), + E = M.named_metadata_end(); I != E; ++I) { + const NamedMDNode *NMD = I; + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) + incorporateMDNode(NMD->getOperand(i)); + } +} + +void TypeFinder::clear() { + VisitedConstants.clear(); + VisitedTypes.clear(); + StructTypes.clear(); +} + +/// incorporateType - This method adds the type to the list of used structures +/// if it's not in there already. +void TypeFinder::incorporateType(Type *Ty) { + // Check to see if we've already visited this type. + if (!VisitedTypes.insert(Ty).second) + return; + + // If this is a structure or opaque type, add a name for the type. + if (StructType *STy = dyn_cast<StructType>(Ty)) + if (!OnlyNamed || STy->hasName()) + StructTypes.push_back(STy); + + // Recursively walk all contained types. + for (Type::subtype_iterator I = Ty->subtype_begin(), + E = Ty->subtype_end(); I != E; ++I) + incorporateType(*I); +} + +/// incorporateValue - This method is used to walk operand lists finding types +/// hiding in constant expressions and other operands that won't be walked in +/// other ways. GlobalValues, basic blocks, instructions, and inst operands are +/// all explicitly enumerated. +void TypeFinder::incorporateValue(const Value *V) { + if (const MDNode *M = dyn_cast<MDNode>(V)) + return incorporateMDNode(M); + + if (!isa<Constant>(V) || isa<GlobalValue>(V)) return; + + // Already visited? + if (!VisitedConstants.insert(V).second) + return; + + // Check this type. + incorporateType(V->getType()); + + // If this is an instruction, we incorporate it separately. + if (isa<Instruction>(V)) + return; + + // Look in operands for types. + const User *U = cast<User>(V); + for (Constant::const_op_iterator I = U->op_begin(), + E = U->op_end(); I != E;++I) + incorporateValue(*I); +} + +/// incorporateMDNode - This method is used to walk the operands of an MDNode to +/// find types hiding within. +void TypeFinder::incorporateMDNode(const MDNode *V) { + // Already visited? + if (!VisitedConstants.insert(V).second) + return; + + // Look in operands for types.
+ for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i) + if (Value *Op = V->getOperand(i)) + incorporateValue(Op); +} diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp index 9a8e185..d1ca953 100644 --- a/lib/VMCore/ValueTypes.cpp +++ b/lib/VMCore/ValueTypes.cpp @@ -71,6 +71,10 @@ bool EVT::isExtended512BitVector() const { return isExtendedVector() && getSizeInBits() == 512; } +bool EVT::isExtended1024BitVector() const { + return isExtendedVector() && getSizeInBits() == 1024; +} + EVT EVT::getExtendedVectorElementType() const { assert(isExtended() && "Type is not extended!"); return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType()); @@ -128,10 +132,12 @@ std::string EVT::getEVTString() const { case MVT::v2i32: return "v2i32"; case MVT::v4i32: return "v4i32"; case MVT::v8i32: return "v8i32"; + case MVT::v16i32: return "v16i32"; case MVT::v1i64: return "v1i64"; case MVT::v2i64: return "v2i64"; case MVT::v4i64: return "v4i64"; case MVT::v8i64: return "v8i64"; + case MVT::v16i64: return "v16i64"; case MVT::v2f32: return "v2f32"; case MVT::v2f16: return "v2f16"; case MVT::v4f32: return "v4f32"; @@ -177,10 +183,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2); case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4); case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8); + case MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16); case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1); case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2); case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4); case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8); + case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16); case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2); case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2); case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4); diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 5d51f41..c932d9e 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -400,8 +400,8 @@ void Verifier::visitGlobalValue(GlobalValue &GV) { "Only global arrays can have appending linkage!", GVar); } - Assert1(!GV.hasLinkerPrivateWeakDefAutoLinkage() || GV.hasDefaultVisibility(), - "linker_private_weak_def_auto can only have default visibility!", + Assert1(!GV.hasLinkOnceODRAutoHideLinkage() || GV.hasDefaultVisibility(), + "linkonce_odr_auto_hide can only have default visibility!", &GV); } @@ -1093,7 +1093,7 @@ void Verifier::visitBitCastInst(BitCastInst &I) { // BitCast implies a no-op cast of type only. No bits change. // However, you can't cast pointers to anything but pointers. 
- Assert1(DestTy->isPointerTy() == DestTy->isPointerTy(), + Assert1(SrcTy->isPointerTy() == DestTy->isPointerTy(), "Bitcast requires both operands to be pointer or neither", &I); Assert1(SrcBitSize == DestBitSize, "Bitcast requires types of same width",&I); @@ -1378,6 +1378,15 @@ void Verifier::visitLoadInst(LoadInst &LI) { "Load cannot have Release ordering", &LI); Assert1(LI.getAlignment() != 0, "Atomic load must specify explicit alignment", &LI); + if (!ElTy->isPointerTy()) { + Assert2(ElTy->isIntegerTy(), + "atomic load operand must have integer type!", + &LI, ElTy); + unsigned Size = ElTy->getPrimitiveSizeInBits(); + Assert2(Size >= 8 && !(Size & (Size - 1)), + "atomic load operand must be power-of-two byte-sized integer", + &LI, ElTy); + } } else { Assert1(LI.getSynchScope() == CrossThread, "Non-atomic load cannot have SynchronizationScope specified", &LI); @@ -1444,6 +1453,15 @@ void Verifier::visitStoreInst(StoreInst &SI) { "Store cannot have Acquire ordering", &SI); Assert1(SI.getAlignment() != 0, "Atomic store must specify explicit alignment", &SI); + if (!ElTy->isPointerTy()) { + Assert2(ElTy->isIntegerTy(), + "atomic store operand must have integer type!", + &SI, ElTy); + unsigned Size = ElTy->getPrimitiveSizeInBits(); + Assert2(Size >= 8 && !(Size & (Size - 1)), + "atomic store operand must be power-of-two byte-sized integer", + &SI, ElTy); + } } else { Assert1(SI.getSynchScope() == CrossThread, "Non-atomic store cannot have SynchronizationScope specified", &SI); @@ -1471,6 +1489,13 @@ void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) { PointerType *PTy = dyn_cast<PointerType>(CXI.getOperand(0)->getType()); Assert1(PTy, "First cmpxchg operand must be a pointer.", &CXI); Type *ElTy = PTy->getElementType(); + Assert2(ElTy->isIntegerTy(), + "cmpxchg operand must have integer type!", + &CXI, ElTy); + unsigned Size = ElTy->getPrimitiveSizeInBits(); + Assert2(Size >= 8 && !(Size & (Size - 1)), + "cmpxchg operand must be power-of-two byte-sized integer", + &CXI, ElTy); Assert2(ElTy == CXI.getOperand(1)->getType(), "Expected value type does not match pointer operand type!", &CXI, ElTy); @@ -1488,6 +1513,13 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) { PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType()); Assert1(PTy, "First atomicrmw operand must be a pointer.", &RMWI); Type *ElTy = PTy->getElementType(); + Assert2(ElTy->isIntegerTy(), + "atomicrmw operand must have integer type!", + &RMWI, ElTy); + unsigned Size = ElTy->getPrimitiveSizeInBits(); + Assert2(Size >= 8 && !(Size & (Size - 1)), + "atomicrmw operand must be power-of-two byte-sized integer", + &RMWI, ElTy); Assert2(ElTy == RMWI.getOperand(1)->getType(), "Argument value type does not match pointer operand type!", &RMWI, ElTy); @@ -1536,7 +1568,7 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) { // landing pad block may be branched to only by the unwind edge of an invoke.
for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator()); - Assert1(II && II->getUnwindDest() == BB, + Assert1(II && II->getUnwindDest() == BB && II->getNormalDest() != BB, "Block containing LandingPadInst must be jumped to " "only by the unwind edge of an invoke.", &LPI); } @@ -1575,6 +1607,13 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) { void Verifier::verifyDominatesUse(Instruction &I, unsigned i) { Instruction *Op = cast<Instruction>(I.getOperand(i)); + // If we have an invalid invoke, don't try to compute the dominance. + // We already reject it in the invoke-specific checks and the dominance + // computation doesn't handle multiple edges. + if (InvokeInst *II = dyn_cast<InvokeInst>(Op)) { + if (II->getNormalDest() == II->getUnwindDest()) + return; + } const Use &U = I.getOperandUse(i); Assert2(InstsInThisBlock.count(Op) || DT->dominates(Op, U), diff --git a/test/Analysis/GlobalsModRef/volatile-instrs.ll b/test/Analysis/GlobalsModRef/volatile-instrs.ll new file mode 100644 index 0000000..49bce67 --- /dev/null +++ b/test/Analysis/GlobalsModRef/volatile-instrs.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -basicaa -dse -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +%struct.anon = type { i32, i32, i32 } +@b = global %struct.anon { i32 1, i32 0, i32 0 }, align 4 +@c = common global i32 0, align 4 +@a = common global %struct.anon zeroinitializer, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +declare i32 @printf(i8* nocapture, ...) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + + +; Make sure that the initial memcpy call does not go away +; because the volatile load is in the way.
PR12899 + +; CHECK: main_entry: +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64 + +define i32 @main() nounwind uwtable ssp { +main_entry: + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false) + %0 = load volatile i32* getelementptr inbounds (%struct.anon* @b, i64 0, i32 0), align 4, !tbaa !0 + store i32 %0, i32* @c, align 4, !tbaa !0 + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false) nounwind + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %0) nounwind + ret i32 0 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/Analysis/ScalarEvolution/2012-05-29-MulAddRec.ll b/test/Analysis/ScalarEvolution/2012-05-29-MulAddRec.ll index eee4ec4..3f04e2e 100644 --- a/test/Analysis/ScalarEvolution/2012-05-29-MulAddRec.ll +++ b/test/Analysis/ScalarEvolution/2012-05-29-MulAddRec.ll @@ -16,7 +16,7 @@ ; CHECK: for.body: ; CHECK: %inc.9 = add i8 %inc.8, 1 ; CHECK: %0 = add i8 %inc1, 10 -; CHEKC: br label %for.cond +; CHECK: br label %for.cond target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" define void @func() noreturn nounwind uwtable ssp { diff --git a/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll b/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll index 06f1b6f..e946d7a 100644 --- a/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll +++ b/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll @@ -80,3 +80,24 @@ for.cond539.preheader: unreachable } ; CHECK: Determining loop execution counts for: @test3 + +; PR13489 +; We used to crash on this too. + +define void @test4() { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %v2.02 = phi i64 [ 2, %entry ], [ %phitmp, %for.body ] + %v1.01 = phi i64 [ -2, %entry ], [ %sub1, %for.body ] + %sub1 = sub i64 %v1.01, %v2.02 + %phitmp = add i64 %v2.02, 2 + %tobool = icmp eq i64 %sub1, %phitmp + br i1 %tobool, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; CHECK: Determining loop execution counts for: @test4 diff --git a/test/Assembler/2003-06-17-InvokeDisassemble.ll b/test/Assembler/2003-06-17-InvokeDisassemble.ll deleted file mode 100644 index 8a9670e..0000000 --- a/test/Assembler/2003-06-17-InvokeDisassemble.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: llvm-as < %s | llvm-dis - -define void @test() { - invoke void @test( ) - to label %Next unwind label %Next - -Next: ; preds = %0, %0 - %lpad = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 - cleanup - ret void -} - -declare i32 @__gxx_personality_v0(...) 
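Aside (illustrative sketch, not from the patch): the Verifier hunks above tighten the rules for atomic memory operations. Unless the operand is a pointer, an exception that only load and store get, it must be an integer whose width is a power of two and at least 8 bits; cmpxchg and atomicrmw always require such an integer. In the IR dialect used by these tests, with a hypothetical pointer %p:

;   %v = load atomic i32* %p seq_cst, align 4     ; accepted
;   %o = atomicrmw add i32* %p, i32 1 seq_cst     ; accepted
;   %b = load atomic i24* %p seq_cst, align 4     ; rejected: i24 is not a
;                                                 ; power-of-two byte-sized integer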
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bb5afe2..991cc9d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -19,6 +19,7 @@ add_lit_testsuite(check-llvm "Running the LLVM regression tests" llvm-link llvm-mc llvm-nm llvm-objdump llvm-readobj macho-dump opt FileCheck count not + yaml2obj ) set_target_properties(check-llvm PROPERTIES FOLDER "Tests") diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll index 2faa04a..e84ce0e 100644 --- a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll +++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -disable-cgp-delete-dead-blocks -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s ; Do not form Thumb2 ldrd / strd if the offset is not multiple of 4. ; rdar://9133587 @@ -21,12 +21,6 @@ for.body: ; preds = %_Z14printIsNotZeroi %x = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 0 %y = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 1 %inc = add i32 %i.022, 1 - br i1 %tmp3, label %_Z14printIsNotZeroi.exit, label %if.then.i - -if.then.i: ; preds = %for.body - unreachable - -_Z14printIsNotZeroi.exit: ; preds = %for.body %tmp8 = load i32* %x, align 4, !tbaa !0 %tmp11 = load i32* %y, align 4, !tbaa !0 %mul = mul nsw i32 %tmp11, %tmp8 @@ -37,7 +31,7 @@ if.then.i16: ; preds = %_Z14printIsNotZeroi unreachable _Z14printIsNotZeroi.exit17: ; preds = %_Z14printIsNotZeroi.exit - br i1 undef, label %_Z14printIsNotZeroi.exit17.for.body_crit_edge, label %for.end + br label %_Z14printIsNotZeroi.exit17.for.body_crit_edge _Z14printIsNotZeroi.exit17.for.body_crit_edge: ; preds = %_Z14printIsNotZeroi.exit17 %b.phi.trans.insert = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %inc, i32 3 diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll index 6fbae19..f80b44f 100644 --- a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll +++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll @@ -33,16 +33,16 @@ define void @test_cos(<4 x float>* %X) nounwind { ; CHECK: movt [[reg0]], :upper16:{{.*}} ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}cosf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}cosf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}cosf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}cosf ; CHECK: vstmia {{.*}} @@ -64,16 +64,16 @@ define void @test_exp(<4 x float>* %X) nounwind { ; CHECK: movt [[reg0]], :upper16:{{.*}} ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}expf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}expf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}expf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}expf ; CHECK: vstmia {{.*}} @@ -95,16 +95,16 @@ define void @test_exp2(<4 x float>* %X) nounwind { ; CHECK: movt [[reg0]], :upper16:{{.*}} ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; 
CHECK: bl {{.*}}exp2f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}exp2f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}exp2f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}exp2f ; CHECK: vstmia {{.*}} @@ -126,16 +126,16 @@ define void @test_log10(<4 x float>* %X) nounwind { ; CHECK: movt [[reg0]], :upper16:{{.*}} ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}log10f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}log10f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}log10f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}log10f ; CHECK: vstmia {{.*}} @@ -157,16 +157,16 @@ define void @test_log(<4 x float>* %X) nounwind { ; CHECK: movt [[reg0]], :upper16:{{.*}} ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}logf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}logf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}logf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}logf ; CHECK: vstmia {{.*}} @@ -188,16 +188,16 @@ define void @test_log2(<4 x float>* %X) nounwind { ; CHECK: movt [[reg0]], :upper16:{{.*}} ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}log2f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}log2f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}log2f -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}log2f ; CHECK: vstmia {{.*}} @@ -220,16 +220,16 @@ define void @test_pow(<4 x float>* %X) nounwind { ; CHECK: movt [[reg0]], :upper16:{{.*}} ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}powf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}powf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}powf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}powf ; CHECK: vstmia {{.*}} @@ -277,16 +277,16 @@ define void @test_sin(<4 x float>* %X) nounwind { ; CHECK: movt [[reg0]], :upper16:{{.*}} ; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}} -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}sinf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}sinf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}sinf -; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}} +; CHECK: {{v?mov(.32)?}} r0, ; CHECK: bl {{.*}}sinf ; CHECK: vstmia {{.*}} diff --git a/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll b/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll new file mode 100644 index 0000000..e4ad45b --- /dev/null +++ b/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll @@ -0,0 +1,174 @@ +; RUN: llc < %s +; PR13377 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" +target 
triple = "armv7-none-linux-gnueabi" + +%0 = type { <4 x float> } + +define arm_aapcs_vfpcc void @foo(float, i1 zeroext, i1 zeroext) nounwind uwtable { + br i1 undef, label %4, label %5 + +; <label>:4 ; preds = %3 + unreachable + +; <label>:5 ; preds = %3 + br i1 undef, label %7, label %6 + +; <label>:6 ; preds = %5 + unreachable + +; <label>:7 ; preds = %5 + br i1 undef, label %8, label %10 + +; <label>:8 ; preds = %7 + br i1 undef, label %9, label %10 + +; <label>:9 ; preds = %8 + br i1 undef, label %11, label %10 + +; <label>:10 ; preds = %9, %8, %7 + unreachable + +; <label>:11 ; preds = %9 + br i1 undef, label %13, label %12 + +; <label>:12 ; preds = %11 + unreachable + +; <label>:13 ; preds = %11 + br i1 undef, label %15, label %14 + +; <label>:14 ; preds = %13 + unreachable + +; <label>:15 ; preds = %13 + br i1 undef, label %18, label %16 + +; <label>:16 ; preds = %15 + br i1 undef, label %17, label %18 + +; <label>:17 ; preds = %16 + unreachable + +; <label>:18 ; preds = %16, %15 + br i1 undef, label %68, label %19 + +; <label>:19 ; preds = %18 + br label %20 + +; <label>:20 ; preds = %20, %19 + br i1 undef, label %21, label %20 + +; <label>:21 ; preds = %20 + br i1 undef, label %22, label %68 + +; <label>:22 ; preds = %21 + br i1 undef, label %23, label %24 + +; <label>:23 ; preds = %22 + unreachable + +; <label>:24 ; preds = %22 + br i1 undef, label %26, label %25 + +; <label>:25 ; preds = %24 + unreachable + +; <label>:26 ; preds = %24 + br i1 undef, label %28, label %27 + +; <label>:27 ; preds = %26 + unreachable + +; <label>:28 ; preds = %26 + br i1 undef, label %29, label %30, !prof !0 + +; <label>:29 ; preds = %28 + br label %30 + +; <label>:30 ; preds = %29, %28 + br i1 undef, label %31, label %32, !prof !0 + +; <label>:31 ; preds = %30 + br label %32 + +; <label>:32 ; preds = %31, %30 + br i1 undef, label %34, label %33 + +; <label>:33 ; preds = %32 + unreachable + +; <label>:34 ; preds = %32 + br i1 undef, label %35, label %36, !prof !0 + +; <label>:35 ; preds = %34 + br label %36 + +; <label>:36 ; preds = %35, %34 + br i1 undef, label %37, label %38, !prof !0 + +; <label>:37 ; preds = %36 + br label %38 + +; <label>:38 ; preds = %37, %36 + br i1 undef, label %39, label %67 + +; <label>:39 ; preds = %38 + br i1 undef, label %40, label %41 + +; <label>:40 ; preds = %39 + br i1 undef, label %64, label %41 + +; <label>:41 ; preds = %40, %39 + br i1 undef, label %64, label %42 + +; <label>:42 ; preds = %41 + %43 = fadd <4 x float> undef, undef + %44 = fadd <4 x float> undef, undef + %45 = fmul <4 x float> undef, undef + %46 = fmul <4 x float> %45, %43 + %47 = fmul <4 x float> undef, %44 + %48 = load <4 x float>* undef, align 8, !tbaa !1 + %49 = bitcast <4 x float> %48 to <2 x i64> + %50 = shufflevector <2 x i64> %49, <2 x i64> undef, <1 x i32> <i32 1> + %51 = bitcast <1 x i64> %50 to <2 x float> + %52 = shufflevector <2 x float> %51, <2 x float> undef, <4 x i32> zeroinitializer + %53 = bitcast <4 x float> %52 to <2 x i64> + %54 = shufflevector <2 x i64> %53, <2 x i64> undef, <1 x i32> zeroinitializer + %55 = bitcast <1 x i64> %54 to <2 x float> + %56 = extractelement <2 x float> %55, i32 0 + %57 = insertelement <4 x float> undef, float %56, i32 2 + %58 = insertelement <4 x float> %57, float 1.000000e+00, i32 3 + %59 = fsub <4 x float> %47, %58 + %60 = fmul <4 x float> undef, undef + %61 = fmul <4 x float> %59, %60 + %62 = fmul <4 x float> %61, <float 6.000000e+01, float 6.000000e+01, float 6.000000e+01, float 6.000000e+01> + %63 = fadd <4 x float> %47, %62 + store <4 x float> 
%46, <4 x float>* undef, align 8, !tbaa !1 + call arm_aapcs_vfpcc void @bar(%0* undef, float 0.000000e+00) nounwind + call arm_aapcs_vfpcc void @bar(%0* undef, float 0.000000e+00) nounwind + store <4 x float> %63, <4 x float>* undef, align 8, !tbaa !1 + unreachable + +; <label>:64 ; preds = %41, %40 + br i1 undef, label %65, label %66 + +; <label>:65 ; preds = %64 + unreachable + +; <label>:66 ; preds = %64 + unreachable + +; <label>:67 ; preds = %38 + unreachable + +; <label>:68 ; preds = %21, %18 + ret void +} + +declare arm_aapcs_vfpcc void @bar(%0*, float) + +!0 = metadata !{metadata !"branch_weights", i32 64, i32 4} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll b/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll new file mode 100644 index 0000000..bdcd1b6 --- /dev/null +++ b/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s +; PR13111 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32" +target triple = "armv7-none-linux-gnueabi" + +define void @test_hi_char8() noinline { +entry: + %0 = load <4 x i8>* undef, align 1 + store <4 x i8> %0, <4 x i8>* null, align 4 + ret void +} diff --git a/test/CodeGen/ARM/2012-08-13-bfi.ll b/test/CodeGen/ARM/2012-08-13-bfi.ll new file mode 100644 index 0000000..8263833 --- /dev/null +++ b/test/CodeGen/ARM/2012-08-13-bfi.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=thumb -mcpu=cortex-a8 < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; CHECK: foo +; CHECK-NOT: bfi +; CHECK: bx +define i32 @foo(i8 zeroext %i) nounwind uwtable readnone ssp { + %1 = and i8 %i, 15 + %2 = zext i8 %1 to i32 + %3 = icmp ult i8 %1, 10 + %4 = or i32 %2, 48 + %5 = add nsw i32 %2, 55 + %6 = select i1 %3, i32 %4, i32 %5 + ret i32 %6 +} diff --git a/test/CodeGen/ARM/arm-modifier.ll b/test/CodeGen/ARM/arm-modifier.ll index 396de37..5e12d8e 100644 --- a/test/CodeGen/ARM/arm-modifier.ll +++ b/test/CodeGen/ARM/arm-modifier.ll @@ -57,3 +57,12 @@ store i64 %0, i64* @f3_var, align 4 store i64 %1, i64* @f3_var, align 4 ret void } + +define i64 @f4(i64* %val) nounwind { +entry: + ;CHECK: f4 + ;CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], {{r[0-9]?[13579]}}, [r0] + ;CHECK: mov r0, [[REG1]] + %0 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [$1]", "=&r,r,*Qo"(i64* %val, i64* %val) nounwind + ret i64 %0 +} diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll index f4c1b5a..0ac8b48 100644 --- a/test/CodeGen/ARM/constants.ll +++ b/test/CodeGen/ARM/constants.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv4t-unknown-linux-gnueabi -disable-cgp-branch-opts | FileCheck %s +; RUN: llc < %s -mtriple=armv4t-unknown-linux-gnueabi -disable-cgp-branch-opts -verify-machineinstrs | FileCheck %s define i32 @f1() { ; CHECK: f1 diff --git a/test/CodeGen/ARM/crash-greedy.ll b/test/CodeGen/ARM/crash-greedy.ll index 8a865e2..a3d49f6 100644 --- a/test/CodeGen/ARM/crash-greedy.ll +++ b/test/CodeGen/ARM/crash-greedy.ll @@ -82,3 +82,49 @@ if.then195: ; preds = %if.then84 if.end251: ; preds = %if.then195, %if.then84, %entry ret void } + +; Coalescer failure: removeCopyByCommutingDef leaves a bad kill flag +; behind. 
+define void @rdar11950722() nounwind readonly optsize ssp align 2 { +entry: + br i1 undef, label %land.lhs.true7, label %lor.lhs.false.i + +lor.lhs.false.i: + br i1 undef, label %if.then10.i, label %land.lhs.true7 + +if.then10.i: + %xFlags.1.i = select i1 undef, i32 0, i32 undef + br i1 undef, label %land.lhs.true33.i, label %f.exit + +land.lhs.true33.i: + %and26.i = and i32 %xFlags.1.i, 8 + %cmp27.i = icmp eq i32 %and26.i, 0 + %and29.i = and i32 %xFlags.1.i, 2147483645 + %xFlags.1.and29.i = select i1 %cmp27.i, i32 %xFlags.1.i, i32 %and29.i + %and34.i = and i32 %xFlags.1.i, 8 + %cmp35.i = icmp eq i32 %and34.i, 0 + %and37.i = and i32 %xFlags.1.i, 2147483645 + %yFlags.1.and37.i = select i1 %cmp35.i, i32 %xFlags.1.i, i32 %and37.i + br label %f.exit + +f.exit: + %xFlags.3.i = phi i32 [ %xFlags.1.and29.i, %land.lhs.true33.i ], [ %xFlags.1.i, %if.then10.i ] + %yFlags.2.i = phi i32 [ %yFlags.1.and37.i, %land.lhs.true33.i ], [ %xFlags.1.i, %if.then10.i ] + %cmp40.i = icmp eq i32 %xFlags.3.i, %yFlags.2.i + br i1 %cmp40.i, label %land.lhs.true7, label %land.end + +land.lhs.true7: + br i1 undef, label %land.lhs.true34, label %lor.lhs.false27 + +lor.lhs.false27: + br i1 undef, label %land.lhs.true34, label %land.end + +land.lhs.true34: + br i1 undef, label %land.end, label %lor.lhs.false44 + +lor.lhs.false44: + ret void + +land.end: + ret void +} diff --git a/test/CodeGen/ARM/crash-shufflevector.ll b/test/CodeGen/ARM/crash-shufflevector.ll new file mode 100644 index 0000000..ece4234 --- /dev/null +++ b/test/CodeGen/ARM/crash-shufflevector.ll @@ -0,0 +1,10 @@ +; RUN: llc < %s -mtriple=armv7-- + +declare void @g(<16 x i8>) +define void @f(<4 x i8> %param1, <4 x i8> %param2) { + %y1 = shufflevector <4 x i8> %param1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %y2 = shufflevector <4 x i8> %param2, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %z = shufflevector <16 x i8> %y1, <16 x i8> %y2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> + call void @g(<16 x i8> %z) + ret void +} diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll index 9bdae43..4f4ff8e 100644 --- a/test/CodeGen/ARM/debug-info-branch-folding.ll +++ b/test/CodeGen/ARM/debug-info-branch-folding.ll @@ -3,16 +3,17 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32- target triple = "thumbv7-apple-macosx10.6.7" ;CHECK: vadd.f32 q4, q8, q8 -;CHECK-NEXT: Ltmp -;CHECK-NEXT: @DEBUG_VALUE: y <- Q4+0 -;CHECK-NEXT: @DEBUG_VALUE: x <- Q4+0 +;CHECK-NEXT: Ltmp1 + +;CHECK:@DEBUG_VALUE: x <- Q4+0 +;CHECK-NEXT:@DEBUG_VALUE: y <- Q4+0 @.str = external constant [13 x i8] declare <4 x float> @test0001(float) nounwind readnone ssp -define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { +define i32 @main(i32 %argc, i8** nocapture %argv, i1 %cond) nounwind ssp { entry: br label %for.body9 @@ -21,7 +22,7 @@ for.body9: ; preds = %for.body9, %entry tail call void @llvm.dbg.value(metadata !{<4 x float> %add19}, i64 0, metadata !27), !dbg !39 %add20 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39 tail call void @llvm.dbg.value(metadata 
!{<4 x float> %add20}, i64 0, metadata !28), !dbg !39 - br i1 undef, label %for.end54, label %for.body9, !dbg !44 + br i1 %cond, label %for.end54, label %for.body9, !dbg !44 for.end54: ; preds = %for.body9 %tmp115 = extractelement <4 x float> %add19, i32 1 @@ -52,7 +53,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !8 = metadata !{metadata !9} !9 = metadata !{i32 589857, i64 0, i64 3} ; [ DW_TAG_subrange_type ] -!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null} ; [ DW_TAG_subprogram ] !11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !12 = metadata !{metadata !13} !13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll index 45c322d..bcb4ee7 100644 --- a/test/CodeGen/ARM/fabss.ll +++ b/test/CodeGen/ARM/fabss.ll @@ -1,12 +1,12 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 +; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+neon | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 define float @test(float %a, float %b) { entry: %dum = fadd float %a, %b - %0 = tail call float @fabsf(float %dum) + %0 = tail call float @fabsf(float %dum) readnone %dum1 = fadd float %0, %b ret float %dum1 } diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll index edc805a..b6c9098 100644 --- a/test/CodeGen/ARM/fast-isel-call.ll +++ b/test/CodeGen/ARM/fast-isel-call.ll @@ -2,6 +2,8 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG +; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP +; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP define i32 @t0(i1 zeroext %a) nounwind { %1 = zext i1 %a to i32 @@ -221,3 +223,67 @@ entry: } declare i32 @CallVariadic(i32, ...) 
+ +; Test fastcc + +define fastcc void @fast_callee(float %i) ssp { +entry: +; ARM: fast_callee +; ARM: vmov r0, s0 +; THUMB: fast_callee +; THUMB: vmov r0, s0 +; ARM-NOVFP: fast_callee +; ARM-NOVFP-NOT: s0 +; THUMB-NOVFP: fast_callee +; THUMB-NOVFP-NOT: s0 + call void @print(float %i) + ret void +} + +define void @fast_caller() ssp { +entry: +; ARM: fast_caller +; ARM: vldr s0, +; THUMB: fast_caller +; THUMB: vldr s0, +; ARM-NOVFP: fast_caller +; ARM-NOVFP: movw r0, #13107 +; ARM-NOVFP: movt r0, #16611 +; THUMB-NOVFP: fast_caller +; THUMB-NOVFP: movw r0, #13107 +; THUMB-NOVFP: movt r0, #16611 + call fastcc void @fast_callee(float 0x401C666660000000) + ret void +} + +define void @no_fast_callee(float %i) ssp { +entry: +; ARM: no_fast_callee +; ARM: vmov s0, r0 +; THUMB: no_fast_callee +; THUMB: vmov s0, r0 +; ARM-NOVFP: no_fast_callee +; ARM-NOVFP-NOT: s0 +; THUMB-NOVFP: no_fast_callee +; THUMB-NOVFP-NOT: s0 + call void @print(float %i) + ret void +} + +define void @no_fast_caller() ssp { +entry: +; ARM: no_fast_caller +; ARM: vmov r0, s0 +; THUMB: no_fast_caller +; THUMB: vmov r0, s0 +; ARM-NOVFP: no_fast_caller +; ARM-NOVFP: movw r0, #13107 +; ARM-NOVFP: movt r0, #16611 +; THUMB-NOVFP: no_fast_caller +; THUMB-NOVFP: movw r0, #13107 +; THUMB-NOVFP: movt r0, #16611 + call void @no_fast_callee(float 0x401C666660000000) + ret void +} + +declare void @print(float) diff --git a/test/CodeGen/ARM/fast-isel-shifter.ll b/test/CodeGen/ARM/fast-isel-shifter.ll new file mode 100644 index 0000000..111818b --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-shifter.ll @@ -0,0 +1,50 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM + +define i32 @shl() nounwind ssp { +entry: +; ARM: shl +; ARM: lsl r0, r0, #2 + %shl = shl i32 -1, 2 + ret i32 %shl +} + +define i32 @shl_reg(i32 %src1, i32 %src2) nounwind ssp { +entry: +; ARM: shl_reg +; ARM: lsl r0, r0, r1 + %shl = shl i32 %src1, %src2 + ret i32 %shl +} + +define i32 @lshr() nounwind ssp { +entry: +; ARM: lshr +; ARM: lsr r0, r0, #2 + %lshr = lshr i32 -1, 2 + ret i32 %lshr +} + +define i32 @lshr_reg(i32 %src1, i32 %src2) nounwind ssp { +entry: +; ARM: lshr_reg +; ARM: lsr r0, r0, r1 + %lshr = lshr i32 %src1, %src2 + ret i32 %lshr +} + +define i32 @ashr() nounwind ssp { +entry: +; ARM: ashr +; ARM: asr r0, r0, #2 + %ashr = ashr i32 -1, 2 + ret i32 %ashr +} + +define i32 @ashr_reg(i32 %src1, i32 %src2) nounwind ssp { +entry: +; ARM: ashr_reg +; ARM: asr r0, r0, r1 + %ashr = ashr i32 %src1, %src2 + ret i32 %ashr +} + diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll index 27fa2b0..5511d24 100644 --- a/test/CodeGen/ARM/fcopysign.ll +++ b/test/CodeGen/ARM/fcopysign.ll @@ -11,7 +11,7 @@ entry: ; HARD: test1: ; HARD: vmov.i32 [[REG1:(d[0-9]+)]], #0x80000000 ; HARD: vbsl [[REG1]], d - %0 = tail call float @copysignf(float %x, float %y) nounwind + %0 = tail call float @copysignf(float %x, float %y) nounwind readnone ret float %0 } @@ -25,7 +25,7 @@ entry: ; HARD: vmov.i32 [[REG2:(d[0-9]+)]], #0x80000000 ; HARD: vshl.i64 [[REG2]], [[REG2]], #32 ; HARD: vbsl [[REG2]], d1, d0 - %0 = tail call double @copysign(double %x, double %y) nounwind + %0 = tail call double @copysign(double %x, double %y) nounwind readnone ret double %0 } @@ -36,7 +36,7 @@ entry: ; SOFT: vshl.i64 [[REG3]], [[REG3]], #32 ; SOFT: vbsl [[REG3]], %0 = fmul double %x, %y - %1 = tail call double @copysign(double %0, double %z) nounwind + %1 = tail call double @copysign(double %0, double %z) nounwind 
readnone ret double %1 } diff --git a/test/CodeGen/ARM/floorf.ll b/test/CodeGen/ARM/floorf.ll new file mode 100644 index 0000000..492fc36 --- /dev/null +++ b/test/CodeGen/ARM/floorf.ll @@ -0,0 +1,29 @@ +; RUN: llc -mtriple=arm-unknown-unknown < %s | FileCheck %s + +; CHECK: test1 +define float @test1() nounwind uwtable readnone ssp { +; CHECK-NOT: floorf + %foo = call float @floorf(float 0x4000CCCCC0000000) nounwind readnone + ret float %foo +} + +; CHECK: test2 +define float @test2() nounwind uwtable readnone ssp { +; CHECK-NOT: ceilf + %foo = call float @ceilf(float 0x4000CCCCC0000000) nounwind readnone + ret float %foo +} + +; CHECK: test3 +define float @test3() nounwind uwtable readnone ssp { +; CHECK-NOT: truncf + %foo = call float @truncf(float 0x4000CCCCC0000000) nounwind readnone + ret float %foo +} + +declare float @floorf(float) nounwind readnone +declare float @ceilf(float) nounwind readnone +declare float @truncf(float) nounwind readnone + + + diff --git a/test/CodeGen/ARM/fp16.ll b/test/CodeGen/ARM/fp16.ll index c5583b9..1261ea5 100644 --- a/test/CodeGen/ARM/fp16.ll +++ b/test/CodeGen/ARM/fp16.ll @@ -15,14 +15,14 @@ entry: %1 = load i16* @y, align 2 %2 = tail call float @llvm.convert.from.fp16(i16 %0) ; CHECK: __gnu_h2f_ieee -; CHECK-FP16: vcvtb.f16.f32 +; CHECK-FP16: vcvtb.f32.f16 %3 = tail call float @llvm.convert.from.fp16(i16 %1) ; CHECK: __gnu_h2f_ieee -; CHECK-FP16: vcvtb.f16.f32 +; CHECK-FP16: vcvtb.f32.f16 %4 = fadd float %2, %3 %5 = tail call i16 @llvm.convert.to.fp16(float %4) ; CHECK: __gnu_f2h_ieee -; CHECK-FP16: vcvtb.f32.f16 +; CHECK-FP16: vcvtb.f16.f32 store i16 %5, i16* @x, align 2 ret void } diff --git a/test/CodeGen/ARM/fparith.ll b/test/CodeGen/ARM/fparith.ll index ce6d6b2..40ea33b 100644 --- a/test/CodeGen/ARM/fparith.ll +++ b/test/CodeGen/ARM/fparith.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+vfp2 | FileCheck %s define float @f1(float %a, float %b) { ;CHECK: f1: @@ -84,7 +84,7 @@ define float @f11(float %a) { ;CHECK: f11: ;CHECK: bic entry: - %tmp1 = call float @fabsf( float %a ) ; <float> [#uses=1] + %tmp1 = call float @fabsf( float %a ) readnone ; <float> [#uses=1] ret float %tmp1 } @@ -94,7 +94,7 @@ define double @f12(double %a) { ;CHECK: f12: ;CHECK: vabs.f64 entry: - %tmp1 = call double @fabs( double %a ) ; <double> [#uses=1] + %tmp1 = call double @fabs( double %a ) readnone ; <double> [#uses=1] ret double %tmp1 } diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll index 418d4f3..5575566 100644 --- a/test/CodeGen/ARM/select.ll +++ b/test/CodeGen/ARM/select.ll @@ -76,12 +76,11 @@ define double @f7(double %a, double %b) { ; block generated, odds are good that we have close to the ideal code for this: ; ; CHECK-NEON: _f8: +; CHECK-NEON: movw [[R3:r[0-9]+]], #1123 ; CHECK-NEON: adr [[R2:r[0-9]+]], LCPI7_0 -; CHECK-NEON-NEXT: movw [[R3:r[0-9]+]], #1123 -; CHECK-NEON-NEXT: adds {{r.*}}, [[R2]], #4 ; CHECK-NEON-NEXT: cmp r0, [[R3]] -; CHECK-NEON-NEXT: it ne -; CHECK-NEON-NEXT: movne {{r.*}}, [[R2]] +; CHECK-NEON-NEXT: it eq +; CHECK-NEON-NEXT: addeq.w {{r.*}}, [[R2]] ; CHECK-NEON-NEXT: ldr ; CHECK-NEON: bx diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll index ca2e18a..cfc0e70 100644 --- a/test/CodeGen/ARM/select_xform.ll +++ b/test/CodeGen/ARM/select_xform.ll @@ -4,13 +4,13 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; ARM: t1: -; ARM: sub r0, r1, #-2147483647 -; ARM: movgt r0, r1 +; ARM: suble r1, r1, #-2147483647 +; 
ARM: mov r0, r1 ; T2: t1: ; T2: mvn r0, #-2147483648 -; T2: add r0, r1 -; T2: movgt r0, r1 +; T2: addle.w r1, r1 +; T2: mov r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 2147483647 %tmp3 = add i32 %tmp2, %b @@ -19,12 +19,12 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; ARM: t2: -; ARM: sub r0, r1, #10 -; ARM: movgt r0, r1 +; ARM: suble r1, r1, #10 +; ARM: mov r0, r1 ; T2: t2: -; T2: sub.w r0, r1, #10 -; T2: movgt r0, r1 +; T2: suble.w r1, r1, #10 +; T2: mov r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 10 %tmp3 = sub i32 %b, %tmp2 @@ -33,12 +33,12 @@ define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { define i32 @t3(i32 %a, i32 %b, i32 %x, i32 %y) nounwind { ; ARM: t3: -; ARM: mvnlt r2, #0 -; ARM: and r0, r2, r3 +; ARM: andge r3, r3, r2 +; ARM: mov r0, r3 ; T2: t3: -; T2: movlt.w r2, #-1 -; T2: and.w r0, r2, r3 +; T2: andge.w r3, r3, r2 +; T2: mov r0, r3 %cond = icmp slt i32 %a, %b %z = select i1 %cond, i32 -1, i32 %x %s = and i32 %z, %y @@ -47,12 +47,12 @@ define i32 @t3(i32 %a, i32 %b, i32 %x, i32 %y) nounwind { define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind { ; ARM: t4: -; ARM: movlt r2, #0 -; ARM: orr r0, r2, r3 +; ARM: orrge r3, r3, r2 +; ARM: mov r0, r3 ; T2: t4: -; T2: movlt r2, #0 -; T2: orr.w r0, r2, r3 +; T2: orrge.w r3, r3, r2 +; T2: mov r0, r3 %cond = icmp slt i32 %a, %b %z = select i1 %cond, i32 0, i32 %x %s = or i32 %z, %y @@ -104,3 +104,121 @@ entry: ret i32 %tmp3 } +; Fold ORRri into movcc. +define i32 @t8(i32 %a, i32 %b) nounwind { +; ARM: t8: +; ARM: cmp r0, r1 +; ARM: orrge r0, r1, #1 + +; T2: t8: +; T2: cmp r0, r1 +; T2: orrge r0, r1, #1 + %x = or i32 %b, 1 + %cond = icmp slt i32 %a, %b + %tmp1 = select i1 %cond, i32 %a, i32 %x + ret i32 %tmp1 +} + +; Fold ANDrr into movcc. +define i32 @t9(i32 %a, i32 %b, i32 %c) nounwind { +; ARM: t9: +; ARM: cmp r0, r1 +; ARM: andge r0, r1, r2 + +; T2: t9: +; T2: cmp r0, r1 +; T2: andge.w r0, r1, r2 + %x = and i32 %b, %c + %cond = icmp slt i32 %a, %b + %tmp1 = select i1 %cond, i32 %a, i32 %x + ret i32 %tmp1 +} + +; Fold EORrs into movcc. +define i32 @t10(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; ARM: t10: +; ARM: cmp r0, r1 +; ARM: eorge r0, r1, r2, lsl #7 + +; T2: t10: +; T2: cmp r0, r1 +; T2: eorge.w r0, r1, r2, lsl #7 + %s = shl i32 %c, 7 + %x = xor i32 %b, %s + %cond = icmp slt i32 %a, %b + %tmp1 = select i1 %cond, i32 %a, i32 %x + ret i32 %tmp1 +} + +; Fold ORRri into movcc, reversing the condition. +define i32 @t11(i32 %a, i32 %b) nounwind { +; ARM: t11: +; ARM: cmp r0, r1 +; ARM: orrlt r0, r1, #1 + +; T2: t11: +; T2: cmp r0, r1 +; T2: orrlt r0, r1, #1 + %x = or i32 %b, 1 + %cond = icmp slt i32 %a, %b + %tmp1 = select i1 %cond, i32 %x, i32 %a + ret i32 %tmp1 +} + +; Fold ADDri12 into movcc +define i32 @t12(i32 %a, i32 %b) nounwind { +; ARM: t12: +; ARM: cmp r0, r1 +; ARM: addge r0, r1, + +; T2: t12: +; T2: cmp r0, r1 +; T2: addwge r0, r1, #3000 + %x = add i32 %b, 3000 + %cond = icmp slt i32 %a, %b + %tmp1 = select i1 %cond, i32 %a, i32 %x + ret i32 %tmp1 +} + +; Handle frame index operands. 
+define void @pr13628() nounwind uwtable align 2 { + %x3 = alloca i8, i32 256, align 8 + %x4 = load i8* undef, align 1 + %x5 = icmp ne i8 %x4, 0 + %x6 = select i1 %x5, i8* %x3, i8* null + call void @bar(i8* %x6) nounwind + ret void +} +declare void @bar(i8*) + +; Fold zext i1 into predicated add +define i32 @t13(i32 %c, i32 %a) nounwind readnone ssp { +entry: +; ARM: t13 +; ARM: cmp r1, #10 +; ARM: addgt r0, r0, #1 + +; T2: t13 +; T2: cmp r1, #10 +; T2: addgt.w r0, r0, #1 + %cmp = icmp sgt i32 %a, 10 + %conv = zext i1 %cmp to i32 + %add = add i32 %conv, %c + ret i32 %add +} + +; Fold sext i1 into predicated sub +define i32 @t14(i32 %c, i32 %a) nounwind readnone ssp { +entry: +; ARM: t14 +; ARM: cmp r1, #10 +; ARM: subgt r0, r0, #1 + +; T2: t14 +; T2: cmp r1, #10 +; T2: subgt.w r0, r0, #1 + %cmp = icmp sgt i32 %a, 10 + %conv = sext i1 %cmp to i32 + %add = add i32 %conv, %c + ret i32 %add +} diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll index a8237c6..869b926 100644 --- a/test/CodeGen/ARM/unaligned_load_store.ll +++ b/test/CodeGen/ARM/unaligned_load_store.ll @@ -1,25 +1,25 @@ -; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=GENERIC -; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6 -; RUN: llc < %s -mtriple=armv6-apple-darwin -arm-strict-align | FileCheck %s -check-prefix=GENERIC -; RUN: llc < %s -mtriple=armv6-linux | FileCheck %s -check-prefix=GENERIC +; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED +; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 -arm-strict-align -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED +; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=UNALIGNED ; rdar://7113725 +; rdar://12091029 define void @t(i8* nocapture %a, i8* nocapture %b) nounwind { entry: -; GENERIC: t: -; GENERIC: ldrb [[R2:r[0-9]+]] -; GENERIC: ldrb [[R3:r[0-9]+]] -; GENERIC: ldrb [[R12:r[0-9]+]] -; GENERIC: ldrb [[R1:r[0-9]+]] -; GENERIC: strb [[R1]] -; GENERIC: strb [[R12]] -; GENERIC: strb [[R3]] -; GENERIC: strb [[R2]] - -; DARWIN_V6: t: -; DARWIN_V6: ldr r1 -; DARWIN_V6: str r1 +; EXPANDED: t: +; EXPANDED: ldrb [[R2:r[0-9]+]] +; EXPANDED: ldrb [[R3:r[0-9]+]] +; EXPANDED: ldrb [[R12:r[0-9]+]] +; EXPANDED: ldrb [[R1:r[0-9]+]] +; EXPANDED: strb [[R1]] +; EXPANDED: strb [[R12]] +; EXPANDED: strb [[R3]] +; EXPANDED: strb [[R2]] + +; UNALIGNED: t: +; UNALIGNED: ldr r1 +; UNALIGNED: str r1 %__src1.i = bitcast i8* %b to i32* ; <i32*> [#uses=1] %__dest2.i = bitcast i8* %a to i32* ; <i32*> [#uses=1] @@ -27,3 +27,35 @@ entry: store i32 %tmp.i, i32* %__dest2.i, align 1 ret void } + +define void @hword(double* %a, double* %b) nounwind { +entry: +; EXPANDED: hword: +; EXPANDED-NOT: vld1 +; EXPANDED: ldrh +; EXPANDED-NOT: str1 +; EXPANDED: strh + +; UNALIGNED: hword: +; UNALIGNED: vld1.16 +; UNALIGNED: vst1.16 + %tmp = load double* %a, align 2 + store double %tmp, double* %b, align 2 + ret void +} + +define void @byte(double* %a, double* %b) nounwind { +entry: +; EXPANDED: byte: +; EXPANDED-NOT: vld1 +; EXPANDED: ldrb +; EXPANDED-NOT: str1 +; EXPANDED: strb + +; UNALIGNED: byte: +; UNALIGNED: vld1.8 +; UNALIGNED: vst1.8 + %tmp = load double* %a, align 1 + store double %tmp, double* %b, align 1 + ret void +} diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll index 49a6982..7a4b34f 100644 --- a/test/CodeGen/ARM/vfp.ll +++ b/test/CodeGen/ARM/vfp.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=arm 
-mattr=+vfp2 -disable-post-ra | FileCheck %s -; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+vfp2 -disable-post-ra | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+vfp2 -disable-post-ra -regalloc=basic | FileCheck %s define void @test(float* %P, double* %D) { %A = load float* %P ; <float> [#uses=1] @@ -17,11 +17,11 @@ define void @test_abs(float* %P, double* %D) { ;CHECK: test_abs: %a = load float* %P ; <float> [#uses=1] ;CHECK: vabs.f32 - %b = call float @fabsf( float %a ) ; <float> [#uses=1] + %b = call float @fabsf( float %a ) readnone ; <float> [#uses=1] store float %b, float* %P %A = load double* %D ; <double> [#uses=1] ;CHECK: vabs.f64 - %B = call double @fabs( double %A ) ; <double> [#uses=1] + %B = call double @fabs( double %A ) readnone ; <double> [#uses=1] store double %B, double* %D ret void } diff --git a/test/CodeGen/CellSPU/fcmp32.ll b/test/CodeGen/CellSPU/fcmp32.ll index c14fd7b..f6b028d 100644 --- a/test/CodeGen/CellSPU/fcmp32.ll +++ b/test/CodeGen/CellSPU/fcmp32.ll @@ -1,4 +1,4 @@ -; RUN: llc --march=cellspu %s -o - | FileCheck %s +; RUN: llc --mtriple=cellspu-unknown-elf %s -o - | FileCheck %s ; Exercise the floating point comparison operators for f32: @@ -15,8 +15,8 @@ define i1 @fcmp_eq(float %arg1, float %arg2) { define i1 @fcmp_mag_eq(float %arg1, float %arg2) { ; CHECK: fcmeq ; CHECK: bi $lr - %1 = call float @fabsf(float %arg1) - %2 = call float @fabsf(float %arg2) + %1 = call float @fabsf(float %arg1) readnone + %2 = call float @fabsf(float %arg2) readnone %3 = fcmp oeq float %1, %2 ret i1 %3 } diff --git a/test/CodeGen/CellSPU/fneg-fabs.ll b/test/CodeGen/CellSPU/fneg-fabs.ll index 1e5e3b3..6e01906 100644 --- a/test/CodeGen/CellSPU/fneg-fabs.ll +++ b/test/CodeGen/CellSPU/fneg-fabs.ll @@ -32,11 +32,11 @@ declare double @fabs(double) declare float @fabsf(float) define double @fabs_dp(double %X) { - %Y = call double @fabs( double %X ) + %Y = call double @fabs( double %X ) readnone ret double %Y } define float @fabs_sp(float %X) { - %Y = call float @fabsf( float %X ) + %Y = call float @fabsf( float %X ) readnone ret float %Y } diff --git a/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll b/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll deleted file mode 100644 index ad418f7..0000000 --- a/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: llc < %s -; PR4317 - -declare i32 @b() - -define void @a() { -entry: - ret void - -dummy: - invoke i32 @b() to label %reg unwind label %reg - -reg: - %lpad = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 - catch i8* null - ret void -} - -declare i32 @__gxx_personality_v0(...) 
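Aside (illustrative sketch, not from the patch): a recurring edit in the tests above is tagging fabs/fabsf/copysign calls readnone. These tests rely on the backend folding the libm call into the matching operation (vabs.f32, vabs.f64, vbsl on ARM), and that folding requires the call to be known not to access memory; without the attribute, llc emits an ordinary libcall. A minimal example in the same IR dialect:

define float @abs32(float %a) nounwind {
entry:
  ; readnone is what permits lowering to vabs.f32 instead of a call to fabsf
  %r = tail call float @fabsf(float %a) readnone
  ret float %r
}
declare float @fabsf(float)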
diff --git a/test/CodeGen/Generic/donothing.ll b/test/CodeGen/Generic/donothing.ll index d6ba138..3727b60 100644 --- a/test/CodeGen/Generic/donothing.ll +++ b/test/CodeGen/Generic/donothing.ll @@ -7,7 +7,7 @@ declare void @llvm.donothing() readnone ; CHECK: f1 define void @f1() nounwind uwtable ssp { entry: -; CHECK-NOT donothing +; CHECK-NOT: donothing invoke void @llvm.donothing() to label %invoke.cont unwind label %lpad @@ -25,7 +25,7 @@ lpad: ; CHECK: f2 define void @f2() nounwind { entry: -; CHECK-NOT donothing +; CHECK-NOT: donothing call void @llvm.donothing() ret void } diff --git a/test/CodeGen/Hexagon/opt-fabs.ll b/test/CodeGen/Hexagon/opt-fabs.ll index 1cf0dd0..31b56fd 100644 --- a/test/CodeGen/Hexagon/opt-fabs.ll +++ b/test/CodeGen/Hexagon/opt-fabs.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; RUN: llc -mtriple=hexagon-unknown-elf -mcpu=hexagonv5 < %s | FileCheck %s ; Optimize fabsf to clrbit in V5. ; CHECK: r{{[0-9]+}} = clrbit(r{{[0-9]+}}, #31) @@ -8,7 +8,7 @@ entry: %x.addr = alloca float, align 4 store float %x, float* %x.addr, align 4 %0 = load float* %x.addr, align 4 - %call = call float @fabsf(float %0) + %call = call float @fabsf(float %0) readnone ret float %call } diff --git a/test/CodeGen/Hexagon/simpletailcall.ll b/test/CodeGen/Hexagon/simpletailcall.ll new file mode 100644 index 0000000..2876404 --- /dev/null +++ b/test/CodeGen/Hexagon/simpletailcall.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; CHECK: foo_empty +; CHECK-NOT: allocframe +; CHECK-NOT: memd(r29 +; CHECK: jump bar_empty + +define void @foo_empty(i32 %h) nounwind { +entry: + %add = add nsw i32 %h, 3 + %call = tail call i32 bitcast (i32 (...)* @bar_empty to i32 (i32)*)(i32 %add) nounwind + ret void +} + +declare i32 @bar_empty(...) 
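Aside on the donothing.ll fix above: FileCheck only treats a line as a directive when the prefix is followed by a colon, so the original lines were silently ignored as ordinary comments and the test asserted nothing:

; CHECK-NOT donothing    <- missing colon: a plain comment, skipped by FileCheck
; CHECK-NOT: donothing   <- a real negative check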
diff --git a/test/CodeGen/MSP430/Inst8rr.ll b/test/CodeGen/MSP430/Inst8rr.ll index 45342e2..b9c17d9 100644 --- a/test/CodeGen/MSP430/Inst8rr.ll +++ b/test/CodeGen/MSP430/Inst8rr.ll @@ -4,7 +4,7 @@ target triple = "msp430-generic-generic" define i8 @mov(i8 %a, i8 %b) nounwind { ; CHECK: mov: -; CHECK: mov.b r14, r15 +; CHECK: mov.{{[bw]}} r14, r15 ret i8 %b } diff --git a/test/CodeGen/Mips/alloca.ll b/test/CodeGen/Mips/alloca.ll index 14288de..29f43c8 100644 --- a/test/CodeGen/Mips/alloca.ll +++ b/test/CodeGen/Mips/alloca.ll @@ -4,14 +4,10 @@ define i32 @twoalloca(i32 %size) nounwind { entry: ; CHECK: subu $[[T0:[0-9]+]], $sp, $[[SZ:[0-9]+]] ; CHECK: addu $sp, $zero, $[[T0]] -; CHECK: addiu $[[T1:[0-9]+]], $sp, [[OFF:[0-9]+]] ; CHECK: subu $[[T2:[0-9]+]], $sp, $[[SZ]] ; CHECK: addu $sp, $zero, $[[T2]] -; CHECK: addiu $[[T3:[0-9]+]], $sp, [[OFF]] -; CHECK: lw $[[T4:[0-9]+]], %call16(foo) -; CHECK: addu $25, $zero, $[[T4]] -; CHECK: addu $4, $zero, $[[T1]] -; CHECK: jalr $25 +; CHECK: addu $4, $zero, $[[T0]] +; CHECK: addu $4, $zero, $[[T2]] %tmp1 = alloca i8, i32 %size, align 4 %add.ptr = getelementptr inbounds i8* %tmp1, i32 5 store i8 97, i8* %add.ptr, align 1 @@ -34,7 +30,6 @@ entry: ; CHECK: alloca2 ; CHECK: subu $[[T0:[0-9]+]], $sp ; CHECK: addu $sp, $zero, $[[T0]] -; CHECK: addiu $[[T1:[0-9]+]], $sp %tmp1 = alloca i8, i32 %size, align 4 %0 = bitcast i8* %tmp1 to i32* @@ -42,7 +37,7 @@ entry: br i1 %cmp, label %if.then, label %if.else if.then: ; preds = %entry -; CHECK: addiu $4, $[[T1]], 40 +; CHECK: addiu $4, $[[T0]], 40 %add.ptr = getelementptr inbounds i8* %tmp1, i32 40 %1 = bitcast i8* %add.ptr to i32* @@ -52,7 +47,7 @@ if.then: ; preds = %entry br label %if.end if.else: ; preds = %entry -; CHECK: addiu $4, $[[T1]], 12 +; CHECK: addiu $4, $[[T0]], 12 %add.ptr5 = getelementptr inbounds i8* %tmp1, i32 12 %2 = bitcast i8* %add.ptr5 to i32* @@ -60,7 +55,7 @@ if.else: ; preds = %entry br label %if.end if.end: ; preds = %if.else, %if.then -; CHECK: lw $5, 0($[[T1]]) +; CHECK: lw $5, 0($[[T0]]) ; CHECK: lw $25, %call16(printf) %.pre-phi = phi i32* [ %2, %if.else ], [ %.pre, %if.then ] diff --git a/test/CodeGen/Mips/and1.ll b/test/CodeGen/Mips/and1.ll new file mode 100644 index 0000000..4ff1204 --- /dev/null +++ b/test/CodeGen/Mips/and1.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@x = global i32 65504, align 4 +@y = global i32 60929, align 4 +@.str = private unnamed_addr constant [7 x i8] c"%08x \0A\00", align 1 + +define i32 @main() nounwind { +entry: + %0 = load i32* @x, align 4 + %1 = load i32* @y, align 4 + %and = and i32 %0, %1 +; 16: and ${{[0-9]+}}, ${{[0-9]+}} + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %and) + ret i32 0 +} + +declare i32 @printf(i8*, ...) 
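Aside (an interpretation of the diff above, not text from the patch): the alloca.ll update drops the separate addiu that used to rematerialize the dynamically allocated block's address; the tightened checks pin down that the pointer passed to the callee is now the post-adjustment stack pointer copy itself:

; before: addiu $[[T1]], $sp, [[OFF]]  ...  addu $4, $zero, $[[T1]]
; after:  (no addiu)                   ...  addu $4, $zero, $[[T0]]   ; $[[T0]] is the adjusted $sp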
diff --git a/test/CodeGen/Mips/fabs.ll b/test/CodeGen/Mips/fabs.ll index b296ab3..49d8a72 100644 --- a/test/CodeGen/Mips/fabs.ll +++ b/test/CodeGen/Mips/fabs.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=32 -; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2 -; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64 -; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2 -; RUN: llc < %s -march=mipsel -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefix=NO-NAN +; RUN: llc < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 | FileCheck %s -check-prefix=32 +; RUN: llc < %s -mtriple=mipsel-linux-gnu -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2 +; RUN: llc < %s -mtriple=mips64el-linux-gnu -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64 +; RUN: llc < %s -mtriple=mips64el-linux-gnu -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2 +; RUN: llc < %s -mtriple=mipsel-linux-gnu -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefix=NO-NAN define float @foo0(float %a) nounwind readnone { entry: diff --git a/test/CodeGen/Mips/fp-indexed-ls.ll b/test/CodeGen/Mips/fp-indexed-ls.ll index 08bd6e7..1c4a3fd 100644 --- a/test/CodeGen/Mips/fp-indexed-ls.ll +++ b/test/CodeGen/Mips/fp-indexed-ls.ll @@ -28,7 +28,7 @@ entry: define float @foo2(i32 %b, i32 %c) nounwind readonly { entry: -; CHECK: luxc1 +; CHECK-NOT: luxc1 %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c %0 = load float* %arrayidx1, align 1 ret float %0 @@ -54,7 +54,7 @@ entry: define void @foo5(i32 %b, i32 %c) nounwind { entry: -; CHECK: suxc1 +; CHECK-NOT: suxc1 %0 = load float* @gf, align 4 %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c store float %0, float* %arrayidx1, align 1 @@ -64,7 +64,7 @@ entry: define double @foo6(i32 %b, i32 %c) nounwind readonly { entry: ; CHECK: foo6 -; CHECK-NOT: ldxc1 +; CHECK-NOT: luxc1 %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c %0 = load double* %arrayidx1, align 1 ret double %0 @@ -73,7 +73,7 @@ entry: define void @foo7(i32 %b, i32 %c) nounwind { entry: ; CHECK: foo7 -; CHECK-NOT: sdxc1 +; CHECK-NOT: suxc1 %0 = load double* @gd, align 8 %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c store double %0, double* %arrayidx1, align 1 @@ -83,7 +83,7 @@ entry: define float @foo8() nounwind readonly { entry: ; CHECK: foo8 -; CHECK: luxc1 +; CHECK-NOT: luxc1 %0 = load float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1 ret float %0 } @@ -91,7 +91,7 @@ entry: define void @foo9(float %f) nounwind { entry: ; CHECK: foo9 -; CHECK: suxc1 +; CHECK-NOT: suxc1 store float %f, float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1 ret void } diff --git a/test/CodeGen/Mips/helloworld.ll b/test/CodeGen/Mips/helloworld.ll new file mode 100644 index 0000000..bee93ac --- /dev/null +++ b/test/CodeGen/Mips/helloworld.ll @@ -0,0 +1,34 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C1 +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C2 +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=PE +; +; re-enable this when mips16's jalr is fixed. 
+; DISABLED: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=SR + + +@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1 + +define i32 @main() nounwind { +entry: + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0)) + ret i32 0 + +; SR: .set mips16 # @main + +; SR: save $ra, [[FS:[0-9]+]] +; PE: li $[[T1:[0-9]+]], %hi(_gp_disp) +; PE: addiu $[[T2:[0-9]+]], $pc, %lo(_gp_disp) +; PE: sll $[[T3:[0-9]+]], $[[T1]], 16 +; C1: lw ${{[0-9]+}}, %got($.str)(${{[0-9]+}}) +; C2: lw ${{[0-9]+}}, %call16(printf)(${{[0-9]+}}) +; C1: addiu ${{[0-9]+}}, %lo($.str) +; C2: move $25, ${{[0-9]+}} +; C1: move $gp, ${{[0-9]+}} +; C1: jalr ${{[0-9]+}} +; SR: restore $ra, [[FS]] +; PE: li $2, 0 +; PE: jr $ra + +} + +declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/Mips/helloworld_pe.ll b/test/CodeGen/Mips/helloworld_pe.ll deleted file mode 100644 index ba46662..0000000 --- a/test/CodeGen/Mips/helloworld_pe.ll +++ /dev/null @@ -1,18 +0,0 @@ -; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 - -@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1 - -define i32 @main() nounwind { -entry: - %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0)) - ret i32 0 - -; 16: .set mips16 # @main - -; 16: li $[[T1:[0-9]+]], %hi(_gp_disp) -; 16: addiu $[[T2:[0-9]+]], $pc, %lo(_gp_disp) -; 16: sll $[[T3:[0-9]+]], $[[T1]], 16 -; 16: li $2, 0 -} - -declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/Mips/i64arg.ll b/test/CodeGen/Mips/i64arg.ll index e33021f..8b1f71b 100644 --- a/test/CodeGen/Mips/i64arg.ll +++ b/test/CodeGen/Mips/i64arg.ll @@ -10,8 +10,8 @@ entry: ; CHECK: jalr tail call void @ff1(i32 %i, i64 1085102592623924856) nounwind ; CHECK: lw $25, %call16(ff2) -; CHECK: lw $[[R2:[0-9]+]], 88($sp) -; CHECK: lw $[[R3:[0-9]+]], 92($sp) +; CHECK: lw $[[R2:[0-9]+]], 80($sp) +; CHECK: lw $[[R3:[0-9]+]], 84($sp) ; CHECK: addu $4, $zero, $[[R2]] ; CHECK: addu $5, $zero, $[[R3]] ; CHECK: jalr $25 diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll index c81cc76..2e548790 100644 --- a/test/CodeGen/Mips/largeimmprinting.ll +++ b/test/CodeGen/Mips/largeimmprinting.ll @@ -6,8 +6,8 @@ define void @f() nounwind { entry: -; CHECK: lui $at, 65534 -; CHECK: addiu $at, $at, -24 +; CHECK: lui $at, 65535 +; CHECK: addiu $at, $at, -16 ; CHECK: addu $sp, $sp, $at %agg.tmp = alloca %struct.S1, align 1 diff --git a/test/CodeGen/Mips/lb1.ll b/test/CodeGen/Mips/lb1.ll new file mode 100644 index 0000000..aac2767 --- /dev/null +++ b/test/CodeGen/Mips/lb1.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@c = global i8 -1, align 1 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 + +define i32 @main() nounwind { +entry: + %i = alloca i32, align 4 + %0 = load i8* @c, align 1 +; 16: lb ${{[0-9]+}}, 0(${{[0-9]+}}) + %conv = sext i8 %0 to i32 + store i32 %conv, i32* %i, align 4 + %1 = load i32* %i, align 4 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1) + ret i32 0 +} + +declare i32 @printf(i8*, ...) 
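Aside on the helloworld.ll test added above: it shows a FileCheck idiom the new mips16 tests lean on. The same llc invocation is piped through FileCheck several times with different -check-prefix values (C1, C2, PE, plus the disabled SR), so one output can be matched against several independent line sequences without their relative order interfering. Schematically:

; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C1
; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=C2
; C1: lw ${{[0-9]+}}, %got($.str)(${{[0-9]+}})
; C2: lw ${{[0-9]+}}, %call16(printf)(${{[0-9]+}})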
diff --git a/test/CodeGen/Mips/lbu1.ll b/test/CodeGen/Mips/lbu1.ll new file mode 100644 index 0000000..63e0cca --- /dev/null +++ b/test/CodeGen/Mips/lbu1.ll @@ -0,0 +1,19 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@c = global i8 97, align 1 +@.str = private unnamed_addr constant [5 x i8] c"%c \0A\00", align 1 + +define i32 @main() nounwind { +entry: + %i = alloca i32, align 4 + %0 = load i8* @c, align 1 + %conv = zext i8 %0 to i32 +; 16: lbu ${{[0-9]+}}, 0(${{[0-9]+}}) + store i32 %conv, i32* %i, align 4 + %1 = load i8* @c, align 1 + %conv1 = zext i8 %1 to i32 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %conv1) + ret i32 0 +} + +declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/Mips/lh1.ll b/test/CodeGen/Mips/lh1.ll new file mode 100644 index 0000000..1f95b09 --- /dev/null +++ b/test/CodeGen/Mips/lh1.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@s = global i16 -1, align 2 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 + +define i32 @main() nounwind { +entry: + %i = alloca i32, align 4 + %0 = load i16* @s, align 2 + %conv = sext i16 %0 to i32 +; 16: lh ${{[0-9]+}}, 0(${{[0-9]+}}) + store i32 %conv, i32* %i, align 4 + %1 = load i32* %i, align 4 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1) + ret i32 0 +} + +declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/Mips/lhu1.ll b/test/CodeGen/Mips/lhu1.ll new file mode 100644 index 0000000..0cfcede --- /dev/null +++ b/test/CodeGen/Mips/lhu1.ll @@ -0,0 +1,19 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + + +@s = global i16 255, align 2 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 + +define i32 @main() nounwind { +entry: + %i = alloca i32, align 4 + %0 = load i16* @s, align 2 + %conv = zext i16 %0 to i32 +; 16: lhu ${{[0-9]+}}, 0(${{[0-9]+}}) + store i32 %conv, i32* %i, align 4 + %1 = load i32* %i, align 4 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1) + ret i32 0 +} + +declare i32 @printf(i8*, ...) 
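Aside: the four small tests above (lb1.ll, lbu1.ll, lh1.ll, lhu1.ll) pin down extension-aware load selection for mips16: a sign-extended i8 or i16 load must select lb or lh, while the zero-extended forms must select lbu or lhu. In IR terms:

  %0 = load i8* @c, align 1
  %s = sext i8 %0 to i32    ; 16: lb  ${{[0-9]+}}, 0(${{[0-9]+}})
  %z = zext i8 %0 to i32    ; 16: lbu ${{[0-9]+}}, 0(${{[0-9]+}})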
diff --git a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll index 09745fb..bbdc05c 100644 --- a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll +++ b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll @@ -30,7 +30,7 @@ entry: define float @foo2(i32 %b, i32 %c) nounwind readonly { entry: -; CHECK: luxc1 +; CHECK-NOT: luxc1 %idxprom = zext i32 %c to i64 %idxprom1 = zext i32 %b to i64 %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom @@ -60,7 +60,7 @@ entry: define void @foo5(i32 %b, i32 %c) nounwind { entry: -; CHECK: suxc1 +; CHECK-NOT: suxc1 %0 = load float* @gf, align 4 %idxprom = zext i32 %c to i64 %idxprom1 = zext i32 %b to i64 @@ -72,7 +72,7 @@ entry: define double @foo6(i32 %b, i32 %c) nounwind readonly { entry: ; CHECK: foo6 -; CHECK-NOT: ldxc1 +; CHECK-NOT: luxc1 %idxprom = zext i32 %c to i64 %idxprom1 = zext i32 %b to i64 %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom @@ -83,7 +83,7 @@ entry: define void @foo7(i32 %b, i32 %c) nounwind { entry: ; CHECK: foo7 -; CHECK-NOT: sdxc1 +; CHECK-NOT: suxc1 %0 = load double* @gd, align 8 %idxprom = zext i32 %c to i64 %idxprom1 = zext i32 %b to i64 @@ -95,7 +95,7 @@ entry: define float @foo8() nounwind readonly { entry: ; CHECK: foo8 -; CHECK: luxc1 +; CHECK-NOT: luxc1 %0 = load float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1 ret float %0 } @@ -103,7 +103,7 @@ entry: define void @foo9(float %f) nounwind { entry: ; CHECK: foo9 -; CHECK: suxc1 +; CHECK-NOT: suxc1 store float %f, float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1 ret void } diff --git a/test/CodeGen/Mips/helloworld_call.ll b/test/CodeGen/Mips/neg1.ll index 61c0446..281e626 100644 --- a/test/CodeGen/Mips/helloworld_call.ll +++ b/test/CodeGen/Mips/neg1.ll @@ -1,17 +1,15 @@ ; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 -@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1 +@i = global i32 10, align 4 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 define i32 @main() nounwind { entry: - %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0)) + %0 = load i32* @i, align 4 + %sub = sub nsw i32 0, %0 +; 16: neg ${{[0-9]+}}, ${{[0-9]+}} + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %sub) ret i32 0 - -; 16: .set mips16 # @main - -; 16: move $gp, ${{[0-9]+}} -; 16: jalr ${{[0-9]+}} -; 16: li $2, 0 } declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/Mips/not1.ll b/test/CodeGen/Mips/not1.ll new file mode 100644 index 0000000..2163b23 --- /dev/null +++ b/test/CodeGen/Mips/not1.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@x = global i32 65504, align 4 +@y = global i32 60929, align 4 +@.str = private unnamed_addr constant [7 x i8] c"%08x \0A\00", align 1 + +define i32 @main() nounwind { +entry: + %0 = load i32* @x, align 4 + %neg = xor i32 %0, -1 +; 16: not ${{[0-9]+}}, ${{[0-9]+}} + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %neg) + ret i32 0 +} + +declare i32 @printf(i8*, ...) 
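Aside: both fp-indexed-ls updates (the 32-bit fp-indexed-ls.ll earlier and the mips64 variant just above) flip the luxc1/suxc1 expectations from CHECK to CHECK-NOT; the unaligned indexed FP load/store instructions are no longer expected to be selected for align-1 accesses such as:

  %0 = load float* %arrayidx2, align 1         ; must not become luxc1
  store float %f, float* %arrayidx2, align 1   ; must not become suxc1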
diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll index d5eac99..eac0d80 100644 --- a/test/CodeGen/Mips/o32_cc_byval.ll +++ b/test/CodeGen/Mips/o32_cc_byval.ll @@ -43,16 +43,16 @@ declare void @callee3(float, %struct.S3* byval, %struct.S1* byval) define void @f2(float %f, %struct.S1* nocapture byval %s1) nounwind { entry: -; CHECK: addiu $sp, $sp, -56 -; CHECK: sw $7, 68($sp) -; CHECK: sw $6, 64($sp) -; CHECK: lw $4, 88($sp) -; CHECK: ldc1 $f[[F0:[0-9]+]], 80($sp) -; CHECK: lw $[[R3:[0-9]+]], 72($sp) -; CHECK: lw $[[R4:[0-9]+]], 76($sp) -; CHECK: lw $[[R2:[0-9]+]], 68($sp) -; CHECK: lh $[[R1:[0-9]+]], 66($sp) -; CHECK: lb $[[R0:[0-9]+]], 64($sp) +; CHECK: addiu $sp, $sp, -48 +; CHECK: sw $7, 60($sp) +; CHECK: sw $6, 56($sp) +; CHECK: lw $4, 80($sp) +; CHECK: ldc1 $f[[F0:[0-9]+]], 72($sp) +; CHECK: lw $[[R3:[0-9]+]], 64($sp) +; CHECK: lw $[[R4:[0-9]+]], 68($sp) +; CHECK: lw $[[R2:[0-9]+]], 60($sp) +; CHECK: lh $[[R1:[0-9]+]], 58($sp) +; CHECK: lb $[[R0:[0-9]+]], 56($sp) ; CHECK: sw $[[R0]], 32($sp) ; CHECK: sw $[[R1]], 28($sp) ; CHECK: sw $[[R2]], 24($sp) @@ -80,13 +80,13 @@ declare void @callee4(i32, double, i64, i32, i16 signext, i8 signext, float) define void @f3(%struct.S2* nocapture byval %s2) nounwind { entry: -; CHECK: addiu $sp, $sp, -56 -; CHECK: sw $7, 68($sp) -; CHECK: sw $6, 64($sp) -; CHECK: sw $5, 60($sp) -; CHECK: sw $4, 56($sp) -; CHECK: lw $4, 56($sp) -; CHECK: lw $[[R0:[0-9]+]], 68($sp) +; CHECK: addiu $sp, $sp, -48 +; CHECK: sw $7, 60($sp) +; CHECK: sw $6, 56($sp) +; CHECK: sw $5, 52($sp) +; CHECK: sw $4, 48($sp) +; CHECK: lw $4, 48($sp) +; CHECK: lw $[[R0:[0-9]+]], 60($sp) ; CHECK: sw $[[R0]], 24($sp) %arrayidx = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 0 @@ -99,13 +99,13 @@ entry: define void @f4(float %f, %struct.S3* nocapture byval %s3, %struct.S1* nocapture byval %s1) nounwind { entry: -; CHECK: addiu $sp, $sp, -56 -; CHECK: sw $7, 68($sp) -; CHECK: sw $6, 64($sp) -; CHECK: sw $5, 60($sp) -; CHECK: lw $4, 68($sp) -; CHECK: lw $[[R1:[0-9]+]], 88($sp) -; CHECK: lb $[[R0:[0-9]+]], 60($sp) +; CHECK: addiu $sp, $sp, -48 +; CHECK: sw $7, 60($sp) +; CHECK: sw $6, 56($sp) +; CHECK: sw $5, 52($sp) +; CHECK: lw $4, 60($sp) +; CHECK: lw $[[R1:[0-9]+]], 80($sp) +; CHECK: lb $[[R0:[0-9]+]], 52($sp) ; CHECK: sw $[[R0]], 32($sp) ; CHECK: sw $[[R1]], 24($sp) diff --git a/test/CodeGen/Mips/o32_cc_vararg.ll b/test/CodeGen/Mips/o32_cc_vararg.ll index 49d614c..35332b6 100644 --- a/test/CodeGen/Mips/o32_cc_vararg.ll +++ b/test/CodeGen/Mips/o32_cc_vararg.ll @@ -1,6 +1,5 @@ ; RUN: llc -march=mipsel -pre-RA-sched=source < %s | FileCheck %s - ; All test functions do the same thing - they return the first variable ; argument. 
@@ -29,11 +28,11 @@ entry: ret i32 %tmp ; CHECK: va1: -; CHECK: addiu $sp, $sp, -24 -; CHECK: sw $7, 36($sp) -; CHECK: sw $6, 32($sp) -; CHECK: sw $5, 28($sp) -; CHECK: lw $2, 28($sp) +; CHECK: addiu $sp, $sp, -16 +; CHECK: sw $7, 28($sp) +; CHECK: sw $6, 24($sp) +; CHECK: sw $5, 20($sp) +; CHECK: lw $2, 20($sp) } ; check whether the variable double argument will be accessed from the 8-byte @@ -55,11 +54,11 @@ entry: ret double %tmp ; CHECK: va2: -; CHECK: addiu $sp, $sp, -24 -; CHECK: sw $7, 36($sp) -; CHECK: sw $6, 32($sp) -; CHECK: sw $5, 28($sp) -; CHECK: addiu $[[R0:[0-9]+]], $sp, 28 +; CHECK: addiu $sp, $sp, -16 +; CHECK: sw $7, 28($sp) +; CHECK: sw $6, 24($sp) +; CHECK: sw $5, 20($sp) +; CHECK: addiu $[[R0:[0-9]+]], $sp, 20 ; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 ; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 ; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] @@ -83,10 +82,10 @@ entry: ret i32 %tmp ; CHECK: va3: -; CHECK: addiu $sp, $sp, -24 -; CHECK: sw $7, 36($sp) -; CHECK: sw $6, 32($sp) -; CHECK: lw $2, 32($sp) +; CHECK: addiu $sp, $sp, -16 +; CHECK: sw $7, 28($sp) +; CHECK: sw $6, 24($sp) +; CHECK: lw $2, 24($sp) } ; double @@ -106,11 +105,11 @@ entry: ret double %tmp ; CHECK: va4: -; CHECK: addiu $sp, $sp, -32 -; CHECK: sw $7, 44($sp) -; CHECK: sw $6, 40($sp) -; CHECK: addiu ${{[0-9]+}}, $sp, 40 -; CHECK: ldc1 $f0, 40($sp) +; CHECK: addiu $sp, $sp, -24 +; CHECK: sw $7, 36($sp) +; CHECK: sw $6, 32($sp) +; CHECK: addiu ${{[0-9]+}}, $sp, 32 +; CHECK: ldc1 $f0, 32($sp) } ; int @@ -134,9 +133,9 @@ entry: ret i32 %tmp ; CHECK: va5: -; CHECK: addiu $sp, $sp, -32 -; CHECK: sw $7, 44($sp) -; CHECK: lw $2, 44($sp) +; CHECK: addiu $sp, $sp, -24 +; CHECK: sw $7, 36($sp) +; CHECK: lw $2, 36($sp) } ; double @@ -160,9 +159,9 @@ entry: ret double %tmp ; CHECK: va6: -; CHECK: addiu $sp, $sp, -32 -; CHECK: sw $7, 44($sp) -; CHECK: addiu $[[R0:[0-9]+]], $sp, 44 +; CHECK: addiu $sp, $sp, -24 +; CHECK: sw $7, 36($sp) +; CHECK: addiu $[[R0:[0-9]+]], $sp, 36 ; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 ; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 ; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] @@ -188,8 +187,8 @@ entry: ret i32 %tmp ; CHECK: va7: -; CHECK: addiu $sp, $sp, -32 -; CHECK: lw $2, 48($sp) +; CHECK: addiu $sp, $sp, -24 +; CHECK: lw $2, 40($sp) } ; double @@ -211,9 +210,9 @@ entry: ret double %tmp ; CHECK: va8: -; CHECK: addiu $sp, $sp, -40 -; CHECK: addiu ${{[0-9]+}}, $sp, 56 -; CHECK: ldc1 $f0, 56($sp) +; CHECK: addiu $sp, $sp, -32 +; CHECK: addiu ${{[0-9]+}}, $sp, 48 +; CHECK: ldc1 $f0, 48($sp) } ; int @@ -237,8 +236,8 @@ entry: ret i32 %tmp ; CHECK: va9: -; CHECK: addiu $sp, $sp, -40 -; CHECK: lw $2, 60($sp) +; CHECK: addiu $sp, $sp, -32 +; CHECK: lw $2, 52($sp) } ; double @@ -262,8 +261,8 @@ entry: ret double %tmp ; CHECK: va10: -; CHECK: addiu $sp, $sp, -40 -; CHECK: addiu $[[R0:[0-9]+]], $sp, 60 +; CHECK: addiu $sp, $sp, -32 +; CHECK: addiu $[[R0:[0-9]+]], $sp, 52 ; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 ; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 ; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] diff --git a/test/CodeGen/Mips/or1.ll b/test/CodeGen/Mips/or1.ll new file mode 100644 index 0000000..b1c3696 --- /dev/null +++ b/test/CodeGen/Mips/or1.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@x = global i32 65504, align 4 +@y = global i32 60929, align 4 +@.str = private unnamed_addr constant [7 x i8] c"%08x \0A\00", align 1 + +define i32 @main() nounwind { +entry: + %0 = load i32* @x, align 4 + %1 = load i32* @y, align 4 + %or = 
or i32 %0, %1 +; 16: or ${{[0-9]+}}, ${{[0-9]+}} + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %or) + ret i32 0 +} + +declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/Mips/return-vector-float4.ll b/test/CodeGen/Mips/return-vector-float4.ll new file mode 100644 index 0000000..ae10f12 --- /dev/null +++ b/test/CodeGen/Mips/return-vector-float4.ll @@ -0,0 +1,12 @@ +; RUN: llc -march=mipsel -mattr=+android < %s | FileCheck %s + +define <4 x float> @retvec4() nounwind readnone { +entry: +; CHECK: lwc1 $f0 +; CHECK: lwc1 $f2 +; CHECK: lwc1 $f1 +; CHECK: lwc1 $f3 + + ret <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00> +} + diff --git a/test/CodeGen/Mips/sb1.ll b/test/CodeGen/Mips/sb1.ll new file mode 100644 index 0000000..e1a28d4 --- /dev/null +++ b/test/CodeGen/Mips/sb1.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 97, align 4 +@c = common global i8 0, align 1 +@.str = private unnamed_addr constant [8 x i8] c"%i %c \0A\00", align 1 + +define i32 @main() nounwind { +entry: + %0 = load i32* @i, align 4 + %conv = trunc i32 %0 to i8 + store i8 %conv, i8* @c, align 1 + %1 = load i32* @i, align 4 + %2 = load i8* @c, align 1 + %conv1 = sext i8 %2 to i32 +; 16: sb ${{[0-9]+}}, 0(${{[0-9]+}}) + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %1, i32 %conv1) + ret i32 0 +} + +declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/Mips/sh1.ll b/test/CodeGen/Mips/sh1.ll new file mode 100644 index 0000000..1746ae2 --- /dev/null +++ b/test/CodeGen/Mips/sh1.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 97, align 4 +@s = common global i16 0, align 2 +@.str = private unnamed_addr constant [9 x i8] c"%i %hi \0A\00", align 1 + +define i32 @main() nounwind { +entry: + %0 = load i32* @i, align 4 + %conv = trunc i32 %0 to i16 + store i16 %conv, i16* @s, align 2 + %1 = load i32* @i, align 4 + %2 = load i16* @s, align 2 + %conv1 = sext i16 %2 to i32 +; 16: sh ${{[0-9]+}}, 0(${{[0-9]+}}) + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %1, i32 %conv1) + ret i32 0 +} + +declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/Mips/sll1.ll b/test/CodeGen/Mips/sll1.ll new file mode 100644 index 0000000..fdcd38c --- /dev/null +++ b/test/CodeGen/Mips/sll1.ll @@ -0,0 +1,19 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 10, align 4 +@j = global i32 0, align 4 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 + +define i32 @main() nounwind { +entry: +; 16: sll ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}} + %0 = load i32* @i, align 4 + %shl = shl i32 %0, 4 +; 16: sll ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}} + store i32 %shl, i32* @j, align 4 + %1 = load i32* @j, align 4 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1) + ret i32 0 +} + +declare i32 @printf(i8*, ...) 
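sll1.ll (above) and sll2.ll (next diff) split the shift tests by operand kind: a constant shift amount can be encoded in the immediate form `sll`, while a runtime amount needs the register form `sllv`. A hypothetical C rendering of the distinction:

```c
/* Hypothetical C analogue of sll1.ll vs. sll2.ll: whether the shift
 * amount is a compile-time constant decides between sll and sllv. */
int i = 10, j = 4;

int shl_const(void) { return i << 4; }  /* 16: sll  $d, $s, 4  (immediate) */
int shl_var(void)   { return i << j; }  /* 16: sllv $d, $s     (register)  */
```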
diff --git a/test/CodeGen/Mips/sll2.ll b/test/CodeGen/Mips/sll2.ll new file mode 100644 index 0000000..c2af454 --- /dev/null +++ b/test/CodeGen/Mips/sll2.ll @@ -0,0 +1,19 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 10, align 4 +@j = global i32 4, align 4 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 + +define i32 @main() nounwind { +entry: + %0 = load i32* @i, align 4 + %1 = load i32* @j, align 4 + %shl = shl i32 %0, %1 +; 16: sllv ${{[0-9]+}}, ${{[0-9]+}} + store i32 %shl, i32* @i, align 4 + %2 = load i32* @j, align 4 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %2) + ret i32 0 +} + +declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/Mips/sra1.ll b/test/CodeGen/Mips/sra1.ll new file mode 100644 index 0000000..15bf8d6 --- /dev/null +++ b/test/CodeGen/Mips/sra1.ll @@ -0,0 +1,15 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 -354, align 4 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 + +define i32 @main() nounwind { +entry: + %0 = load i32* @i, align 4 + %shr = ashr i32 %0, 3 +; 16: sra ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}} + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %shr) + ret i32 0 +} + +declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/Mips/sra2.ll b/test/CodeGen/Mips/sra2.ll new file mode 100644 index 0000000..26bf19d --- /dev/null +++ b/test/CodeGen/Mips/sra2.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 -354, align 4 +@j = global i32 3, align 4 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 + +define i32 @main() nounwind { +entry: + %0 = load i32* @i, align 4 + %1 = load i32* @j, align 4 + %shr = ashr i32 %0, %1 +; 16: srav ${{[0-9]+}}, ${{[0-9]+}} + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %shr) + ret i32 0 +} + +declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/Mips/srl1.ll b/test/CodeGen/Mips/srl1.ll new file mode 100644 index 0000000..3474283 --- /dev/null +++ b/test/CodeGen/Mips/srl1.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 10654, align 4 +@j = global i32 0, align 4 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 + +define i32 @main() nounwind { +entry: + %0 = load i32* @i, align 4 + %shr = lshr i32 %0, 4 +; 16: srl ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}} + store i32 %shr, i32* @j, align 4 + %1 = load i32* @j, align 4 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1) + ret i32 0 +} + +declare i32 @printf(i8*, ...) 
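The right-shift tests follow the same immediate/register split, with signedness additionally selecting arithmetic versus logical shifts (srl2.ll, in the next diff, completes the set). A hypothetical C summary of the four cases:

```c
/* Hypothetical C analogue of sra1/sra2/srl1/srl2: ashr (signed) picks
 * the arithmetic forms, lshr (unsigned) the logical ones; a constant
 * amount uses the immediate encoding, a variable one the register form. */
int      si = -354;
unsigned ui = 10654;
int      n  = 3;

int      ashr_const(void) { return si >> 3; }  /* 16: sra  (immediate) */
int      ashr_var(void)   { return si >> n; }  /* 16: srav (register)  */
unsigned lshr_const(void) { return ui >> 4; }  /* 16: srl  (immediate) */
unsigned lshr_var(void)   { return ui >> n; }  /* 16: srlv (register)  */
```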
diff --git a/test/CodeGen/Mips/srl2.ll b/test/CodeGen/Mips/srl2.ll new file mode 100644 index 0000000..26ec092 --- /dev/null +++ b/test/CodeGen/Mips/srl2.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 10654, align 4 +@j = global i32 0, align 4 +@k = global i32 4, align 4 +@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1 + +define i32 @main() nounwind { +entry: + %0 = load i32* @i, align 4 + %1 = load i32* @k, align 4 + %shr = lshr i32 %0, %1 +; 16: srlv ${{[0-9]+}}, ${{[0-9]+}} + store i32 %shr, i32* @j, align 4 + %2 = load i32* @j, align 4 + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %2) + ret i32 0 +} + +declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/Mips/helloworld_sr.ll b/test/CodeGen/Mips/sub1.ll index 641ee27..195750b 100644 --- a/test/CodeGen/Mips/helloworld_sr.ll +++ b/test/CodeGen/Mips/sub1.ll @@ -1,16 +1,15 @@ ; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 -@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00", align 1 +@i = global i32 10, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1 define i32 @main() nounwind { entry: - %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0)) + %0 = load i32* @i, align 4 + %sub = sub nsw i32 %0, 5 +; 16: addiu ${{[0-9]+}}, -{{[0-9]+}} + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %sub) ret i32 0 - -; 16: .set mips16 # @main - -; 16: save $ra, [[FS:[0-9]+]] -; 16: restore $ra, [[FS]] } declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/Mips/sub2.ll b/test/CodeGen/Mips/sub2.ll new file mode 100644 index 0000000..4f6bfcc --- /dev/null +++ b/test/CodeGen/Mips/sub2.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@i = global i32 10, align 4 +@j = global i32 20, align 4 +@.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1 + +define i32 @main() nounwind { +entry: + %0 = load i32* @j, align 4 + %1 = load i32* @i, align 4 + %sub = sub nsw i32 %0, %1 +; 16: subu ${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}} + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %sub) + ret i32 0 +} + +declare i32 @printf(i8*, ...) 
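sub1.ll and sub2.ll (above) make an asymmetry explicit: MIPS has no subtract-immediate, so subtraction by a constant is checked as an `addiu` with a negated immediate, while register-register subtraction uses `subu`. A hypothetical C analogue — the instruction mapping in the comments only restates what the CHECK lines assert:

```c
/* Hypothetical C analogue of sub1.ll vs. sub2.ll. */
int i = 10, j = 20;

int sub_const(void) { return i - 5; }  /* 16: addiu $r, -5 (no subi form) */
int sub_reg(void)   { return j - i; }  /* 16: subu $d, $s, $t             */
```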
diff --git a/test/CodeGen/Mips/swzero.ll b/test/CodeGen/Mips/swzero.ll index 2441ca2..9f91a39 100644 --- a/test/CodeGen/Mips/swzero.ll +++ b/test/CodeGen/Mips/swzero.ll @@ -4,6 +4,7 @@ define void @zero_u(%struct.unaligned* nocapture %p) nounwind { entry: +; CHECK: swl $zero ; CHECK: swr $zero %x = getelementptr inbounds %struct.unaligned* %p, i32 0, i32 0 store i32 0, i32* %x, align 1 diff --git a/test/CodeGen/Mips/xor1.ll b/test/CodeGen/Mips/xor1.ll new file mode 100644 index 0000000..f2c1316 --- /dev/null +++ b/test/CodeGen/Mips/xor1.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16 + +@x = global i32 65504, align 4 +@y = global i32 60929, align 4 +@.str = private unnamed_addr constant [7 x i8] c"%08x \0A\00", align 1 + +define i32 @main() nounwind { +entry: + %0 = load i32* @x, align 4 + %1 = load i32* @y, align 4 + %xor = xor i32 %0, %1 +; 16: xor ${{[0-9]+}}, ${{[0-9]+}} + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %xor) + ret i32 0 +} + +declare i32 @printf(i8*, ...) diff --git a/test/CodeGen/PowerPC/fabs.ll b/test/CodeGen/PowerPC/fabs.ll index 156e00b..ddcce74 100644 --- a/test/CodeGen/PowerPC/fabs.ll +++ b/test/CodeGen/PowerPC/fabs.ll @@ -2,6 +2,6 @@ define double @fabs(double %f) { entry: - %tmp2 = tail call double @fabs( double %f ) ; <double> [#uses=1] + %tmp2 = tail call double @fabs( double %f ) readnone ; <double> [#uses=1] ret double %tmp2 } diff --git a/test/CodeGen/PowerPC/fnabs.ll b/test/CodeGen/PowerPC/fnabs.ll index bbd5c71..9fa2dcb 100644 --- a/test/CodeGen/PowerPC/fnabs.ll +++ b/test/CodeGen/PowerPC/fnabs.ll @@ -3,7 +3,7 @@ declare double @fabs(double) define double @test(double %X) { - %Y = call double @fabs( double %X ) ; <double> [#uses=1] + %Y = call double @fabs( double %X ) readnone ; <double> [#uses=1] %Z = fsub double -0.000000e+00, %Y ; <double> [#uses=1] ret double %Z } diff --git a/test/CodeGen/PowerPC/lbzux.ll b/test/CodeGen/PowerPC/lbzux.ll index 5725c0d..12f1d1f 100644 --- a/test/CodeGen/PowerPC/lbzux.ll +++ b/test/CodeGen/PowerPC/lbzux.ll @@ -2,7 +2,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "powerpc64-unknown-linux-gnu" ; RUN: llc < %s | FileCheck %s -define fastcc void @allocateSpace() nounwind { +define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind { entry: %0 = load i8** undef, align 8, !tbaa !0 br i1 undef, label %return, label %lor.lhs.false @@ -20,10 +20,10 @@ while.cond: ; preds = %while.body, %if.the %idxprom17 = sext i32 0 to i64 %arrayidx18 = getelementptr inbounds i8* %0, i64 %idxprom17 %or = or i32 undef, undef - br i1 false, label %if.end71, label %while.body + br i1 %cond1, label %if.end71, label %while.body while.body: ; preds = %while.cond - br i1 undef, label %while.cond, label %if.then45 + br i1 %cond2, label %while.cond, label %if.then45 if.then45: ; preds = %while.body %idxprom48139 = zext i32 %or to i64 diff --git a/test/CodeGen/PowerPC/ppc64-cyclecounter.ll b/test/CodeGen/PowerPC/ppc64-cyclecounter.ll new file mode 100644 index 0000000..38406ca --- /dev/null +++ b/test/CodeGen/PowerPC/ppc64-cyclecounter.ll @@ -0,0 +1,15 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" +; RUN: llc < %s | FileCheck %s + +define i64 @test1() nounwind { +entry: + %r = call i64 @llvm.readcyclecounter() + ret i64 %r +} + +; CHECK: 
@test1 +; CHECK: mfspr 3, 268 + +declare i64 @llvm.readcyclecounter() + diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll index 8285742..01df373 100644 --- a/test/CodeGen/Thumb2/machine-licm.ll +++ b/test/CodeGen/Thumb2/machine-licm.ll @@ -95,7 +95,7 @@ bb.nph: bb: ; preds = %bb, %bb.nph ; CHECK: bb ; CHECK: eor.w -; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]] +; CHECK: eorne.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]] ; CHECK-NOT: eor ; CHECK: and %data_addr.013 = phi i8 [ %data, %bb.nph ], [ %8, %bb ] ; <i8> [#uses=2] diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll index 74729fd..ead198f 100644 --- a/test/CodeGen/Thumb2/thumb2-select_xform.ll +++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll @@ -4,9 +4,9 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK: t1 ; CHECK: mvn r0, #-2147483648 ; CHECK: cmp r2, #10 -; CHECK: add r0, r1 -; CHECK: it gt -; CHECK: movgt r0, r1 +; CHECK: it le +; CHECK: addle.w r1, r1, r0 +; CHECK: mov r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 2147483647 %tmp3 = add i32 %tmp2, %b @@ -15,10 +15,10 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK: t2 -; CHECK: add.w r0, r1, #-2147483648 ; CHECK: cmp r2, #10 -; CHECK: it gt -; CHECK: movgt r0, r1 +; CHECK: it le +; CHECK: addle.w r1, r1, #-2147483648 +; CHECK: mov r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 2147483648 @@ -28,10 +28,10 @@ define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind { define i32 @t3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK: t3 -; CHECK: sub.w r0, r1, #10 ; CHECK: cmp r2, #10 -; CHECK: it gt -; CHECK: movgt r0, r1 +; CHECK: it le +; CHECK: suble.w r1, r1, #10 +; CHECK: mov r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 10 %tmp3 = sub i32 %b, %tmp2 diff --git a/test/CodeGen/X86/2006-11-12-CSRetCC.ll b/test/CodeGen/X86/2006-11-12-CSRetCC.ll index 6ec9a48..a58c9b1 100644 --- a/test/CodeGen/X86/2006-11-12-CSRetCC.ll +++ b/test/CodeGen/X86/2006-11-12-CSRetCC.ll @@ -52,8 +52,8 @@ entry: %tmp21 = load double* %tmp20 ; <double> [#uses=1] %tmp.upgrd.6 = getelementptr [9 x i8]* @str, i32 0, i64 0 ; <i8*> [#uses=1] %tmp.upgrd.7 = call i32 (i8*, ...)* @printf( i8* %tmp.upgrd.6, double %tmp21, double %tmp19 ) ; <i32> [#uses=0] - br label %return -return: ; preds = %entry + br label %finish +finish: %retval.upgrd.8 = load i32* %retval ; <i32> [#uses=1] ret i32 %retval.upgrd.8 } diff --git a/test/CodeGen/X86/2006-11-17-IllegalMove.ll b/test/CodeGen/X86/2006-11-17-IllegalMove.ll index adc825c..783d9f9 100644 --- a/test/CodeGen/X86/2006-11-17-IllegalMove.ll +++ b/test/CodeGen/X86/2006-11-17-IllegalMove.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86-64 > %t -; RUN: grep movb %t | count 2 +; RUN: grep movb %t | count 1 ; RUN: grep "movzb[wl]" %t diff --git a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll index 266fd7b..39af931 100644 --- a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll +++ b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll @@ -10,10 +10,10 @@ %struct.indexentry = type { i32, i8*, i8*, i8*, i8*, i8* } -define i32 @_bfd_stab_section_find_nearest_line(i32 %offset) nounwind { +define i32 @_bfd_stab_section_find_nearest_line(i32 %offset, i1 %cond) nounwind { entry: %tmp910 = add i32 0, %offset ; <i32> [#uses=1] - br i1 true, label %bb951, label %bb917 + br i1 %cond, label %bb951, label 
%bb917 bb917: ; preds = %entry ret i32 0 @@ -21,7 +21,7 @@ bb917: ; preds = %entry bb951: ; preds = %bb986, %entry %tmp955 = sdiv i32 0, 2 ; <i32> [#uses=3] %tmp961 = getelementptr %struct.indexentry* null, i32 %tmp955, i32 0 ; <i32*> [#uses=1] - br i1 true, label %bb986, label %bb967 + br i1 %cond, label %bb986, label %bb967 bb967: ; preds = %bb951 ret i32 0 diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll index 66f0677..b2cf34c 100644 --- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll +++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll @@ -17,8 +17,7 @@ bb: ; preds = %bb, %entry ; CHECK: %bb30.loopexit ; CHECK: divsd %xmm0 ; CHECK: movsd %xmm0, 16(%esp) -; CHECK: .align -; CHECK-NEXT: %bb3 +; CHECK: %bb3 bb3: ; preds = %bb30.loopexit, %bb25, %bb3 %2 = load i32* null, align 4 ; <i32> [#uses=1] %3 = mul i32 %2, 0 ; <i32> [#uses=1] diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index 1b2f203..d50fe6f 100644 --- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "8 machine-licm" +; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "5 machine-licm" ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s ; rdar://6627786 ; rdar://7792037 diff --git a/test/CodeGen/X86/2011-08-29-InitOrder.ll b/test/CodeGen/X86/2011-08-29-InitOrder.ll index 4d5f8d7..a95dcb5 100644 --- a/test/CodeGen/X86/2011-08-29-InitOrder.ll +++ b/test/CodeGen/X86/2011-08-29-InitOrder.ll @@ -3,7 +3,7 @@ ; PR5329 @llvm.global_ctors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @construct_2 }, { i32, void ()* } { i32 3000, void ()* @construct_3 }, { i32, void ()* } { i32 1000, void ()* @construct_1 }] -; CHECK-DEFAULT .section .ctors.64535,"aw",@progbits +; CHECK-DEFAULT: .section .ctors.64535,"aw",@progbits ; CHECK-DEFAULT: .long construct_1 ; CHECK-DEFAULT: .section .ctors.63535,"aw",@progbits ; CHECK-DEFAULT: .long construct_2 diff --git a/test/CodeGen/X86/2012-05-19-avx2-store.ll b/test/CodeGen/X86/2012-05-19-avx2-store.ll index 61fef90..1c1e8e2 100644 --- a/test/CodeGen/X86/2012-05-19-avx2-store.ll +++ b/test/CodeGen/X86/2012-05-19-avx2-store.ll @@ -3,8 +3,7 @@ define void @double_save(<4 x i32>* %Ap, <4 x i32>* %Bp, <8 x i32>* %P) nounwind ssp { entry: ; CHECK: vmovaps - ; CHECK: vmovaps - ; CHECK: vinsertf128 + ; CHECK: vinsertf128 $1, ([[A0:%rdi|%rsi]]), ; CHECK: vmovups %A = load <4 x i32>* %Ap %B = load <4 x i32>* %Bp diff --git a/test/CodeGen/X86/2012-08-07-CmpISelBug.ll b/test/CodeGen/X86/2012-08-07-CmpISelBug.ll new file mode 100644 index 0000000..000b853 --- /dev/null +++ b/test/CodeGen/X86/2012-08-07-CmpISelBug.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s + +; Cmp lowering should not look past the truncate unless the high bits are known +; zero. 
+; rdar://12027825 + +define void @foo(i8 %arg4, i32 %arg5, i32* %arg14) nounwind { +bb: +; CHECK: foo: +; CHECK-NOT: testl +; CHECK: testb + %tmp48 = zext i8 %arg4 to i32 + %tmp49 = and i32 %tmp48, 32 + %tmp50 = add i32 %tmp49, 1593371643 + %tmp55 = sub i32 %tmp50, 0 + %tmp56 = add i32 %tmp55, 7787538 + %tmp57 = xor i32 %tmp56, 1601159181 + %tmp58 = xor i32 %arg5, 1601159181 + %tmp59 = and i32 %tmp57, %tmp58 + %tmp60 = add i32 %tmp59, -1263900958 + %tmp67 = sub i32 %tmp60, 0 + %tmp103 = xor i32 %tmp56, 13 + %tmp104 = trunc i32 %tmp103 to i8 + %tmp105 = sub i8 0, %tmp104 + %tmp106 = add i8 %tmp105, -103 + %tmp113 = sub i8 %tmp106, 0 + %tmp114 = add i8 %tmp113, -72 + %tmp141 = icmp ne i32 %tmp67, -1263900958 + %tmp142 = select i1 %tmp141, i8 %tmp114, i8 undef + %tmp143 = xor i8 %tmp142, 81 + %tmp144 = zext i8 %tmp143 to i32 + %tmp145 = add i32 %tmp144, 2062143348 + %tmp152 = sub i32 %tmp145, 0 + store i32 %tmp152, i32* %arg14 + ret void +} diff --git a/test/CodeGen/X86/2012-08-16-setcc.ll b/test/CodeGen/X86/2012-08-16-setcc.ll new file mode 100644 index 0000000..ed51156 --- /dev/null +++ b/test/CodeGen/X86/2012-08-16-setcc.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s + +; rdar://12081007 + +; CHECK: and_1: +; CHECK: andb +; CHECK-NEXT: cmovnel +; CHECK: ret +define i32 @and_1(i8 zeroext %a, i8 zeroext %b, i32 %x) { + %1 = and i8 %b, %a + %2 = icmp ne i8 %1, 0 + %3 = select i1 %2, i32 %x, i32 0 + ret i32 %3 +} + +; CHECK: and_2: +; CHECK: andb +; CHECK-NEXT: setne +; CHECK: ret +define zeroext i1 @and_2(i8 zeroext %a, i8 zeroext %b) { + %1 = and i8 %b, %a + %2 = icmp ne i8 %1, 0 + ret i1 %2 +} + +; CHECK: xor_1: +; CHECK: xorb +; CHECK-NEXT: cmovnel +; CHECK: ret +define i32 @xor_1(i8 zeroext %a, i8 zeroext %b, i32 %x) { + %1 = xor i8 %b, %a + %2 = icmp ne i8 %1, 0 + %3 = select i1 %2, i32 %x, i32 0 + ret i32 %3 +} + +; CHECK: xor_2: +; CHECK: xorb +; CHECK-NEXT: setne +; CHECK: ret +define zeroext i1 @xor_2(i8 zeroext %a, i8 zeroext %b) { + %1 = xor i8 %b, %a + %2 = icmp ne i8 %1, 0 + ret i1 %2 +} diff --git a/test/CodeGen/X86/2012-08-17-legalizer-crash.ll b/test/CodeGen/X86/2012-08-17-legalizer-crash.ll new file mode 100644 index 0000000..a65e688 --- /dev/null +++ b/test/CodeGen/X86/2012-08-17-legalizer-crash.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s | FileCheck %s +; Check that an overly large immediate created by SROA doesn't crash the +; legalizer. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +%struct._GtkSheetRow = type { i32*, i32, i32, i32, %struct._GtkSheetButton, i32, i32 } +%struct._GtkSheetButton = type { i32, i32*, i32, i32*, i32 } + +@a = common global %struct._GtkSheetRow* null, align 8 + +define void @fn1() nounwind uwtable ssp { +entry: + %0 = load %struct._GtkSheetRow** @a, align 8 + %1 = bitcast %struct._GtkSheetRow* %0 to i576* + %srcval2 = load i576* %1, align 8 + %tobool = icmp ugt i576 %srcval2, 57586096570152913699974892898380567793532123114264532903689671329431521032595044740083720782129802971518987656109067457577065805510327036019308994315074097345724415 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + store i576 %srcval2, i576* %1, align 8 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void + +; CHECK: fn1: +; CHECK: shrq $32, [[REG:%.*]] +; CHECK: testq [[REG]], [[REG]] +; CHECK: je +} diff --git a/test/CodeGen/X86/alignment-2.ll b/test/CodeGen/X86/alignment-2.ll index cc709b5..1f9e85c 100644 --- a/test/CodeGen/X86/alignment-2.ll +++ b/test/CodeGen/X86/alignment-2.ll @@ -18,7 +18,9 @@ define signext i8 @do_lo_list() nounwind optsize ssp { bb: ; CHECK: do_lo_list -; CHECK-NOT: movaps +; Make sure we do not use movaps for the global variable. +; It is okay to use movaps for writing the local variable on stack. +; CHECK-NOT: movaps {{[0-9]*}}(%{{[a-z]*}}), {{%xmm[0-9]}} %myopt = alloca %struct.printQueryOpt, align 4 %tmp = bitcast %struct.printQueryOpt* %myopt to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.printQueryOpt* getelementptr inbounds (%struct._psqlSettings* @pset, i32 0, i32 4) to i8*), i32 76, i32 4, i1 false) diff --git a/test/CodeGen/X86/atomic-pointer.ll b/test/CodeGen/X86/atomic-pointer.ll new file mode 100644 index 0000000..a455277 --- /dev/null +++ b/test/CodeGen/X86/atomic-pointer.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=i686-none-linux | FileCheck %s + +define i32* @test_atomic_ptr_load(i32** %a0) { +; CHECK: test_atomic_ptr_load +; CHECK: movl +; CHECK: movl +; CHECK: ret +0: + %0 = load atomic i32** %a0 seq_cst, align 4 + ret i32* %0 +} + +define void @test_atomic_ptr_store(i32* %a0, i32** %a1) { +; CHECK: test_atomic_ptr_store +; CHECK: movl +; CHECK: movl +; CHECK: xchgl +; CHECK: ret +0: + store atomic i32* %a0, i32** %a1 seq_cst, align 4 + ret void +} diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll index a8fd8e3..c44beb4 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -1154,7 +1154,7 @@ define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK: movl ; CHECK: movl ; CHECK: vpcmpestri - ; CHECK: movl + ; CHECK: seta %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1165,7 +1165,7 @@ define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK: movl ; CHECK: movl ; CHECK: vpcmpestri - ; CHECK: movl + ; CHECK: sbbl %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1176,7 +1176,7 @@ define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK: movl ; CHECK: movl ; CHECK: vpcmpestri - ; CHECK: movl + ; CHECK: seto %res = call i32 
@llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1187,7 +1187,7 @@ define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK: movl ; CHECK: movl ; CHECK: vpcmpestri - ; CHECK: movl + ; CHECK: sets %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1198,7 +1198,7 @@ define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK: movl ; CHECK: movl ; CHECK: vpcmpestri - ; CHECK: movl + ; CHECK: sete %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1209,6 +1209,7 @@ define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK: movl ; CHECK: movl ; CHECK: vpcmpestrm + ; CHECK-NOT: vmov %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } @@ -1226,7 +1227,7 @@ declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind read define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK: vpcmpistri - ; CHECK: movl + ; CHECK: seta %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1235,7 +1236,7 @@ declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind rea define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK: vpcmpistri - ; CHECK: movl + ; CHECK: sbbl %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1244,7 +1245,7 @@ declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind rea define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK: vpcmpistri - ; CHECK: movl + ; CHECK: seto %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1253,7 +1254,7 @@ declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind rea define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK: vpcmpistri - ; CHECK: movl + ; CHECK: sets %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1262,7 +1263,7 @@ declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind rea define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK: vpcmpistri - ; CHECK: movl + ; CHECK: sete %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1271,6 +1272,7 @@ declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind rea define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK: vpcmpistrm + ; CHECK-NOT: vmov %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll index fc7b638..5534712 100644 --- a/test/CodeGen/X86/block-placement.ll +++ b/test/CodeGen/X86/block-placement.ll @@ -7,10 +7,15 @@ define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) { ; that is not expected to run. 
; CHECK: test_ifchains: ; CHECK: %entry +; CHECK-NOT: .align ; CHECK: %else1 +; CHECK-NOT: .align ; CHECK: %else2 +; CHECK-NOT: .align ; CHECK: %else3 +; CHECK-NOT: .align ; CHECK: %else4 +; CHECK-NOT: .align ; CHECK: %exit ; CHECK: %then1 ; CHECK: %then2 @@ -76,8 +81,11 @@ define i32 @test_loop_cold_blocks(i32 %i, i32* %a) { ; Check that we sink cold loop blocks after the hot loop body. ; CHECK: test_loop_cold_blocks: ; CHECK: %entry +; CHECK-NOT: .align ; CHECK: %unlikely1 +; CHECK-NOT: .align ; CHECK: %unlikely2 +; CHECK: .align ; CHECK: %body1 ; CHECK: %body2 ; CHECK: %body3 @@ -634,7 +642,7 @@ define void @test_unnatural_cfg_backwards_inner_loop() { ; ; CHECK: test_unnatural_cfg_backwards_inner_loop ; CHECK: %entry -; CHECK: %body +; CHECK: [[BODY:# BB#[0-9]+]]: ; CHECK: %loop2b ; CHECK: %loop1 ; CHECK: %loop2a diff --git a/test/CodeGen/X86/bool-simplify.ll b/test/CodeGen/X86/bool-simplify.ll new file mode 100644 index 0000000..0cb9fd9 --- /dev/null +++ b/test/CodeGen/X86/bool-simplify.ll @@ -0,0 +1,42 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx | FileCheck %s + +define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) { + %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c) + %t2 = icmp ne i32 %t1, 0 + %t3 = select i1 %t2, i32 %a, i32 %b + ret i32 %t3 +; CHECK: foo +; CHECK: ptest +; CHECK-NOT: testl +; CHECK: cmov +; CHECK: ret +} + +define i32 @bar(<2 x i64> %c) { +entry: + %0 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c) + %1 = icmp ne i32 %0, 0 + br i1 %1, label %if-true-block, label %endif-block +if-true-block: ; preds = %entry + ret i32 0 +endif-block: ; preds = %entry, + ret i32 1 +; CHECK: bar +; CHECK: ptest +; CHECK-NOT: testl +; CHECK: jne +; CHECK: ret +} + +define i32 @bax(<2 x i64> %c) { + %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c) + %t2 = icmp eq i32 %t1, 1 + %t3 = zext i1 %t2 to i32 + ret i32 %t3 +; CHECK: bax +; CHECK: ptest +; CHECK-NOT: cmpl +; CHECK: ret +} + +declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/test/CodeGen/X86/br-fold.ll b/test/CodeGen/X86/br-fold.ll index 2c37194..5223463 100644 --- a/test/CodeGen/X86/br-fold.ll +++ b/test/CodeGen/X86/br-fold.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=x86-64 < %s | FileCheck %s ; CHECK: orq -; CHECK-NEXT: LBB0_1 +; CHECK-NEXT: %bb8.i329 @_ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE = external constant [33 x i16], align 32 ; <[33 x i16]*> [#uses=1] @_ZN11xercesc_2_56XMLUni16fgNotationStringE = external constant [9 x i16], align 16 ; <[9 x i16]*> [#uses=1] diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll index 3e65867..4d80189 100644 --- a/test/CodeGen/X86/break-sse-dep.ll +++ b/test/CodeGen/X86/break-sse-dep.ll @@ -34,8 +34,7 @@ entry: define double @squirt(double* %x) nounwind { entry: ; CHECK: squirt: -; CHECK: movsd ([[A0]]), %xmm0 -; CHECK: sqrtsd %xmm0, %xmm0 +; CHECK: sqrtsd ([[A0]]), %xmm0 %z = load double* %x %t = call double @llvm.sqrt.f64(double %z) ret double %t diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll index 43beac0..ed25c82 100644 --- a/test/CodeGen/X86/cmov.ll +++ b/test/CodeGen/X86/cmov.ll @@ -4,8 +4,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone { entry: ; CHECK: test1: -; CHECK: btl -; CHECK-NEXT: movl $12, %eax +; CHECK: movl $12, %eax +; CHECK-NEXT: btl ; CHECK-NEXT: cmovael (%rcx), %eax ; CHECK-NEXT: ret @@ -19,8 +19,8 @@ entry: define i32 
@test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone { entry: ; CHECK: test2: -; CHECK: btl -; CHECK-NEXT: movl $12, %eax +; CHECK: movl $12, %eax +; CHECK-NEXT: btl ; CHECK-NEXT: cmovbl (%rcx), %eax ; CHECK-NEXT: ret diff --git a/test/CodeGen/X86/early-ifcvt.ll b/test/CodeGen/X86/early-ifcvt.ll index ce1fe04..7883ffa 100644 --- a/test/CodeGen/X86/early-ifcvt.ll +++ b/test/CodeGen/X86/early-ifcvt.ll @@ -37,3 +37,33 @@ do.end: %sub = sub nsw i32 %max.1, %min.1 ret i32 %sub } + +; CHECK: multipreds +; Deal with alternative tail predecessors +; CHECK-NOT: LBB +; CHECK: cmov +; CHECK-NOT: LBB +; CHECK: cmov +; CHECK-NOT: LBB +; CHECK: fprintf + +define void @multipreds(i32 %sw) nounwind uwtable ssp { +entry: + switch i32 %sw, label %if.then29 [ + i32 0, label %if.then37 + i32 127, label %if.end41 + ] + +if.then29: + br label %if.end41 + +if.then37: + br label %if.end41 + +if.end41: + %exit_status.0 = phi i32 [ 2, %if.then29 ], [ 0, %if.then37 ], [ 66, %entry ] + call void (...)* @fprintf(i32 %exit_status.0) nounwind + unreachable +} + +declare void @fprintf(...) nounwind diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll index 5adc38f..af1867f 100644 --- a/test/CodeGen/X86/fabs.ll +++ b/test/CodeGen/X86/fabs.ll @@ -1,6 +1,7 @@ ; Make sure this testcase codegens to the fabs instruction, not a call to fabsf -; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | FileCheck %s -; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=UNSAFE +; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse2,-sse3,-sse | FileCheck %s +; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=UNSAFE +; RUN: llc < %s -mtriple=x86_64-apple-macosx -O0 | FileCheck %s --check-prefix=NOOPT declare float @fabsf(float) @@ -8,8 +9,9 @@ declare x86_fp80 @fabsl(x86_fp80) ; CHECK: test1: ; UNSAFE: test1: +; NOOPT: test1: define float @test1(float %X) { - %Y = call float @fabsf(float %X) + %Y = call float @fabsf(float %X) readnone ret float %Y } ; CHECK: {{^[ \t]+fabs$}} @@ -17,9 +19,11 @@ define float @test1(float %X) { ; CHECK-NOT: fabs ; UNSAFE-NOT: fabs +; NOOPT-NOT: fabsf ; CHECK: test2: ; UNSAFE: test2: +; NOOPT: test2: define double @test2(double %X) { %Y = fcmp oge double %X, -0.0 %Z = fsub double -0.0, %X @@ -28,6 +32,7 @@ define double @test2(double %X) { } ; fabs is not used here. 
; CHECK-NOT: fabs +; NOOPT-NOT: fabs ; UNSAFE: {{^[ \t]+fabs$}} @@ -35,12 +40,15 @@ define double @test2(double %X) { ; CHECK: test3: ; UNSAFE: test3: +; NOOPT: test3: define x86_fp80 @test3(x86_fp80 %X) { - %Y = call x86_fp80 @fabsl(x86_fp80 %X) + %Y = call x86_fp80 @fabsl(x86_fp80 %X) readnone ret x86_fp80 %Y } ; CHECK: {{^[ \t]+fabs$}} ; UNSAFE: {{^[ \t]+fabs$}} +; NOOPT: {{^[ \t]+fabs$}} ; CHECK-NOT: fabs ; UNSAFE-NOT: fabs +; NOOPT-NOT: fabs diff --git a/test/CodeGen/X86/fast-isel-x86.ll b/test/CodeGen/X86/fast-isel-x86.ll index 19f3888..4caa3a0 100644 --- a/test/CodeGen/X86/fast-isel-x86.ll +++ b/test/CodeGen/X86/fast-isel-x86.ll @@ -57,6 +57,6 @@ entry: ; CHECK: subl $28 ; CHECK: leal (%esp), %ecx ; CHECK: calll _test4fastccsret -; CHECK addl $28 +; CHECK: addl $28 } declare fastcc void @test4fastccsret(%struct.a* sret) diff --git a/test/CodeGen/X86/fma.ll b/test/CodeGen/X86/fma.ll index 5deedb9..b0c1d0a 100644 --- a/test/CodeGen/X86/fma.ll +++ b/test/CodeGen/X86/fma.ll @@ -1,8 +1,11 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+fma | FileCheck %s --check-prefix=CHECK-FMA-INST +; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s --check-prefix=CHECK-FMA-CALL +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma | FileCheck %s --check-prefix=CHECK-FMA-INST +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s --check-prefix=CHECK-FMA-CALL ; CHECK: test_f32 -; CHECK: _fmaf +; CHECK-FMA-INST: vfmadd213ss +; CHECK-FMA-CALL: _fmaf define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp { entry: @@ -11,7 +14,8 @@ entry: } ; CHECK: test_f64 -; CHECK: _fma +; CHECK-FMA-INST: vfmadd213sd +; CHECK-FMA-CALL: _fma define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp { entry: diff --git a/test/CodeGen/X86/fma3-intrinsics.ll b/test/CodeGen/X86/fma3-intrinsics.ll index 8659dfe..90529e0 100755 --- a/test/CodeGen/X86/fma3-intrinsics.ll +++ b/test/CodeGen/X86/fma3-intrinsics.ll @@ -1,42 +1,42 @@ ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 -mattr=avx2,+fma | FileCheck %s define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fmadd132ss %xmm + ; CHECK: fmadd213ss %xmm %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fmadd132ps + ; CHECK: fmadd213ps %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { - ; CHECK: fmadd132ps {{.*\(%r.*}}, %ymm + ; CHECK: fmadd213ps {{.*\(%r.*}}, %ymm %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind ret <8 x float> %res } declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fnmadd132ss %xmm + ; CHECK: fnmadd213ss %xmm %res = call <4 x float> 
@llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fnmadd132ps + ; CHECK: fnmadd213ps %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { - ; CHECK: fnmadd132ps {{.*\(%r.*}}, %ymm + ; CHECK: fnmadd213ps {{.*\(%r.*}}, %ymm %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind ret <8 x float> %res } @@ -44,28 +44,28 @@ declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x define <4 x float> @test_x86_fmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fmsub132ss + ; CHECK: fmsub213ss %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fmsub132ps + ; CHECK: fmsub213ps %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone define <4 x float> @test_x86_fnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fnmsub132ss + ; CHECK: fnmsub213ss %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fnmsub132ps + ; CHECK: fnmsub213ps %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } @@ -74,28 +74,28 @@ declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x floa ;;;; define <2 x double> @test_x86_fmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { - ; CHECK: fmadd132sd + ; CHECK: fmadd213sd %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind ret <2 x double> %res } declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone define <2 x double> @test_x86_fmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { - ; CHECK: fmadd132pd + ; CHECK: fmadd213pd %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind ret <2 x double> %res } declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone define <2 x double> @test_x86_fnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { - ; CHECK: fnmadd132sd + ; CHECK: fnmadd213sd %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind ret <2 x double> %res } declare <2 x double> 
@llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone define <2 x double> @test_x86_fnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { - ; CHECK: fnmadd132pd + ; CHECK: fnmadd213pd %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind ret <2 x double> %res } @@ -104,28 +104,28 @@ declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x d define <2 x double> @test_x86_fmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { - ; CHECK: fmsub132sd + ; CHECK: fmsub213sd %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind ret <2 x double> %res } declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { - ; CHECK: fmsub132pd + ; CHECK: fmsub213pd %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind ret <2 x double> %res } declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone define <2 x double> @test_x86_fnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { - ; CHECK: fnmsub132sd + ; CHECK: fnmsub213sd %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind ret <2 x double> %res } declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone define <2 x double> @test_x86_fnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { - ; CHECK: fnmsub132pd + ; CHECK: fnmsub213pd %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind ret <2 x double> %res } diff --git a/test/CodeGen/X86/fma_patterns.ll b/test/CodeGen/X86/fma_patterns.ll new file mode 100644 index 0000000..5d97a87 --- /dev/null +++ b/test/CodeGen/X86/fma_patterns.ll @@ -0,0 +1,139 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=avx2,+fma -fp-contract=fast | FileCheck %s + +; CHECK: test_x86_fmadd_ps +; CHECK: vfmadd213ps %xmm2, %xmm0, %xmm1 +; CHECK: ret +define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + %x = fmul <4 x float> %a0, %a1 + %res = fadd <4 x float> %x, %a2 + ret <4 x float> %res +} + +; CHECK: test_x86_fmsub_ps +; CHECK: fmsub213ps %xmm2, %xmm0, %xmm1 +; CHECK: ret +define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + %x = fmul <4 x float> %a0, %a1 + %res = fsub <4 x float> %x, %a2 + ret <4 x float> %res +} + +; CHECK: test_x86_fnmadd_ps +; CHECK: fnmadd213ps %xmm2, %xmm0, %xmm1 +; CHECK: ret +define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + %x = fmul <4 x float> %a0, %a1 + %res = fsub <4 x float> %a2, %x + ret <4 x float> %res +} + +; CHECK: test_x86_fnmsub_ps +; CHECK: fnmsub213ps %xmm2, %xmm0, %xmm1 +; CHECK: ret +define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + %x = fmul <4 x float> %a0, %a1 + %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x + %res = fsub <4 x float> %y, %a2 + ret <4 x float> %res +} + +; CHECK: test_x86_fmadd_ps_y +; CHECK: vfmadd213ps %ymm2, %ymm0, %ymm1 +; CHECK: ret +define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x 
float> %a1, <8 x float> %a2) { + %x = fmul <8 x float> %a0, %a1 + %res = fadd <8 x float> %x, %a2 + ret <8 x float> %res +} + +; CHECK: test_x86_fmsub_ps_y +; CHECK: vfmsub213ps %ymm2, %ymm0, %ymm1 +; CHECK: ret +define <8 x float> @test_x86_fmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { + %x = fmul <8 x float> %a0, %a1 + %res = fsub <8 x float> %x, %a2 + ret <8 x float> %res +} + +; CHECK: test_x86_fnmadd_ps_y +; CHECK: vfnmadd213ps %ymm2, %ymm0, %ymm1 +; CHECK: ret +define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { + %x = fmul <8 x float> %a0, %a1 + %res = fsub <8 x float> %a2, %x + ret <8 x float> %res +} + +; CHECK: test_x86_fnmsub_ps_y +; CHECK: vfnmsub213ps %ymm2, %ymm0, %ymm1 +; CHECK: ret +define <8 x float> @test_x86_fnmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { + %x = fmul <8 x float> %a0, %a1 + %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x + %res = fsub <8 x float> %y, %a2 + ret <8 x float> %res +} + +; CHECK: test_x86_fmadd_pd_y +; CHECK: vfmadd213pd %ymm2, %ymm0, %ymm1 +; CHECK: ret +define <4 x double> @test_x86_fmadd_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { + %x = fmul <4 x double> %a0, %a1 + %res = fadd <4 x double> %x, %a2 + ret <4 x double> %res +} + +; CHECK: test_x86_fmsub_pd_y +; CHECK: vfmsub213pd %ymm2, %ymm0, %ymm1 +; CHECK: ret +define <4 x double> @test_x86_fmsub_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { + %x = fmul <4 x double> %a0, %a1 + %res = fsub <4 x double> %x, %a2 + ret <4 x double> %res +} + +; CHECK: test_x86_fmsub_pd +; CHECK: vfmsub213pd %xmm2, %xmm0, %xmm1 +; CHECK: ret +define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { + %x = fmul <2 x double> %a0, %a1 + %res = fsub <2 x double> %x, %a2 + ret <2 x double> %res +} + +; CHECK: test_x86_fnmadd_ss +; CHECK: vfnmadd213ss %xmm2, %xmm0, %xmm1 +; CHECK: ret +define float @test_x86_fnmadd_ss(float %a0, float %a1, float %a2) { + %x = fmul float %a0, %a1 + %res = fsub float %a2, %x + ret float %res +} + +; CHECK: test_x86_fnmadd_sd +; CHECK: vfnmadd213sd %xmm2, %xmm0, %xmm1 +; CHECK: ret +define double @test_x86_fnmadd_sd(double %a0, double %a1, double %a2) { + %x = fmul double %a0, %a1 + %res = fsub double %a2, %x + ret double %res +} + +; CHECK: test_x86_fmsub_sd +; CHECK: vfmsub213sd %xmm2, %xmm0, %xmm1 +; CHECK: ret +define double @test_x86_fmsub_sd(double %a0, double %a1, double %a2) { + %x = fmul double %a0, %a1 + %res = fsub double %x, %a2 + ret double %res +} + +; CHECK: test_x86_fnmsub_ss +; CHECK: vfnmsub213ss %xmm2, %xmm0, %xmm1 +; CHECK: ret +define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) { + %x = fsub float -0.000000e+00, %a0 + %y = fmul float %x, %a1 + %res = fsub float %y, %a2 + ret float %res +} diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll index e03cb7e..d836665 100644 --- a/test/CodeGen/X86/fold-load.ll +++ b/test/CodeGen/X86/fold-load.ll @@ -45,3 +45,29 @@ L: } +; rdar://10554090 +; xor in exit block will be CSE'ed and load will be folded to xor in entry. 
+define i1 @test3(i32* %P, i32* %Q) nounwind { +; CHECK: test3: +; CHECK: movl 8(%esp), %eax +; CHECK: xorl (%eax), +; CHECK: j +; CHECK-NOT: xor +entry: + %0 = load i32* %P, align 4 + %1 = load i32* %Q, align 4 + %2 = xor i32 %0, %1 + %3 = and i32 %2, 89947 + %4 = icmp eq i32 %3, 0 + br i1 %4, label %exit, label %land.end + +exit: + %shr.i.i19 = xor i32 %1, %0 + %5 = and i32 %shr.i.i19, 3456789123 + %6 = icmp eq i32 %5, 0 + br label %land.end + +land.end: + %7 = phi i1 [ %6, %exit ], [ false, %entry ] + ret i1 %7 +} diff --git a/test/CodeGen/X86/fold-pcmpeqd-1.ll b/test/CodeGen/X86/fold-pcmpeqd-1.ll index cc4198d..d850630 100644 --- a/test/CodeGen/X86/fold-pcmpeqd-1.ll +++ b/test/CodeGen/X86/fold-pcmpeqd-1.ll @@ -1,11 +1,16 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 > %t -; RUN: grep pcmpeqd %t | count 1 -; RUN: grep xor %t | count 1 -; RUN: not grep LCP %t +; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | FileCheck %s define <2 x double> @foo() nounwind { ret <2 x double> bitcast (<2 x i64><i64 -1, i64 -1> to <2 x double>) +; CHECK: foo: +; CHECK: pcmpeqd %xmm0, %xmm0 +; CHECK-NOT: %xmm +; CHECK: ret } define <2 x double> @bar() nounwind { ret <2 x double> bitcast (<2 x i64><i64 0, i64 0> to <2 x double>) +; CHECK: bar: +; CHECK: xorps %xmm0, %xmm0 +; CHECK-NOT: %xmm +; CHECK: ret } diff --git a/test/CodeGen/X86/force-align-stack-alloca.ll b/test/CodeGen/X86/force-align-stack-alloca.ll index 4ab9067..2ada194 100644 --- a/test/CodeGen/X86/force-align-stack-alloca.ll +++ b/test/CodeGen/X86/force-align-stack-alloca.ll @@ -24,8 +24,8 @@ define i64 @g(i32 %i) nounwind { ; CHECK-NEXT: andl $-32, %esp ; CHECK-NEXT: subl $32, %esp ; -; Now setup the base pointer (%ebx). -; CHECK-NEXT: movl %esp, %ebx +; Now setup the base pointer (%esi). +; CHECK-NEXT: movl %esp, %esi ; CHECK-NOT: {{[^ ,]*}}, %esp ; ; The next adjustment of the stack is due to the alloca. 
diff --git a/test/CodeGen/X86/fp-in-intregs.ll b/test/CodeGen/X86/fp-in-intregs.ll index 6966cf0..1f5121d 100644 --- a/test/CodeGen/X86/fp-in-intregs.ll +++ b/test/CodeGen/X86/fp-in-intregs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s +; RUN: llc < %s -mtriple=i686-apple-macosx -mcpu=yonah | FileCheck %s ; CHECK-NOT: {{((xor|and)ps|movd)}} ; These operations should be done in integer registers, eliminating constant diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll index 1344cdc..0729dda 100644 --- a/test/CodeGen/X86/full-lsr.ll +++ b/test/CodeGen/X86/full-lsr.ll @@ -10,7 +10,7 @@ define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, ; CHECK: foo ; CHECK: addl ; CHECK: addl -; CEHCK: addl +; CHECK: addl entry: %0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1] diff --git a/test/CodeGen/X86/gs-fold.ll b/test/CodeGen/X86/gs-fold.ll new file mode 100644 index 0000000..dbec76b --- /dev/null +++ b/test/CodeGen/X86/gs-fold.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-freebsd | FileCheck %s --check-prefix=CHECK-FBSD +; RUN: llc < %s -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=CHECK-LINUX +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +%struct.thread = type { i32, i32, i32, i32 } + +define i32 @test() nounwind uwtable { +entry: + %0 = load volatile %struct.thread* addrspace(256)* null + %c = getelementptr inbounds %struct.thread* %0, i64 0, i32 2 + %1 = load i32* %c, align 4 + ret i32 %1 +} + +; Check that we are not assuming that gs contains the address of gs if we are not targeting Linux +; CHECK-FBSD: movq %gs:0, %rax +; CHECK-FBSD: movl 8(%rax), %eax +; Check that we are assuming that gs contains the address of gs if we are targeting Linux +; CHECK-LINUX: movl %gs:8, %eax + diff --git a/test/CodeGen/X86/inreg.ll b/test/CodeGen/X86/inreg.ll new file mode 100644 index 0000000..6653cfb --- /dev/null +++ b/test/CodeGen/X86/inreg.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple=i686-pc-linux -mcpu=corei7 | FileCheck --check-prefix=DAG %s +; RUN: llc < %s -mtriple=i686-pc-linux -mcpu=corei7 -O0 | FileCheck --check-prefix=FAST %s + +%struct.s1 = type { double, float } + +define void @g1() nounwind { +entry: + %tmp = alloca %struct.s1, align 4 + call void @f(%struct.s1* inreg sret %tmp, i32 inreg 41, i32 inreg 42, i32 43) + ret void + ; DAG: g1: + ; DAG: subl $[[AMT:.*]], %esp + ; DAG-NEXT: $43, (%esp) + ; DAG-NEXT: leal 16(%esp), %eax + ; DAG-NEXT: movl $41, %edx + ; DAG-NEXT: movl $42, %ecx + ; DAG-NEXT: calll f + ; DAG-NEXT: addl $[[AMT]], %esp + ; DAG-NEXT: ret + + ; FAST: g1: + ; FAST: subl $[[AMT:.*]], %esp + ; FAST-NEXT: leal 8(%esp), %eax + ; FAST-NEXT: movl $41, %edx + ; FAST-NEXT: movl $42, %ecx + ; FAST: $43, (%esp) + ; FAST: calll f + ; FAST-NEXT: addl $[[AMT]], %esp + ; FAST: ret +} + +declare void @f(%struct.s1* inreg sret, i32 inreg, i32 inreg, i32) + +%struct.s2 = type {} + +define void @g2(%struct.s2* inreg sret %agg.result) nounwind { +entry: + ret void + ; DAG: g2 + ; DAG-NOT: ret $4 + ; DAG: .size g2 + + ; FAST: g2 + ; FAST-NOT: ret $4 + ; FAST: .size g2 +} diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll index 5b7c19f..48e2106 100644 --- a/test/CodeGen/X86/jump_sign.ll +++ b/test/CodeGen/X86/jump_sign.ll @@ -142,6 +142,7 @@ if.else: define i32 @l4(i32 %a, i32 %b) nounwind { entry: ; CHECK: l4: +; CHECK: xor ; CHECK: sub ; CHECK-NOT: cmp 
%cmp = icmp sgt i32 %b, %a @@ -213,3 +214,42 @@ entry: %add. = select i1 %cmp, i32 %add, i32 0 ret i32 %add. } +; PR13475 +; If we have sub a, b and cmp b, a and the result of cmp is used +; by sbb, we should not optimize cmp away. +define i32 @q(i32 %j.4, i32 %w, i32 %el) { +; CHECK: q: +; CHECK: sub +; CHECK: cmp +; CHECK-NEXT: sbb + %tmp532 = add i32 %j.4, %w + %tmp533 = icmp ugt i32 %tmp532, %el + %tmp534 = icmp ult i32 %w, %el + %or.cond = and i1 %tmp533, %tmp534 + %tmp535 = sub i32 %el, %w + %j.5 = select i1 %or.cond, i32 %tmp535, i32 %j.4 + ret i32 %j.5 +} +; rdar://11873276 +define i8* @r(i8* %base, i32* nocapture %offset, i32 %size) nounwind { +entry: +; CHECK: r: +; CHECK: sub +; CHECK-NOT: cmp +; CHECK: j +; CHECK-NOT: sub +; CHECK: ret + %0 = load i32* %offset, align 8 + %cmp = icmp slt i32 %0, %size + br i1 %cmp, label %return, label %if.end + +if.end: + %sub = sub nsw i32 %0, %size + store i32 %sub, i32* %offset, align 8 + %add.ptr = getelementptr inbounds i8* %base, i32 %sub + br label %return + +return: + %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ] + ret i8* %retval.0 +} diff --git a/test/CodeGen/X86/liveness-local-regalloc.ll b/test/CodeGen/X86/liveness-local-regalloc.ll index faeeaa7..721f545 100644 --- a/test/CodeGen/X86/liveness-local-regalloc.ll +++ b/test/CodeGen/X86/liveness-local-regalloc.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=generic -regalloc=fast -optimize-regalloc=0 -verify-machineinstrs -mtriple=x86_64-apple-darwin10 +; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 -verify-machineinstrs -mtriple=x86_64-apple-darwin10 ; <rdar://problem/7755473> ; PR12821 diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll index d14102f..4bd162b 100644 --- a/test/CodeGen/X86/loop-blocks.ll +++ b/test/CodeGen/X86/loop-blocks.ll @@ -41,7 +41,6 @@ done: ; CHECK-NEXT: align ; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: callq bar99 -; CHECK-NEXT: align ; CHECK-NEXT: .LBB1_1: ; CHECK-NEXT: callq body @@ -79,7 +78,6 @@ exit: ; CHECK-NEXT: .LBB2_5: ; CHECK-NEXT: callq block_a_true_func ; CHECK-NEXT: callq block_a_merge_func -; CHECK-NEXT: align ; CHECK-NEXT: .LBB2_1: ; CHECK-NEXT: callq body ; @@ -139,13 +137,13 @@ exit: ; CHECK-NEXT: align ; CHECK-NEXT: .LBB3_7: ; CHECK-NEXT: callq bar100 -; CHECK-NEXT: align ; CHECK-NEXT: .LBB3_1: ; CHECK-NEXT: callq loop_header ; CHECK: jl .LBB3_7 ; CHECK: jge .LBB3_3 ; CHECK-NEXT: callq bar101 ; CHECK-NEXT: jmp .LBB3_1 +; CHECK-NEXT: align ; CHECK-NEXT: .LBB3_3: ; CHECK: jge .LBB3_4 ; CHECK-NEXT: callq bar102 diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll index 6e8d0bf..8a81f70 100644 --- a/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -3,11 +3,11 @@ ; CHECK: t: ; CHECK: decq -; CHECK-NEXT: movl (%r11,%rax,4), %eax +; CHECK-NEXT: movl (%r9,%rax,4), %eax ; CHECK-NEXT: jne ; ATOM: t: -; ATOM: movl (%r10,%rax,4), %eax +; ATOM: movl (%r9,%rax,4), %eax ; ATOM-NEXT: decq ; ATOM-NEXT: jne diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll index a757cde..d171fd5 100644 --- a/test/CodeGen/X86/machine-cse.ll +++ b/test/CodeGen/X86/machine-cse.ll @@ -99,3 +99,60 @@ return: ; preds = %if.end, %entry %retval.0 = phi i32 [ 1, %entry ], [ %., %if.end ] ret i32 %retval.0 } + +; rdar://11393714 +define i8* @bsd_memchr(i8* %s, i32 %a, i32 %c, i64 %n) nounwind ssp { +; CHECK: %entry +; CHECK: xorl +; CHECK: %preheader +; CHECK: %do.body +; CHECK-NOT: xorl +; CHECK: %do.cond +; CHECK-NOT: xorl +; CHECK: %return 
+entry: + %cmp = icmp eq i64 %n, 0 + br i1 %cmp, label %return, label %preheader + +preheader: + %conv2 = and i32 %c, 255 + br label %do.body + +do.body: + %n.addr.0 = phi i64 [ %dec, %do.cond ], [ %n, %preheader ] + %p.0 = phi i8* [ %incdec.ptr, %do.cond ], [ %s, %preheader ] + %cmp3 = icmp eq i32 %a, %conv2 + br i1 %cmp3, label %return, label %do.cond + +do.cond: + %incdec.ptr = getelementptr inbounds i8* %p.0, i64 1 + %dec = add i64 %n.addr.0, -1 + %cmp6 = icmp eq i64 %dec, 0 + br i1 %cmp6, label %return, label %do.body + +return: + %retval.0 = phi i8* [ null, %entry ], [ null, %do.cond ], [ %p.0, %do.body ] + ret i8* %retval.0 +} + +; PR13578 +@t2_global = external global i32 + +declare i1 @t2_func() + +define i32 @t2() { + store i32 42, i32* @t2_global + %c = call i1 @t2_func() + br i1 %c, label %a, label %b + +a: + %l = load i32* @t2_global + ret i32 %l + +b: + ret i32 0 + +; CHECK: t2: +; CHECK: t2_global@GOTPCREL(%rip) +; CHECK-NOT: t2_global@GOTPCREL(%rip) +} diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll index f4bc1bb..723d1d8 100644 --- a/test/CodeGen/X86/memcmp.ll +++ b/test/CodeGen/X86/memcmp.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -disable-simplify-libcalls -mtriple=x86_64-linux | FileCheck %s --check-prefix=NOBUILTIN ; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s ; This tests codegen time inlining/optimization of memcmp @@ -23,6 +24,8 @@ return: ; preds = %entry ; CHECK: memcmp2: ; CHECK: movw ([[A0:%rdi|%rcx]]), %ax ; CHECK: cmpw ([[A1:%rsi|%rdx]]), %ax +; NOBUILTIN: memcmp2: +; NOBUILTIN: callq } define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind { diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll index 86c6862..39c7fba 100644 --- a/test/CodeGen/X86/memcpy.ll +++ b/test/CodeGen/X86/memcpy.ll @@ -65,18 +65,18 @@ entry: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false) ret void ; LINUX: test4: -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq } diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll index aeb540f..65ee7b1 100644 --- a/test/CodeGen/X86/movgs.ll +++ b/test/CodeGen/X86/movgs.ll @@ -55,4 +55,20 @@ entry: ; X64: ret } +; The two loads here both look identical to selection DAG, except for their +; address spaces. Make sure they aren't CSE'd. 
+define i32 @test_no_cse() nounwind readonly { +entry: + %tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31) ; <i32*> [#uses=1] + %tmp1 = load i32* %tmp ; <i32> [#uses=1] + %tmp2 = load i32* addrspace(257)* getelementptr (i32* addrspace(257)* inttoptr (i32 72 to i32* addrspace(257)*), i32 31) ; <i32*> [#uses=1] + %tmp3 = load i32* %tmp2 ; <i32> [#uses=1] + %tmp4 = add i32 %tmp1, %tmp3 + ret i32 %tmp4 +} +; X32: test_no_cse: +; X32: movl %gs:196 +; X32: movl %fs:196 +; X32: ret + declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone diff --git a/test/CodeGen/X86/pr11334.ll b/test/CodeGen/X86/pr11334.ll new file mode 100644 index 0000000..e7e29e0 --- /dev/null +++ b/test/CodeGen/X86/pr11334.ll @@ -0,0 +1,64 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX + +define <2 x double> @v2f2d_ext_vec(<2 x float> %v1) nounwind { +entry: +; CHECK: v2f2d_ext_vec +; CHECK: cvtps2pd +; AVX: v2f2d_ext_vec +; AVX: vcvtps2pd + %f1 = fpext <2 x float> %v1 to <2 x double> + ret <2 x double> %f1 +} + +define <3 x double> @v3f2d_ext_vec(<3 x float> %v1) nounwind { +entry: +; CHECK: v3f2d_ext_vec +; CHECK: cvtps2pd +; CHECK: movhlps +; CHECK: cvtps2pd +; AVX: v3f2d_ext_vec +; AVX: vcvtps2pd +; AVX: ret + %f1 = fpext <3 x float> %v1 to <3 x double> + ret <3 x double> %f1 +} + +define <4 x double> @v4f2d_ext_vec(<4 x float> %v1) nounwind { +entry: +; CHECK: v4f2d_ext_vec +; CHECK: cvtps2pd +; CHECK: movhlps +; CHECK: cvtps2pd +; AVX: v4f2d_ext_vec +; AVX: vcvtps2pd +; AVX: ret + %f1 = fpext <4 x float> %v1 to <4 x double> + ret <4 x double> %f1 +} + +define <8 x double> @v8f2d_ext_vec(<8 x float> %v1) nounwind { +entry: +; CHECK: v8f2d_ext_vec +; CHECK: cvtps2pd +; CHECK: cvtps2pd +; CHECK: movhlps +; CHECK: cvtps2pd +; CHECK: movhlps +; CHECK: cvtps2pd +; AVX: v8f2d_ext_vec +; AVX: vcvtps2pd +; AVX: vextractf128 +; AVX: vcvtps2pd +; AVX: ret + %f1 = fpext <8 x float> %v1 to <8 x double> + ret <8 x double> %f1 +} + +define void @test_vector_creation() nounwind { + %1 = insertelement <4 x double> undef, double 0.000000e+00, i32 2 + %2 = load double addrspace(1)* null + %3 = insertelement <4 x double> %1, double %2, i32 3 + store <4 x double> %3, <4 x double>* undef + ret void +} diff --git a/test/CodeGen/X86/pr13577.ll b/test/CodeGen/X86/pr13577.ll new file mode 100644 index 0000000..faaec26 --- /dev/null +++ b/test/CodeGen/X86/pr13577.ll @@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86-64 + +define x86_fp80 @foo(x86_fp80 %a) { + %1 = tail call x86_fp80 @copysignl(x86_fp80 0xK7FFF8000000000000000, x86_fp80 %a) nounwind readnone + ret x86_fp80 %1 +} + +declare x86_fp80 @copysignl(x86_fp80, x86_fp80) nounwind readnone diff --git a/test/CodeGen/X86/reverse_branches.ll b/test/CodeGen/X86/reverse_branches.ll new file mode 100644 index 0000000..9772125 --- /dev/null +++ b/test/CodeGen/X86/reverse_branches.ll @@ -0,0 +1,104 @@ +; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s + +@.str2 = private unnamed_addr constant [7 x i8] c"memchr\00", align 1 +@.str3 = private unnamed_addr constant [11 x i8] c"bsd_memchr\00", align 1 +@str4 = private unnamed_addr constant [5 x i8] c"Bug!\00" + +; Make sure at end of do.cond.i, we jump to do.body.i first to have a tighter +; inner loop. 
+define i32 @test_branches_order() uwtable ssp { +; CHECK: test_branches_order: +; CHECK: [[L0:LBB0_[0-9]+]]: ## %do.body.i +; CHECK: je +; CHECK: %do.cond.i +; CHECK: jne [[L0]] +; CHECK: jmp +; CHECK: %exit +entry: + %strs = alloca [1000 x [1001 x i8]], align 16 + br label %for.cond + +for.cond: + %j.0 = phi i32 [ 0, %entry ], [ %inc10, %for.inc9 ] + %cmp = icmp slt i32 %j.0, 1000 + br i1 %cmp, label %for.cond1, label %for.end11 + +for.cond1: + %indvars.iv50 = phi i64 [ %indvars.iv.next51, %for.body3 ], [ 0, %for.cond ] + %0 = trunc i64 %indvars.iv50 to i32 + %cmp2 = icmp slt i32 %0, 1000 + br i1 %cmp2, label %for.body3, label %for.inc9 + +for.body3: + %arraydecay = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv50, i64 0 + %call = call i8* @memchr(i8* %arraydecay, i32 120, i64 1000) + %add.ptr = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv50, i64 %indvars.iv50 + %cmp7 = icmp eq i8* %call, %add.ptr + %indvars.iv.next51 = add i64 %indvars.iv50, 1 + br i1 %cmp7, label %for.cond1, label %if.then + +if.then: + %puts = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @str4, i64 0, i64 0)) + call void @exit(i32 1) noreturn + unreachable + +for.inc9: + %inc10 = add nsw i32 %j.0, 1 + br label %for.cond + +for.end11: + %puts42 = call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @.str2, i64 0, i64 0)) + br label %for.cond14 + +for.cond14: + %j13.0 = phi i32 [ 0, %for.end11 ], [ %inc39, %for.inc38 ] + %cmp15 = icmp slt i32 %j13.0, 1000 + br i1 %cmp15, label %for.cond18, label %for.end40 + +for.cond18: + %indvars.iv = phi i64 [ %indvars.iv.next, %exit ], [ 0, %for.cond14 ] + %1 = trunc i64 %indvars.iv to i32 + %cmp19 = icmp slt i32 %1, 1000 + br i1 %cmp19, label %for.body20, label %for.inc38 + +for.body20: + %arraydecay24 = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv, i64 0 + br label %do.body.i + +do.body.i: + %n.addr.0.i = phi i64 [ %dec.i, %do.cond.i ], [ 1000, %for.body20 ] + %p.0.i = phi i8* [ %incdec.ptr.i, %do.cond.i ], [ %arraydecay24, %for.body20 ] + %2 = load i8* %p.0.i, align 1 + %cmp3.i = icmp eq i8 %2, 120 + br i1 %cmp3.i, label %exit, label %do.cond.i + +do.cond.i: + %incdec.ptr.i = getelementptr inbounds i8* %p.0.i, i64 1 + %dec.i = add i64 %n.addr.0.i, -1 + %cmp5.i = icmp eq i64 %dec.i, 0 + br i1 %cmp5.i, label %if.then32, label %do.body.i + +exit: + %add.ptr30 = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv, i64 %indvars.iv + %cmp31 = icmp eq i8* %p.0.i, %add.ptr30 + %indvars.iv.next = add i64 %indvars.iv, 1 + br i1 %cmp31, label %for.cond18, label %if.then32 + +if.then32: + %puts43 = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @str4, i64 0, i64 0)) + call void @exit(i32 1) noreturn + unreachable + +for.inc38: + %inc39 = add nsw i32 %j13.0, 1 + br label %for.cond14 + +for.end40: + %puts44 = call i32 @puts(i8* getelementptr inbounds ([11 x i8]* @.str3, i64 0, i64 0)) + ret i32 0 +} + +declare i8* @memchr(i8*, i32, i64) nounwind readonly +declare void @exit(i32) noreturn +declare i32 @puts(i8* nocapture) nounwind + diff --git a/test/CodeGen/X86/rounding-ops.ll b/test/CodeGen/X86/rounding-ops.ll index 0dd74ea..51fcf64 100644 --- a/test/CodeGen/X86/rounding-ops.ll +++ b/test/CodeGen/X86/rounding-ops.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse41 | FileCheck -check-prefix=CHECK-SSE %s -; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse41 | FileCheck 
-check-prefix=CHECK-SSE %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s define float @test1(float %x) nounwind { %call = tail call float @floorf(float %x) nounwind readnone diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll index c8d9345..2e39473 100644 --- a/test/CodeGen/X86/select.ll +++ b/test/CodeGen/X86/select.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=generic | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=atom | FileCheck -check-prefix=ATOM %s ; PR5757 %0 = type { i64, i32 } @@ -12,6 +13,10 @@ define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind { ; CHECK: test1: ; CHECK: cmovneq %rdi, %rsi ; CHECK: movl (%rsi), %eax + +; ATOM: test1: +; ATOM: cmovneq %rdi, %rsi +; ATOM: movl (%rsi), %eax } @@ -31,6 +36,10 @@ bb91: ; preds = %bb84 ; CHECK: test2: ; CHECK: movnew ; CHECK: movswl + +; ATOM: test2: +; ATOM: movnew +; ATOM: movswl } declare i1 @return_false() @@ -44,6 +53,9 @@ entry: ret float %iftmp.0.0 ; CHECK: test3: ; CHECK: movss {{.*}},4), %xmm0 + +; ATOM: test3: +; ATOM: movss {{.*}},4), %xmm0 } define signext i8 @test4(i8* nocapture %P, double %F) nounwind readonly { @@ -55,6 +67,9 @@ entry: ret i8 %2 ; CHECK: test4: ; CHECK: movsbl ({{.*}},4), %eax + +; ATOM: test4: +; ATOM: movsbl ({{.*}},4), %eax } define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind { @@ -62,6 +77,8 @@ define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind { store <2 x i16> %x, <2 x i16>* %p ret void ; CHECK: test5: + +; ATOM: test5: } define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind { @@ -79,6 +96,12 @@ define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind { ; CHECK: ret ; CHECK: mulps ; CHECK: ret + +; ATOM: test6: +; ATOM: je +; ATOM: ret +; ATOM: mulps +; ATOM: ret } ; Select with fp80's @@ -89,6 +112,10 @@ define x86_fp80 @test7(i32 %tmp8) nounwind { ; CHECK: test7: ; CHECK: leaq ; CHECK: fldt (%r{{.}}x,%r{{.}}x) + +; ATOM: test7: +; ATOM: leaq +; ATOM: fldt (%r{{.}}x,%r{{.}}x) } ; widening select v6i32 and then a sub @@ -99,6 +126,8 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) ret void ; CHECK: test8: + +; ATOM: test8: } @@ -113,6 +142,12 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; CHECK: sbbq %rax, %rax ; CHECK: orq %rsi, %rax ; CHECK: ret + +; ATOM: test9: +; ATOM: cmpq $1, %rdi +; ATOM: sbbq %rax, %rax +; ATOM: orq %rsi, %rax +; ATOM: ret } ;; Same as test9 @@ -125,6 +160,12 @@ define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; CHECK: sbbq %rax, %rax ; CHECK: orq %rsi, %rax ; CHECK: ret + +; ATOM: test9a: +; ATOM: cmpq $1, %rdi +; ATOM: sbbq %rax, %rax +; ATOM: orq %rsi, %rax +; ATOM: ret } define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone { @@ -137,6 +178,12 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; CHECK: sbbq %rax, %rax ; CHECK: orq %rsi, %rax ; CHECK: ret + +; ATOM: test9b: +; ATOM: cmpq $1, %rdi +; ATOM: sbbq %rax, %rax +; ATOM: orq %rsi, %rax +; ATOM: ret } ;; Select between -1 and 1. 
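The select.ll hunks above and below add a second llc invocation with -mcpu=atom and mirror every CHECK block with an ATOM-prefixed copy. The mechanics: each RUN line pipes into its own FileCheck process, and -check-prefix tells that process which directive family to honor; directives carrying the other prefix are inert comments to it. Since a FileCheck invocation of this vintage appears to accept only a single prefix, identical expectations still have to be written out twice. A hedged sketch of the pattern, using a made-up function rather than one from the patch:

; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=generic | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=atom | FileCheck -check-prefix=ATOM %s
define i32 @pick(i32 %a, i32 %b) nounwind {
  %t = icmp ult i32 %a, %b
  %r = select i1 %t, i32 %a, i32 %b
  ret i32 %r
; CHECK: pick:
; CHECK: cmov
; ATOM: pick:
; ATOM: cmov
}

Writing the ATOM lines out even where they currently match the generic ones verbatim leaves room for the two schedules to diverge later without restructuring the test.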
@@ -149,6 +196,12 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; CHECK: sbbq %rax, %rax ; CHECK: orq $1, %rax ; CHECK: ret + +; ATOM: test10: +; ATOM: cmpq $1, %rdi +; ATOM: sbbq %rax, %rax +; ATOM: orq $1, %rax +; ATOM: ret } @@ -163,6 +216,13 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; CHECK: notq %rax ; CHECK: orq %rsi, %rax ; CHECK: ret + +; ATOM: test11: +; ATOM: cmpq $1, %rdi +; ATOM: sbbq %rax, %rax +; ATOM: notq %rax +; ATOM: orq %rsi, %rax +; ATOM: ret } define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone { @@ -175,6 +235,13 @@ define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; CHECK: notq %rax ; CHECK: orq %rsi, %rax ; CHECK: ret + +; ATOM: test11a: +; ATOM: cmpq $1, %rdi +; ATOM: sbbq %rax, %rax +; ATOM: notq %rax +; ATOM: orq %rsi, %rax +; ATOM: ret } @@ -189,10 +256,16 @@ entry: %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone ret i8* %call ; CHECK: test12: -; CHECK: mulq ; CHECK: movq $-1, %rdi +; CHECK: mulq ; CHECK: cmovnoq %rax, %rdi ; CHECK: jmp __Znam + +; ATOM: test12: +; ATOM: mulq +; ATOM: movq $-1, %rdi +; ATOM: cmovnoq %rax, %rdi +; ATOM: jmp __Znam } declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone @@ -205,6 +278,11 @@ define i32 @test13(i32 %a, i32 %b) nounwind { ; CHECK: cmpl ; CHECK-NEXT: sbbl ; CHECK-NEXT: ret + +; ATOM: test13: +; ATOM: cmpl +; ATOM-NEXT: sbbl +; ATOM-NEXT: ret } define i32 @test14(i32 %a, i32 %b) nounwind { @@ -216,6 +294,12 @@ define i32 @test14(i32 %a, i32 %b) nounwind { ; CHECK-NEXT: sbbl ; CHECK-NEXT: notl ; CHECK-NEXT: ret + +; ATOM: test14: +; ATOM: cmpl +; ATOM-NEXT: sbbl +; ATOM-NEXT: notl +; ATOM-NEXT: ret } ; rdar://10961709 @@ -227,6 +311,10 @@ entry: ; CHECK: test15: ; CHECK: negl ; CHECK: sbbl + +; ATOM: test15: +; ATOM: negl +; ATOM: sbbl } define i64 @test16(i64 %x) nounwind uwtable readnone ssp { @@ -237,6 +325,10 @@ entry: ; CHECK: test16: ; CHECK: negq ; CHECK: sbbq + +; ATOM: test16: +; ATOM: negq +; ATOM: sbbq } define i16 @test17(i16 %x) nounwind { @@ -247,4 +339,8 @@ entry: ; CHECK: test17: ; CHECK: negw ; CHECK: sbbw + +; ATOM: test17: +; ATOM: negw +; ATOM: sbbw } diff --git a/test/CodeGen/X86/sincos.ll b/test/CodeGen/X86/sincos.ll index e81860b..1479be1 100644 --- a/test/CodeGen/X86/sincos.ll +++ b/test/CodeGen/X86/sincos.ll @@ -1,6 +1,6 @@ ; Make sure this testcase codegens to the sin and cos instructions, not calls -; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | FileCheck %s --check-prefix=SIN -; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | FileCheck %s --check-prefix=COS +; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | FileCheck %s --check-prefix=SIN +; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | FileCheck %s --check-prefix=COS declare float @sinf(float) readonly diff --git a/test/CodeGen/X86/sink-out-of-loop.ll b/test/CodeGen/X86/sink-out-of-loop.ll new file mode 100644 index 0000000..c600f92 --- /dev/null +++ b/test/CodeGen/X86/sink-out-of-loop.ll @@ -0,0 +1,54 @@ +; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s + +; A MOV32ri is inside a loop, it has two successors, one successor is inside the +; same loop, the other successor is outside the loop. We should be able to sink +; MOV32ri outside the loop. 
+; rdar://11980766 +define i32 @sink_succ(i32 %argc, i8** nocapture %argv) nounwind uwtable ssp { +; CHECK: sink_succ +; CHECK: [[OUTER_LN1:LBB0_[0-9]+]]: ## %preheader +; CHECK: %exit +; CHECK-NOT: movl +; CHECK: jne [[OUTER_LN1]] +; CHECK: movl +; CHECK: [[LN2:LBB0_[0-9]+]]: ## %for.body2 +; CHECK: jne [[LN2]] +; CHECK: ret +entry: + br label %preheader + +preheader: + %i.127 = phi i32 [ 0, %entry ], [ %inc9, %exit ] + br label %for.body1.lr + +for.body1.lr: + %iv30 = phi i32 [ 1, %preheader ], [ %iv.next31, %for.inc40.i ] + br label %for.body1 + +for.body1: + %iv.i = phi i64 [ 0, %for.body1.lr ], [ %iv.next.i, %for.body1 ] + %iv.next.i = add i64 %iv.i, 1 + %lftr.wideiv32 = trunc i64 %iv.next.i to i32 + %exitcond33 = icmp eq i32 %lftr.wideiv32, %iv30 + br i1 %exitcond33, label %for.inc40.i, label %for.body1 + +for.inc40.i: + %iv.next31 = add i32 %iv30, 1 + %exitcond49.i = icmp eq i32 %iv.next31, 32 + br i1 %exitcond49.i, label %exit, label %for.body1.lr + +exit: + %inc9 = add nsw i32 %i.127, 1 + %exitcond34 = icmp eq i32 %inc9, 10 + br i1 %exitcond34, label %for.body2, label %preheader + +for.body2: + %iv = phi i64 [ %iv.next, %for.body2 ], [ 0, %exit ] + %iv.next = add i64 %iv, 1 + %lftr.wideiv = trunc i64 %iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 2048 + br i1 %exitcond, label %for.end20, label %for.body2 + +for.end20: + ret i32 0 +} diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll index 4405f68..0ba0215 100644 --- a/test/CodeGen/X86/sse-minmax.ll +++ b/test/CodeGen/X86/sse-minmax.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false | FileCheck %s -; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s -; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false | FileCheck %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s ; Some of these patterns can be matched as SSE min or max. Some of ; then can be matched provided that the operands are swapped. 
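The UNSAFE-prefix changes in the sse-minmax.ll hunks that follow all have the same shape: a minsd or maxsd into %xmm1 plus a movaps copy back to %xmm0 collapses into a single minsd or maxsd computed in place in %xmm0. Operand order matters here because the x86 scalar min/max instructions are not commutative in the presence of NaNs: in AT&T syntax, minsd %xmm1, %xmm0 returns %xmm1 when the comparison is unordered, so the two operand orders differ exactly on NaN inputs. Under -enable-unsafe-fp-math with -enable-no-nans-fp-math the backend may commute the operands and drop the copy. A sketch of the kind of pattern involved, mirroring the test's own ogt_inverse, with register assignments as implied by the CHECK lines:

define double @ogt_inverse(double %x, double %y) nounwind {
  ; With %x in %xmm0 and %y in %xmm1, strict IEEE lowering must compute the
  ; min into %xmm1 and copy it back to %xmm0; with no-NaNs the operands
  ; commute and the min can be computed directly in %xmm0.
  %c = fcmp ogt double %x, %y
  %r = select i1 %c, double %y, double %x
  ret double %r
}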
@@ -47,8 +47,7 @@ define double @olt(double %x, double %y) nounwind { ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: ogt_inverse: -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ogt_inverse: ; FINITE-NEXT: minsd %xmm0, %xmm1 @@ -65,8 +64,7 @@ define double @ogt_inverse(double %x, double %y) nounwind { ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: olt_inverse: -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: olt_inverse: ; FINITE-NEXT: maxsd %xmm0, %xmm1 @@ -107,8 +105,7 @@ define double @ole(double %x, double %y) nounwind { ; CHECK: oge_inverse: ; CHECK-NEXT: ucomisd %xmm1, %xmm0 ; UNSAFE: oge_inverse: -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: oge_inverse: ; FINITE-NEXT: minsd %xmm0, %xmm1 @@ -123,8 +120,7 @@ define double @oge_inverse(double %x, double %y) nounwind { ; CHECK: ole_inverse: ; CHECK-NEXT: ucomisd %xmm0, %xmm1 ; UNSAFE: ole_inverse: -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ole_inverse: ; FINITE-NEXT: maxsd %xmm0, %xmm1 @@ -142,7 +138,8 @@ define double @ole_inverse(double %x, double %y) nounwind { ; CHECK-NEXT: ret ; UNSAFE: ogt_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ogt_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -160,7 +157,8 @@ define double @ogt_x(double %x) nounwind { ; CHECK-NEXT: ret ; UNSAFE: olt_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: olt_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -218,7 +216,8 @@ define double @olt_inverse_x(double %x) nounwind { ; CHECK: ucomisd %xmm1, %xmm0 ; UNSAFE: oge_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: oge_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -234,7 +233,8 @@ define double @oge_x(double %x) nounwind { ; CHECK: ucomisd %xmm0, %xmm1 ; UNSAFE: ole_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ole_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -313,8 +313,7 @@ define double @ult(double %x, double %y) nounwind { ; CHECK: ugt_inverse: ; CHECK: ucomisd %xmm0, %xmm1 ; UNSAFE: ugt_inverse: -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ugt_inverse: ; FINITE-NEXT: minsd %xmm0, %xmm1 @@ -329,8 +328,7 @@ define double @ugt_inverse(double %x, double %y) nounwind { ; CHECK: ult_inverse: ; CHECK: ucomisd %xmm1, %xmm0 ; UNSAFE: ult_inverse: -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ult_inverse: ; FINITE-NEXT: maxsd %xmm0, %xmm1 @@ -378,8 +376,7 @@ define double @ule(double %x, double %y) nounwind { ; CHECK-NEXT: minsd %xmm1, %xmm0 ; CHECK-NEXT: ret ; 
UNSAFE: uge_inverse: -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: uge_inverse: ; FINITE-NEXT: minsd %xmm0, %xmm1 @@ -395,8 +392,7 @@ define double @uge_inverse(double %x, double %y) nounwind { ; CHECK-NEXT: maxsd %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: ule_inverse: -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ule_inverse: ; FINITE-NEXT: maxsd %xmm0, %xmm1 @@ -412,7 +408,8 @@ define double @ule_inverse(double %x, double %y) nounwind { ; CHECK: ucomisd %xmm0, %xmm1 ; UNSAFE: ugt_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ugt_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -428,7 +425,8 @@ define double @ugt_x(double %x) nounwind { ; CHECK: ucomisd %xmm1, %xmm0 ; UNSAFE: ult_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ult_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -483,7 +481,8 @@ define double @ult_inverse_x(double %x) nounwind { ; CHECK-NEXT: ret ; UNSAFE: uge_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: uge_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -502,7 +501,8 @@ define double @uge_x(double %x) nounwind { ; CHECK-NEXT: ret ; UNSAFE: ule_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ule_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -590,9 +590,7 @@ define double @olt_y(double %x) nounwind { ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: ogt_inverse_y: -; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ogt_inverse_y: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 @@ -611,9 +609,7 @@ define double @ogt_inverse_y(double %x) nounwind { ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: olt_inverse_y: -; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: olt_inverse_y: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 @@ -657,9 +653,7 @@ define double @ole_y(double %x) nounwind { ; CHECK: oge_inverse_y: ; CHECK: ucomisd %xmm ; UNSAFE: oge_inverse_y: -; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: oge_inverse_y: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 @@ -675,9 +669,7 @@ define double @oge_inverse_y(double %x) nounwind { ; CHECK: ole_inverse_y: ; CHECK: ucomisd %xmm ; UNSAFE: ole_inverse_y: -; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ole_inverse_y: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 @@ -721,9 +713,7 @@ define double @ult_y(double %x) nounwind { ; CHECK: ugt_inverse_y: ; CHECK: ucomisd 
%xmm ; UNSAFE: ugt_inverse_y: -; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ugt_inverse_y: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 @@ -739,9 +729,7 @@ define double @ugt_inverse_y(double %x) nounwind { ; CHECK: ult_inverse_y: ; CHECK: ucomisd %xmm ; UNSAFE: ult_inverse_y: -; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ult_inverse_y: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 @@ -792,9 +780,7 @@ define double @ule_y(double %x) nounwind { ; CHECK-NEXT: minsd {{[^,]*}}, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: uge_inverse_y: -; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: uge_inverse_y: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 @@ -811,9 +797,7 @@ define double @uge_inverse_y(double %x) nounwind { ; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: ule_inverse_y: -; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ule_inverse_y: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll index f6c13ec..0ddb237 100644 --- a/test/CodeGen/X86/stack-align.ll +++ b/test/CodeGen/X86/stack-align.ll @@ -10,11 +10,11 @@ target triple = "i686-apple-darwin8" define void @test({ double, double }* byval %z, double* %P) nounwind { entry: %tmp3 = load double* @G, align 16 ; <double> [#uses=1] - %tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1] + %tmp4 = tail call double @fabs( double %tmp3 ) readnone ; <double> [#uses=1] store volatile double %tmp4, double* %P %tmp = getelementptr { double, double }* %z, i32 0, i32 0 ; <double*> [#uses=1] %tmp1 = load volatile double* %tmp, align 8 ; <double> [#uses=1] - %tmp2 = tail call double @fabs( double %tmp1 ) ; <double> [#uses=1] + %tmp2 = tail call double @fabs( double %tmp1 ) readnone ; <double> [#uses=1] ; CHECK: andpd{{.*}}4(%esp), %xmm %tmp6 = fadd double %tmp4, %tmp2 ; <double> [#uses=1] store volatile double %tmp6, double* %P, align 8 diff --git a/test/CodeGen/X86/stack-protector-linux.ll b/test/CodeGen/X86/stack-protector.ll index c075114..c075114 100644 --- a/test/CodeGen/X86/stack-protector-linux.ll +++ b/test/CodeGen/X86/stack-protector.ll diff --git a/test/CodeGen/X86/tailcall-cgp-dup.ll b/test/CodeGen/X86/tailcall-cgp-dup.ll new file mode 100644 index 0000000..a80b90f --- /dev/null +++ b/test/CodeGen/X86/tailcall-cgp-dup.ll @@ -0,0 +1,87 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +; Teach CGP to dup returns to enable tail call optimization. 
+; rdar://9147433 + +define i32 @foo(i32 %x) nounwind ssp { +; CHECK: foo: +entry: + switch i32 %x, label %return [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + i32 3, label %sw.bb3 + i32 4, label %sw.bb5 + i32 5, label %sw.bb7 + i32 6, label %sw.bb9 + ] + +sw.bb: ; preds = %entry +; CHECK: jmp _f1 + %call = tail call i32 @f1() nounwind + br label %return + +sw.bb1: ; preds = %entry +; CHECK: jmp _f2 + %call2 = tail call i32 @f2() nounwind + br label %return + +sw.bb3: ; preds = %entry +; CHECK: jmp _f3 + %call4 = tail call i32 @f3() nounwind + br label %return + +sw.bb5: ; preds = %entry +; CHECK: jmp _f4 + %call6 = tail call i32 @f4() nounwind + br label %return + +sw.bb7: ; preds = %entry +; CHECK: jmp _f5 + %call8 = tail call i32 @f5() nounwind + br label %return + +sw.bb9: ; preds = %entry +; CHECK: jmp _f6 + %call10 = tail call i32 @f6() nounwind + br label %return + +return: ; preds = %entry, %sw.bb9, %sw.bb7, %sw.bb5, %sw.bb3, %sw.bb1, %sw.bb + %retval.0 = phi i32 [ %call10, %sw.bb9 ], [ %call8, %sw.bb7 ], [ %call6, %sw.bb5 ], [ %call4, %sw.bb3 ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ], [ 0, %entry ] + ret i32 %retval.0 +} + +declare i32 @f1() + +declare i32 @f2() + +declare i32 @f3() + +declare i32 @f4() + +declare i32 @f5() + +declare i32 @f6() + +; rdar://11958338 +%0 = type opaque + +declare i8* @bar(i8*) uwtable optsize noinline ssp + +define hidden %0* @thingWithValue(i8* %self) uwtable ssp { +entry: +; CHECK: thingWithValue: +; CHECK: jmp _bar + br i1 undef, label %if.then.i, label %if.else.i + +if.then.i: ; preds = %entry + br label %someThingWithValue.exit + +if.else.i: ; preds = %entry + %call4.i = tail call i8* @bar(i8* undef) optsize + br label %someThingWithValue.exit + +someThingWithValue.exit: ; preds = %if.else.i, %if.then.i + %retval.0.in.i = phi i8* [ undef, %if.then.i ], [ %call4.i, %if.else.i ] + %retval.0.i = bitcast i8* %retval.0.in.i to %0* + ret %0* %retval.0.i +} diff --git a/test/CodeGen/X86/unreachable-stack-protector.ll b/test/CodeGen/X86/unreachable-stack-protector.ll deleted file mode 100644 index b066297..0000000 --- a/test/CodeGen/X86/unreachable-stack-protector.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: llc < %s -disable-cgp-delete-dead-blocks | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-darwin10.0.0" - -declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone - -define void @test5() nounwind optsize noinline ssp { -entry: -; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip) - %buf = alloca [64 x i8], align 16 - %0 = call i64 @llvm.objectsize.i64(i8* undef, i1 false) - br i1 false, label %if.end, label %if.then - -if.then: ; preds = %entry - unreachable - -if.end: ; preds = %entry - ret void -} diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll index 39c9b77..367dd27 100644 --- a/test/CodeGen/X86/vec_compare.ll +++ b/test/CodeGen/X86/vec_compare.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i386-apple-darwin | FileCheck %s define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind { diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll index 3bd3f7b..c294df5 100644 --- a/test/CodeGen/X86/vec_ss_load_fold.ll +++ b/test/CodeGen/X86/vec_ss_load_fold.ll @@ -70,3 +70,17 @@ define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind { ; CHECK: 
call ; CHECK: roundss $4, %xmm{{.*}}, %xmm0 } + +; PR13576 +define <2 x double> @test5() nounwind uwtable readnone noinline { +entry: + %0 = tail call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double +4.569870e+02, double 1.233210e+02>, i32 128) nounwind readnone + ret <2 x double> %0 +; CHECK: test5: +; CHECK: mov +; CHECK: mov +; CHECK: cvtsi2sd +} + +declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll index 73416ef..996bfc4 100644 --- a/test/CodeGen/X86/xor.ll +++ b/test/CodeGen/X86/xor.ll @@ -31,7 +31,7 @@ entry: ; X64: test3: ; X64: notl ; X64: andl -; X64: shrl %eax +; X64: shrl ; X64: ret ; X32: test3: diff --git a/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64 Binary files differnew file mode 100755 index 0000000..8848708 --- /dev/null +++ b/test/DebugInfo/Inputs/dwarfdump-test4.elf-x86-64 diff --git a/test/DebugInfo/dwarfdump-test.test b/test/DebugInfo/dwarfdump-test.test index faef03b..de23dcd 100644 --- a/test/DebugInfo/dwarfdump-test.test +++ b/test/DebugInfo/dwarfdump-test.test @@ -14,6 +14,9 @@ RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test3.elf-x86-64 \ RUN: --address=0x573 --functions | FileCheck %s -check-prefix INCLUDE_TEST_1 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test3.elf-x86-64 \ RUN: --address=0x56d --functions | FileCheck %s -check-prefix INCLUDE_TEST_2 +RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test4.elf-x86-64 \ +RUN: --address=0x55c --functions \ +RUN: | FileCheck %s -check-prefix MANY_SEQ_IN_LINE_TABLE MAIN: main MAIN-NEXT: /tmp/dbginfo{{[/\\]}}dwarfdump-test.cc:16:10 @@ -38,3 +41,6 @@ INCLUDE_TEST_1-NEXT: /tmp/dbginfo{{[/\\]}}include{{[/\\]}}decl2.h:1:0 INCLUDE_TEST_2: _Z3do1v INCLUDE_TEST_2-NEXT: /tmp/include{{[/\\]}}decl.h:5:0 + +MANY_SEQ_IN_LINE_TABLE: _Z1cv +MANY_SEQ_IN_LINE_TABLE-NEXT: /tmp/dbginfo/sequences{{[/\\]}}c.cc:2:0 diff --git a/test/Feature/linker_private_linkages.ll b/test/Feature/linker_private_linkages.ll index f9f2908..19bcbb4 100644 --- a/test/Feature/linker_private_linkages.ll +++ b/test/Feature/linker_private_linkages.ll @@ -4,4 +4,3 @@ @foo = linker_private hidden global i32 0 @bar = linker_private_weak hidden global i32 0 -@qux = linker_private_weak_def_auto global i32 0 diff --git a/test/Instrumentation/AddressSanitizer/basic.ll b/test/Instrumentation/AddressSanitizer/basic.ll index 294ca8a..d190001 100644 --- a/test/Instrumentation/AddressSanitizer/basic.ll +++ b/test/Instrumentation/AddressSanitizer/basic.ll @@ -23,15 +23,14 @@ define i32 @test_load(i32* %a) address_safety { ; CHECK: icmp sge i8 %{{.*}}, %[[LOAD_SHADOW]] ; CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}} ; -; The actual load comes next because ASan adds the crash block -; to the end of the function. -; CHECK: %tmp1 = load i32* %a -; CHECK: ret i32 %tmp1 - ; The crash block reports the error. ; CHECK: call void @__asan_report_load4(i64 %[[LOAD_ADDR]]) ; CHECK: unreachable ; +; The actual load. +; CHECK: %tmp1 = load i32* %a +; CHECK: ret i32 %tmp1 + entry: @@ -57,15 +56,14 @@ define void @test_store(i32* %a) address_safety { ; CHECK: icmp sge i8 %{{.*}}, %[[STORE_SHADOW]] ; CHECK: br i1 %{{.*}}, label %{{.*}}, label %{{.*}} ; -; The actual load comes next because ASan adds the crash block -; to the end of the function. -; CHECK: store i32 42, i32* %a -; CHECK: ret void -; ; The crash block reports the error. ; CHECK: call void @__asan_report_store4(i64 %[[STORE_ADDR]]) ; CHECK: unreachable ; +; The actual load. 
+; CHECK: store i32 42, i32* %a +; CHECK: ret void +; entry: store i32 42, i32* %a diff --git a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll new file mode 100644 index 0000000..4725516 --- /dev/null +++ b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -asan -asan-initialization-order -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" +@xxx = global i32 0, align 4 +; Clang will emit the following metadata identifying @xxx as dynamically +; initialized. +!0 = metadata !{i32* @xxx} +!llvm.asan.dynamically_initialized_globals = !{!0} + +define i32 @initializer() uwtable { +entry: + ret i32 42 +} + +define internal void @__cxx_global_var_init() section ".text.startup" { +entry: + %call = call i32 @initializer() + store i32 %call, i32* @xxx, align 4 + ret void +} + +define internal void @_GLOBAL__I_a() address_safety section ".text.startup" { +entry: + call void @__cxx_global_var_init() + ret void +} + +; Clang indicated that @xxx was dynamically initailized. +; __asan_{before,after}_dynamic_init should be called from _GLOBAL__I_a + +; CHECK: define internal void @_GLOBAL__I_a +; CHECK-NOT: ret +; CHECK: call void @__asan_before_dynamic_init +; CHECK: call void @__cxx_global_var_init +; CHECK: call void @__asan_after_dynamic_init +; CHECK: ret diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s index e682db5..5c2a214 100644 --- a/test/MC/ARM/basic-arm-instructions.s +++ b/test/MC/ARM/basic-arm-instructions.s @@ -141,6 +141,14 @@ Lforward: @ CHECK: adr r2, #3 @ encoding: [0x03,0x20,0x8f,0xe2] @ CHECK: adr r2, #-3 @ encoding: [0x03,0x20,0x4f,0xe2] + adr r1, #-0x0 + adr r1, #-0x12000000 + adr r1, #0x12000000 + +@ CHECK: adr r1, #-0 @ encoding: [0x00,0x10,0x4f,0xe2] +@ CHECK: adr r1, #-301989888 @ encoding: [0x12,0x14,0x4f,0xe2] +@ CHECK: adr r1, #301989888 @ encoding: [0x12,0x14,0x8f,0xe2] + @------------------------------------------------------------------------------ @ ADD @@ -559,6 +567,23 @@ Lforward: @------------------------------------------------------------------------------ @ DMB @------------------------------------------------------------------------------ + dmb #0xf + dmb #0xe + dmb #0xd + dmb #0xc + dmb #0xb + dmb #0xa + dmb #0x9 + dmb #0x8 + dmb #0x7 + dmb #0x6 + dmb #0x5 + dmb #0x4 + dmb #0x3 + dmb #0x2 + dmb #0x1 + dmb #0x0 + dmb sy dmb st dmb sh @@ -575,6 +600,23 @@ Lforward: @ CHECK: dmb sy @ encoding: [0x5f,0xf0,0x7f,0xf5] @ CHECK: dmb st @ encoding: [0x5e,0xf0,0x7f,0xf5] +@ CHECK: dmb #0xd @ encoding: [0x5d,0xf0,0x7f,0xf5] +@ CHECK: dmb #0xc @ encoding: [0x5c,0xf0,0x7f,0xf5] +@ CHECK: dmb ish @ encoding: [0x5b,0xf0,0x7f,0xf5] +@ CHECK: dmb ishst @ encoding: [0x5a,0xf0,0x7f,0xf5] +@ CHECK: dmb #0x9 @ encoding: [0x59,0xf0,0x7f,0xf5] +@ CHECK: dmb #0x8 @ encoding: [0x58,0xf0,0x7f,0xf5] +@ CHECK: dmb nsh @ encoding: [0x57,0xf0,0x7f,0xf5] +@ CHECK: dmb nshst @ encoding: [0x56,0xf0,0x7f,0xf5] +@ CHECK: dmb #0x5 @ encoding: [0x55,0xf0,0x7f,0xf5] +@ CHECK: dmb #0x4 @ encoding: [0x54,0xf0,0x7f,0xf5] +@ CHECK: dmb osh @ encoding: [0x53,0xf0,0x7f,0xf5] +@ CHECK: dmb oshst @ encoding: [0x52,0xf0,0x7f,0xf5] +@ CHECK: dmb #0x1 @ encoding: [0x51,0xf0,0x7f,0xf5] +@ CHECK: dmb #0x0 @ encoding: [0x50,0xf0,0x7f,0xf5] + +@ 
CHECK: dmb sy @ encoding: [0x5f,0xf0,0x7f,0xf5] +@ CHECK: dmb st @ encoding: [0x5e,0xf0,0x7f,0xf5] @ CHECK: dmb ish @ encoding: [0x5b,0xf0,0x7f,0xf5] @ CHECK: dmb ish @ encoding: [0x5b,0xf0,0x7f,0xf5] @ CHECK: dmb ishst @ encoding: [0x5a,0xf0,0x7f,0xf5] @@ -590,6 +632,26 @@ Lforward: @------------------------------------------------------------------------------ @ DSB @------------------------------------------------------------------------------ + dsb #0xf + dsb #0xe + dsb #0xd + dsb #0xc + dsb #0xb + dsb #0xa + dsb #0x9 + dsb #0x8 + dsb #0x7 + dsb #0x6 + dsb #0x5 + dsb #0x4 + dsb #0x3 + dsb #0x2 + dsb #0x1 + dsb #0x0 + + dsb 8 + dsb 7 + dsb sy dsb st dsb sh @@ -606,6 +668,26 @@ Lforward: @ CHECK: dsb sy @ encoding: [0x4f,0xf0,0x7f,0xf5] @ CHECK: dsb st @ encoding: [0x4e,0xf0,0x7f,0xf5] +@ CHECK: dsb #0xd @ encoding: [0x4d,0xf0,0x7f,0xf5] +@ CHECK: dsb #0xc @ encoding: [0x4c,0xf0,0x7f,0xf5] +@ CHECK: dsb ish @ encoding: [0x4b,0xf0,0x7f,0xf5] +@ CHECK: dsb ishst @ encoding: [0x4a,0xf0,0x7f,0xf5] +@ CHECK: dsb #0x9 @ encoding: [0x49,0xf0,0x7f,0xf5] +@ CHECK: dsb #0x8 @ encoding: [0x48,0xf0,0x7f,0xf5] +@ CHECK: dsb nsh @ encoding: [0x47,0xf0,0x7f,0xf5] +@ CHECK: dsb nshst @ encoding: [0x46,0xf0,0x7f,0xf5] +@ CHECK: dsb #0x5 @ encoding: [0x45,0xf0,0x7f,0xf5] +@ CHECK: dsb #0x4 @ encoding: [0x44,0xf0,0x7f,0xf5] +@ CHECK: dsb osh @ encoding: [0x43,0xf0,0x7f,0xf5] +@ CHECK: dsb oshst @ encoding: [0x42,0xf0,0x7f,0xf5] +@ CHECK: dsb #0x1 @ encoding: [0x41,0xf0,0x7f,0xf5] +@ CHECK: dsb #0x0 @ encoding: [0x40,0xf0,0x7f,0xf5] + +@ CHECK: dsb #0x8 @ encoding: [0x48,0xf0,0x7f,0xf5] +@ CHECK: dsb nsh @ encoding: [0x47,0xf0,0x7f,0xf5] + +@ CHECK: dsb sy @ encoding: [0x4f,0xf0,0x7f,0xf5] +@ CHECK: dsb st @ encoding: [0x4e,0xf0,0x7f,0xf5] @ CHECK: dsb ish @ encoding: [0x4b,0xf0,0x7f,0xf5] @ CHECK: dsb ish @ encoding: [0x4b,0xf0,0x7f,0xf5] @ CHECK: dsb ishst @ encoding: [0x4a,0xf0,0x7f,0xf5] diff --git a/test/MC/ARM/basic-thumb2-instructions.s b/test/MC/ARM/basic-thumb2-instructions.s index 4cfe2f2..23d9f59 100644 --- a/test/MC/ARM/basic-thumb2-instructions.s +++ b/test/MC/ARM/basic-thumb2-instructions.s @@ -135,9 +135,11 @@ _func: subw r11, pc, #3270 adr.w r11, #-826 + adr.w r1, #-0x0 @ CHECK: subw r11, pc, #3270 @ encoding: [0xaf,0xf6,0xc6,0x4b] @ CHECK: adr.w r11, #-826 @ encoding: [0xaf,0xf2,0x3a,0x3b] +@ CHECK: adr.w r1, #-0 @ encoding: [0xaf,0xf2,0x00,0x01] @------------------------------------------------------------------------------ @ AND (immediate) @@ -415,6 +417,23 @@ _func: @------------------------------------------------------------------------------ @ DMB @------------------------------------------------------------------------------ + dmb #0xf + dmb #0xe + dmb #0xd + dmb #0xc + dmb #0xb + dmb #0xa + dmb #0x9 + dmb #0x8 + dmb #0x7 + dmb #0x6 + dmb #0x5 + dmb #0x4 + dmb #0x3 + dmb #0x2 + dmb #0x1 + dmb #0x0 + dmb sy dmb st dmb sh @@ -431,6 +450,23 @@ _func: @ CHECK: dmb sy @ encoding: [0xbf,0xf3,0x5f,0x8f] @ CHECK: dmb st @ encoding: [0xbf,0xf3,0x5e,0x8f] +@ CHECK: dmb #0xd @ encoding: [0xbf,0xf3,0x5d,0x8f] +@ CHECK: dmb #0xc @ encoding: [0xbf,0xf3,0x5c,0x8f] +@ CHECK: dmb ish @ encoding: [0xbf,0xf3,0x5b,0x8f] +@ CHECK: dmb ishst @ encoding: [0xbf,0xf3,0x5a,0x8f] +@ CHECK: dmb #0x9 @ encoding: [0xbf,0xf3,0x59,0x8f] +@ CHECK: dmb #0x8 @ encoding: [0xbf,0xf3,0x58,0x8f] +@ CHECK: dmb nsh @ encoding: [0xbf,0xf3,0x57,0x8f] +@ CHECK: dmb nshst @ encoding: [0xbf,0xf3,0x56,0x8f] +@ CHECK: dmb #0x5 @ encoding: [0xbf,0xf3,0x55,0x8f] +@ CHECK: dmb #0x4 @ encoding: [0xbf,0xf3,0x54,0x8f] +@ CHECK: dmb osh 
@ encoding: [0xbf,0xf3,0x53,0x8f] +@ CHECK: dmb oshst @ encoding: [0xbf,0xf3,0x52,0x8f] +@ CHECK: dmb #0x1 @ encoding: [0xbf,0xf3,0x51,0x8f] +@ CHECK: dmb #0x0 @ encoding: [0xbf,0xf3,0x50,0x8f] + +@ CHECK: dmb sy @ encoding: [0xbf,0xf3,0x5f,0x8f] +@ CHECK: dmb st @ encoding: [0xbf,0xf3,0x5e,0x8f] @ CHECK: dmb ish @ encoding: [0xbf,0xf3,0x5b,0x8f] @ CHECK: dmb ish @ encoding: [0xbf,0xf3,0x5b,0x8f] @ CHECK: dmb ishst @ encoding: [0xbf,0xf3,0x5a,0x8f] @@ -447,6 +483,23 @@ _func: @------------------------------------------------------------------------------ @ DSB @------------------------------------------------------------------------------ + dsb #0xf + dsb #0xe + dsb #0xd + dsb #0xc + dsb #0xb + dsb #0xa + dsb #0x9 + dsb #0x8 + dsb #0x7 + dsb #0x6 + dsb #0x5 + dsb #0x4 + dsb #0x3 + dsb #0x2 + dsb #0x1 + dsb #0x0 + dsb sy dsb st dsb sh @@ -463,6 +516,23 @@ _func: @ CHECK: dsb sy @ encoding: [0xbf,0xf3,0x4f,0x8f] @ CHECK: dsb st @ encoding: [0xbf,0xf3,0x4e,0x8f] +@ CHECK: dsb #0xd @ encoding: [0xbf,0xf3,0x4d,0x8f] +@ CHECK: dsb #0xc @ encoding: [0xbf,0xf3,0x4c,0x8f] +@ CHECK: dsb ish @ encoding: [0xbf,0xf3,0x4b,0x8f] +@ CHECK: dsb ishst @ encoding: [0xbf,0xf3,0x4a,0x8f] +@ CHECK: dsb #0x9 @ encoding: [0xbf,0xf3,0x49,0x8f] +@ CHECK: dsb #0x8 @ encoding: [0xbf,0xf3,0x48,0x8f] +@ CHECK: dsb nsh @ encoding: [0xbf,0xf3,0x47,0x8f] +@ CHECK: dsb nshst @ encoding: [0xbf,0xf3,0x46,0x8f] +@ CHECK: dsb #0x5 @ encoding: [0xbf,0xf3,0x45,0x8f] +@ CHECK: dsb #0x4 @ encoding: [0xbf,0xf3,0x44,0x8f] +@ CHECK: dsb osh @ encoding: [0xbf,0xf3,0x43,0x8f] +@ CHECK: dsb oshst @ encoding: [0xbf,0xf3,0x42,0x8f] +@ CHECK: dsb #0x1 @ encoding: [0xbf,0xf3,0x41,0x8f] +@ CHECK: dsb #0x0 @ encoding: [0xbf,0xf3,0x40,0x8f] + +@ CHECK: dsb sy @ encoding: [0xbf,0xf3,0x4f,0x8f] +@ CHECK: dsb st @ encoding: [0xbf,0xf3,0x4e,0x8f] @ CHECK: dsb ish @ encoding: [0xbf,0xf3,0x4b,0x8f] @ CHECK: dsb ish @ encoding: [0xbf,0xf3,0x4b,0x8f] @ CHECK: dsb ishst @ encoding: [0xbf,0xf3,0x4a,0x8f] @@ -782,6 +852,9 @@ _func: ldrd r3, r5, [r6], #-8 ldrd r3, r5, [r6] ldrd r8, r1, [r3, #0] + ldrd r0, r1, [r2, #-0] + ldrd r0, r1, [r2, #-0]! + ldrd r0, r1, [r2], #-0 @ CHECK: ldrd r3, r5, [r6, #24] @ encoding: [0xd6,0xe9,0x06,0x35] @ CHECK: ldrd r3, r5, [r6, #24]! @ encoding: [0xf6,0xe9,0x06,0x35] @@ -789,6 +862,9 @@ _func: @ CHECK: ldrd r3, r5, [r6], #-8 @ encoding: [0x76,0xe8,0x02,0x35] @ CHECK: ldrd r3, r5, [r6] @ encoding: [0xd6,0xe9,0x00,0x35] @ CHECK: ldrd r8, r1, [r3] @ encoding: [0xd3,0xe9,0x00,0x81] +@ CHECK: ldrd r0, r1, [r2, #-0] @ encoding: [0x52,0xe9,0x00,0x01] +@ CHECK: ldrd r0, r1, [r2, #-0]! @ encoding: [0x72,0xe9,0x00,0x01] +@ CHECK: ldrd r0, r1, [r2], #-0 @ encoding: [0x72,0xe8,0x00,0x01] @------------------------------------------------------------------------------ @@ -2566,6 +2642,9 @@ _func: strd r3, r5, [r6], #-8 strd r3, r5, [r6] strd r8, r1, [r3, #0] + strd r0, r1, [r2, #-0] + strd r0, r1, [r2, #-0]! + strd r0, r1, [r2], #-0 @ CHECK: strd r3, r5, [r6, #24] @ encoding: [0xc6,0xe9,0x06,0x35] @ CHECK: strd r3, r5, [r6, #24]! @ encoding: [0xe6,0xe9,0x06,0x35] @@ -2573,6 +2652,9 @@ _func: @ CHECK: strd r3, r5, [r6], #-8 @ encoding: [0x66,0xe8,0x02,0x35] @ CHECK: strd r3, r5, [r6] @ encoding: [0xc6,0xe9,0x00,0x35] @ CHECK: strd r8, r1, [r3] @ encoding: [0xc3,0xe9,0x00,0x81] +@ CHECK: strd r0, r1, [r2, #-0] @ encoding: [0x42,0xe9,0x00,0x01] +@ CHECK: strd r0, r1, [r2, #-0]! 
@ encoding: [0x62,0xe9,0x00,0x01] +@ CHECK: strd r0, r1, [r2], #-0 @ encoding: [0x62,0xe8,0x00,0x01] @------------------------------------------------------------------------------ diff --git a/test/MC/ARM/vfp4.s b/test/MC/ARM/vfp4.s index cc87a38..0a1fe92 100644 --- a/test/MC/ARM/vfp4.s +++ b/test/MC/ARM/vfp4.s @@ -1,5 +1,6 @@ @ RUN: llvm-mc < %s -triple armv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=ARM @ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=THUMB +@ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mcpu=cortex-m4 | FileCheck %s --check-prefix=THUMB_V7EM @ ARM: vfma.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xe2,0xee] @ THUMB: vfma.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xa1,0x0b] @@ -7,6 +8,7 @@ vfma.f64 d16, d18, d17 @ ARM: vfma.f32 s2, s4, s0 @ encoding: [0x00,0x1a,0xa2,0xee] @ THUMB: vfma.f32 s2, s4, s0 @ encoding: [0xa2,0xee,0x00,0x1a] +@ THUMB_V7EM: vfma.f32 s2, s4, s0 @ encoding: [0xa2,0xee,0x00,0x1a] vfma.f32 s2, s4, s0 @ ARM: vfma.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x42,0xf2] @@ -23,6 +25,7 @@ vfnma.f64 d16, d18, d17 @ ARM: vfnma.f32 s2, s4, s0 @ encoding: [0x40,0x1a,0x92,0xee] @ THUMB: vfnma.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x40,0x1a] +@ THUMB_V7EM: vfnma.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x40,0x1a] vfnma.f32 s2, s4, s0 @ ARM: vfms.f64 d16, d18, d17 @ encoding: [0xe1,0x0b,0xe2,0xee] @@ -31,6 +34,7 @@ vfms.f64 d16, d18, d17 @ ARM: vfms.f32 s2, s4, s0 @ encoding: [0x40,0x1a,0xa2,0xee] @ THUMB: vfms.f32 s2, s4, s0 @ encoding: [0xa2,0xee,0x40,0x1a] +@ THUMB_V7EM: vfms.f32 s2, s4, s0 @ encoding: [0xa2,0xee,0x40,0x1a] vfms.f32 s2, s4, s0 @ ARM: vfms.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x62,0xf2] diff --git a/test/MC/AsmParser/bad-macro.s b/test/MC/AsmParser/bad-macro.s new file mode 100644 index 0000000..313607b --- /dev/null +++ b/test/MC/AsmParser/bad-macro.s @@ -0,0 +1,9 @@ +// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s 2>&1 | FileCheck %s + +.macro 23 + +// CHECK: expected identifier in '.macro' directive + +.macro abc 33 + +// CHECK: expected identifier in '.macro' directive diff --git a/test/MC/AsmParser/macro-args.s b/test/MC/AsmParser/macro-args.s index 13b197a..6d08421 100644 --- a/test/MC/AsmParser/macro-args.s +++ b/test/MC/AsmParser/macro-args.s @@ -42,3 +42,15 @@ top bar, 42 // CHECK-NOT: fred // CHECK: _bar // CHECK-NEXT: fred = 42 + + +.macro foo +foo_$0_$1_$2_$3: + nop +.endm + +foo 1, 2, 3, 4 +foo 1, , 3, 4 + +// CHECK: foo_1_2_3_4: +// CHECK: foo_1__3_4: diff --git a/test/MC/AsmParser/macros.s b/test/MC/AsmParser/macros.s index 2957592..dd2cc1f 100644 --- a/test/MC/AsmParser/macros.s +++ b/test/MC/AsmParser/macros.s @@ -37,3 +37,24 @@ test3 1,2 3 // CHECK: .globl "ab)(,) -- (cd)" test4 a b)(,),(cd) + +.macro test5 _a +.globl "\_a" +.endm + +test5 zed1 +// CHECK: .globl zed1 + +.macro test6 $a +.globl "\$a" +.endm + +test6 zed2 +// CHECK: .globl zed2 + +.macro test7 .a +.globl "\.a" +.endm + +test7 zed3 +// CHECK: .globl zed3 diff --git a/test/MC/COFF/seh.s b/test/MC/COFF/seh.s index 8cafcb3..3f72805 100644 --- a/test/MC/COFF/seh.s +++ b/test/MC/COFF/seh.s @@ -14,7 +14,6 @@ // CHECK-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA // CHECK-NEXT: IMAGE_SCN_ALIGN_4BYTES // CHECK-NEXT: IMAGE_SCN_MEM_READ -// CHECK-NEXT: IMAGE_SCN_MEM_WRITE // CHECK-NEXT: SectionData // CHECK-NEXT: 09 12 08 03 00 03 0F 30 - 0E 88 00 00 09 64 02 00 // CHECK-NEXT: 04 22 00 1A 00 00 00 00 - 00 00 00 00 21 00 00 00 diff --git 
a/test/MC/Disassembler/ARM/basic-arm-instructions.txt b/test/MC/Disassembler/ARM/basic-arm-instructions.txt index 40eb4cd..1100ce6 100644 --- a/test/MC/Disassembler/ARM/basic-arm-instructions.txt +++ b/test/MC/Disassembler/ARM/basic-arm-instructions.txt @@ -169,9 +169,15 @@ #------------------------------------------------------------------------------ # CHECK: add r2, pc, #3 # CHECK: sub r2, pc, #3 +# CHECK: sub r1, pc, #0 +# CHECK: sub r1, pc, #301989888 +# CHECK: add r1, pc, #301989888 0x03 0x20 0x8f 0xe2 0x03 0x20 0x4f 0xe2 +0x00 0x10 0x4f 0xe2 +0x12 0x14 0x4f 0xe2 +0x12 0x14 0x8f 0xe2 #------------------------------------------------------------------------------ # AND @@ -469,47 +475,77 @@ #------------------------------------------------------------------------------ # DMB #------------------------------------------------------------------------------ -# CHECK: dmb sy -# CHECK: dmb st -# CHECK: dmb ish -# CHECK: dmb ishst -# CHECK: dmb nsh -# CHECK: dmb nshst -# CHECK: dmb osh + +# CHECK: dmb #0x0 +# CHECK: dmb #0x1 # CHECK: dmb oshst -# CHECK: dmb +# CHECK: dmb osh +# CHECK: dmb #0x4 +# CHECK: dmb #0x5 +# CHECK: dmb nshst +# CHECK: dmb nsh +# CHECK: dmb #0x8 +# CHECK: dmb #0x9 +# CHECK: dmb ishst +# CHECK: dmb ish +# CHECK: dmb #0xc +# CHECK: dmb #0xd +# CHECK: dmb st +# CHECK: dmb sy -0x5f 0xf0 0x7f 0xf5 -0x5e 0xf0 0x7f 0xf5 -0x5b 0xf0 0x7f 0xf5 -0x5a 0xf0 0x7f 0xf5 -0x57 0xf0 0x7f 0xf5 -0x56 0xf0 0x7f 0xf5 -0x53 0xf0 0x7f 0xf5 +0x50 0xf0 0x7f 0xf5 +0x51 0xf0 0x7f 0xf5 0x52 0xf0 0x7f 0xf5 +0x53 0xf0 0x7f 0xf5 +0x54 0xf0 0x7f 0xf5 +0x55 0xf0 0x7f 0xf5 +0x56 0xf0 0x7f 0xf5 +0x57 0xf0 0x7f 0xf5 +0x58 0xf0 0x7f 0xf5 +0x59 0xf0 0x7f 0xf5 +0x5a 0xf0 0x7f 0xf5 +0x5b 0xf0 0x7f 0xf5 +0x5c 0xf0 0x7f 0xf5 +0x5d 0xf0 0x7f 0xf5 +0x5e 0xf0 0x7f 0xf5 0x5f 0xf0 0x7f 0xf5 #------------------------------------------------------------------------------ # DSB #------------------------------------------------------------------------------ -# CHECK: dsb sy -# CHECK: dsb st -# CHECK: dsb ish -# CHECK: dsb ishst -# CHECK: dsb nsh -# CHECK: dsb nshst -# CHECK: dsb osh -# CHECK: dsb oshst -# CHECK: dsb -0x4f 0xf0 0x7f 0xf5 -0x4e 0xf0 0x7f 0xf5 -0x4b 0xf0 0x7f 0xf5 -0x4a 0xf0 0x7f 0xf5 -0x47 0xf0 0x7f 0xf5 -0x46 0xf0 0x7f 0xf5 -0x43 0xf0 0x7f 0xf5 +# CHECK: dsb #0x0 +# CHECK: dsb #0x1 +# CHECK: dsb oshst +# CHECK: dsb osh +# CHECK: dsb #0x4 +# CHECK: dsb #0x5 +# CHECK: dsb nshst +# CHECK: dsb nsh +# CHECK: dsb #0x8 +# CHECK: dsb #0x9 +# CHECK: dsb ishst +# CHECK: dsb ish +# CHECK: dsb #0xc +# CHECK: dsb #0xd +# CHECK: dsb st +# CHECK: dsb sy + +0x40 0xf0 0x7f 0xf5 +0x41 0xf0 0x7f 0xf5 0x42 0xf0 0x7f 0xf5 +0x43 0xf0 0x7f 0xf5 +0x44 0xf0 0x7f 0xf5 +0x45 0xf0 0x7f 0xf5 +0x46 0xf0 0x7f 0xf5 +0x47 0xf0 0x7f 0xf5 +0x48 0xf0 0x7f 0xf5 +0x49 0xf0 0x7f 0xf5 +0x4a 0xf0 0x7f 0xf5 +0x4b 0xf0 0x7f 0xf5 +0x4c 0xf0 0x7f 0xf5 +0x4d 0xf0 0x7f 0xf5 +0x4e 0xf0 0x7f 0xf5 0x4f 0xf0 0x7f 0xf5 #------------------------------------------------------------------------------ diff --git a/test/MC/Disassembler/ARM/neon.txt b/test/MC/Disassembler/ARM/neon.txt index 96f8303..649424a 100644 --- a/test/MC/Disassembler/ARM/neon.txt +++ b/test/MC/Disassembler/ARM/neon.txt @@ -1931,14 +1931,6 @@ # CHECK: vmov.f32 d0, #1.600000e+01 # CHECK: vmov.f32 q0, #1.600000e+01 -# rdar://10798451 -0xe7 0xf9 0x32 0x1d -# CHECK vld2.8 {d17[], d19[]}, [r7, :16], r2 -0xe7 0xf9 0x3d 0x1d -# CHECK vld2.8 {d17[], d19[]}, [r7, :16]! -0xe7 0xf9 0x3f 0x1d -# CHECK vld2.8 {d17[], d19[]}, [r7, :16] - # rdar://11034702 0x0d 0x87 0x04 0xf4 # CHECK: vst1.8 {d8}, [r4]! 
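Two things happen in the neon.txt hunk just above: the rdar://10798451 byte sequences (0xe7 0xf9 ...) are Thumb2 encodings, so they move out of the ARM-mode decoder test into neont2.txt below, and the directives gain the colon they were missing. The colon matters because FileCheck treats "# CHECK vld2.8" (no colon) as an ordinary comment and silently skips it, so the removed lines verified nothing; the relocated lines below use "# CHECK:" and are actually enforced. A representative invocation for a byte-oriented disassembler test of this kind (the real RUN line of neont2.txt is not shown in this hunk, so the triple and flags here are assumptions):

# RUN: llvm-mc -triple thumbv7-unknown-unknown -mattr=+neon -disassemble < %s | FileCheck %s
0xe7 0xf9 0x32 0x1d
# CHECK: vld2.8 {d17[], d19[]}, [r7, :16], r2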
diff --git a/test/MC/Disassembler/ARM/neont2.txt b/test/MC/Disassembler/ARM/neont2.txt index e721984..7d7010f 100644 --- a/test/MC/Disassembler/ARM/neont2.txt +++ b/test/MC/Disassembler/ARM/neont2.txt @@ -2042,3 +2042,13 @@ # CHECK: vld2.16 {d0[], d2[]}, [r3], r4 0xa3 0xf9 0xa4 0x0d # CHECK: vld2.32 {d0[], d2[]}, [r3], r4 + + +# rdar://10798451 +0xe7 0xf9 0x32 0x1d +# CHECK: vld2.8 {d17[], d19[]}, [r7, :16], r2 +0xe7 0xf9 0x3d 0x1d +# CHECK: vld2.8 {d17[], d19[]}, [r7, :16]! +0xe7 0xf9 0x3f 0x1d +# CHECK: vld2.8 {d17[], d19[]}, [r7, :16] + diff --git a/test/MC/Disassembler/ARM/thumb2.txt b/test/MC/Disassembler/ARM/thumb2.txt index 4d1b398..42ebe58 100644 --- a/test/MC/Disassembler/ARM/thumb2.txt +++ b/test/MC/Disassembler/ARM/thumb2.txt @@ -92,9 +92,11 @@ #------------------------------------------------------------------------------ # CHECK: subw r11, pc, #3270 # CHECK: subw r11, pc, #826 +# CHECK: subw r1, pc, #0 0xaf 0xf6 0xc6 0x4b 0xaf 0xf2 0x3a 0x3b +0xaf 0xf2 0x00 0x01 #------------------------------------------------------------------------------ # AND (immediate) @@ -344,23 +346,37 @@ #------------------------------------------------------------------------------ #CHECK: dmb sy #CHECK: dmb st +#CHECK: dmb #0xd +#CHECK: dmb #0xc #CHECK: dmb ish #CHECK: dmb ishst +#CHECK: dmb #0x9 +#CHECK: dmb #0x8 #CHECK: dmb nsh #CHECK: dmb nshst +#CHECK: dmb #0x5 +#CHECK: dmb #0x4 #CHECK: dmb osh #CHECK: dmb oshst -#CHECK: dmb +#CHECK: dmb #0x1 +#CHECK: dmb #0x0 0xbf 0xf3 0x5f 0x8f 0xbf 0xf3 0x5e 0x8f +0xbf 0xf3 0x5d 0x8f +0xbf 0xf3 0x5c 0x8f 0xbf 0xf3 0x5b 0x8f 0xbf 0xf3 0x5a 0x8f +0xbf 0xf3 0x59 0x8f +0xbf 0xf3 0x58 0x8f 0xbf 0xf3 0x57 0x8f 0xbf 0xf3 0x56 0x8f +0xbf 0xf3 0x55 0x8f +0xbf 0xf3 0x54 0x8f 0xbf 0xf3 0x53 0x8f 0xbf 0xf3 0x52 0x8f -0xbf 0xf3 0x5f 0x8f +0xbf 0xf3 0x51 0x8f +0xbf 0xf3 0x50 0x8f #------------------------------------------------------------------------------ @@ -368,21 +384,37 @@ #------------------------------------------------------------------------------ #CHECK: dsb sy #CHECK: dsb st +#CHECK: dsb #0xd +#CHECK: dsb #0xc #CHECK: dsb ish #CHECK: dsb ishst +#CHECK: dsb #0x9 +#CHECK: dsb #0x8 #CHECK: dsb nsh #CHECK: dsb nshst +#CHECK: dsb #0x5 +#CHECK: dsb #0x4 #CHECK: dsb osh #CHECK: dsb oshst +#CHECK: dsb #0x1 +#CHECK: dsb #0x0 0xbf 0xf3 0x4f 0x8f 0xbf 0xf3 0x4e 0x8f +0xbf 0xf3 0x4d 0x8f +0xbf 0xf3 0x4c 0x8f 0xbf 0xf3 0x4b 0x8f 0xbf 0xf3 0x4a 0x8f +0xbf 0xf3 0x49 0x8f +0xbf 0xf3 0x48 0x8f 0xbf 0xf3 0x47 0x8f 0xbf 0xf3 0x46 0x8f +0xbf 0xf3 0x45 0x8f +0xbf 0xf3 0x44 0x8f 0xbf 0xf3 0x43 0x8f 0xbf 0xf3 0x42 0x8f +0xbf 0xf3 0x41 0x8f +0xbf 0xf3 0x40 0x8f #------------------------------------------------------------------------------ @@ -609,6 +641,9 @@ # CHECK: ldrd r3, r5, [r6], #-8 # CHECK: ldrd r3, r5, [r6] # CHECK: ldrd r8, r1, [r3] +# CHECK: ldrd r0, r1, [r2], #-0 +# CHECK: ldrd r0, r1, [r2, #-0]! +# CHECK: ldrd r0, r1, [r2, #-0] 0xd6 0xe9 0x06 0x35 0xf6 0xe9 0x06 0x35 @@ -616,6 +651,9 @@ 0x76 0xe8 0x02 0x35 0xd6 0xe9 0x00 0x35 0xd3 0xe9 0x00 0x81 +0x72 0xe8 0x00 0x01 +0x72 0xe9 0x00 0x01 +0x52 0xe9 0x00 0x01 #------------------------------------------------------------------------------ @@ -1790,12 +1828,16 @@ # STRD (immediate) #------------------------------------------------------------------------------ # CHECK: strd r6, r3, [r5], #-8 -# CHECK: strd r8, r5, [r5]{{$}} +# CHECK: strd r8, r5, [r5], #-0 # CHECK: strd r7, r4, [r5], #-4 +# CHECK: strd r0, r1, [r2, #-0]! 
+# CHECK: strd r0, r1, [r2, #-0] 0x65 0xe8 0x02 0x63 0x65 0xe8 0x00 0x85 0x65 0xe8 0x01 0x74 +0x62 0xe9 0x00 0x01 +0x42 0xe9 0x00 0x01 #------------------------------------------------------------------------------ # STREX/STREXB/STREXH/STREXD diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt index 15e046f..672d239 100644 --- a/test/MC/Disassembler/X86/simple-tests.txt +++ b/test/MC/Disassembler/X86/simple-tests.txt @@ -123,10 +123,10 @@ # CHECK: vcvtss2sil %xmm0, %eax 0xc5 0xfa 0x2d 0xc0 -# CHECK: vcvtsd2si %xmm0, %eax +# CHECK: vcvtsd2sil %xmm0, %eax 0xc5 0xfb 0x2d 0xc0 -# CHECK: vcvtsd2si %xmm0, %rax +# CHECK: vcvtsd2siq %xmm0, %rax 0xc4 0xe1 0xfb 0x2d 0xc0 # CHECK: vmaskmovpd %xmm0, %xmm1, (%rax) @@ -437,10 +437,10 @@ # CHECK: vroundsd $0, %xmm0, %xmm0, %xmm0 0xc4 0xe3 0x7d 0x0b 0xc0 0x00 -# CHECK: vcvtsd2si %xmm0, %eax +# CHECK: vcvtsd2sil %xmm0, %eax 0xc4 0xe1 0x7f 0x2d 0xc0 -# CHECK: vcvtsd2si %xmm0, %rax +# CHECK: vcvtsd2siq %xmm0, %rax 0xc4 0xe1 0xff 0x2d 0xc0 # CHECK: vucomisd %xmm1, %xmm0 diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt index 3ec55f9..899657b 100644 --- a/test/MC/Disassembler/X86/x86-32.txt +++ b/test/MC/Disassembler/X86/x86-32.txt @@ -159,10 +159,10 @@ # CHECK: vcvtss2sil %xmm0, %eax 0xc5 0xfa 0x2d 0xc0 -# CHECK: vcvtsd2si %xmm0, %eax +# CHECK: vcvtsd2sil %xmm0, %eax 0xc5 0xfb 0x2d 0xc0 -# CHECK: vcvtsd2si %xmm0, %eax +# CHECK: vcvtsd2sil %xmm0, %eax 0xc4 0xe1 0x7b 0x2d 0xc0 # CHECK: vmaskmovpd %xmm0, %xmm1, (%eax) @@ -460,10 +460,10 @@ # CHECK: vroundsd $0, %xmm0, %xmm0, %xmm0 0xc4 0xe3 0x7d 0x0b 0xc0 0x00 -# CHECK: vcvtsd2si %xmm0, %eax +# CHECK: vcvtsd2sil %xmm0, %eax 0xc4 0xe1 0x7f 0x2d 0xc0 -# CHECK: vcvtsd2si %xmm0, %eax +# CHECK: vcvtsd2sil %xmm0, %eax 0xc4 0xe1 0xff 0x2d 0xc0 # CHECK: vucomisd %xmm1, %xmm0 diff --git a/test/MC/ELF/fde.s b/test/MC/ELF/fde.s new file mode 100644 index 0000000..52ee33f --- /dev/null +++ b/test/MC/ELF/fde.s @@ -0,0 +1,28 @@ +# RUN: llvm-mc -filetype=obj %s -o %t.o -triple x86_64-pc-linux-gnu && llvm-objdump -s %t.o +# PR13581 + +# CHECK: Contents of section .debug_frame: +# CHECK-NEXT: 0000 14000000 ffffffff 01000178 100c0708 ...........x.... +# CHECK-NEXT: 0010 90010000 00000000 1c000000 00000000 ................ +# CHECK-NEXT: 0020 00000000 00000000 11000000 00000000 ................ +# CHECK-NEXT: 0030 410e1086 02430d06 A....C.. 
+ +__cxx_global_var_init: # @__cxx_global_var_init + .cfi_startproc +.Lfunc_begin0: +# BB#0: # %entry + pushq %rbp +.Ltmp2: + .cfi_def_cfa_offset 16 +.Ltmp3: + .cfi_offset %rbp, -16 + movq %rsp, %rbp +.Ltmp4: + .cfi_def_cfa_register %rbp +.Ltmp5: + callq _Z2rsv@PLT + movl %eax, _ZL1i(%rip) + popq %rbp + ret + .cfi_endproc + .cfi_sections .debug_frame diff --git a/test/MC/MachO/ARM/thumb2-movw-fixup.s b/test/MC/MachO/ARM/thumb2-movw-fixup.s new file mode 100644 index 0000000..57973a8 --- /dev/null +++ b/test/MC/MachO/ARM/thumb2-movw-fixup.s @@ -0,0 +1,44 @@ +@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumbv7-apple-darwin10 -filetype=obj -o - < %s | macho-dump | FileCheck %s + +@ rdar://10038370 + + .syntax unified + .text + .align 2 + .code 16 + .thumb_func _foo + movw r2, :lower16:L1 + movt r2, :upper16:L1 + movw r12, :lower16:L2 + movt r12, :upper16:L2 + .space 70000 + + .data +L1: .long 0 +L2: .long 0 + +@ CHECK: ('_relocations', [ +@ CHECK: # Relocation 0 +@ CHECK: (('word-0', 0xc), +@ CHECK: ('word-1', 0x86000002)), +@ CHECK: # Relocation 1 +@ CHECK: (('word-0', 0x1184), +@ CHECK: ('word-1', 0x16ffffff)), +@ CHECK: # Relocation 2 +@ CHECK: (('word-0', 0x8), +@ CHECK: ('word-1', 0x84000002)), +@ CHECK: # Relocation 3 +@ CHECK: (('word-0', 0x1), +@ CHECK: ('word-1', 0x14ffffff)), +@ CHECK: # Relocation 4 +@ CHECK: (('word-0', 0x4), +@ CHECK: ('word-1', 0x86000002)), +@ CHECK: # Relocation 5 +@ CHECK: (('word-0', 0x1180), +@ CHECK: ('word-1', 0x16ffffff)), +@ CHECK: # Relocation 6 +@ CHECK: (('word-0', 0x0), +@ CHECK: ('word-1', 0x84000002)), +@ CHECK: # Relocation 7 +@ CHECK: (('word-0', 0x1), +@ CHECK: ('word-1', 0x14ffffff)), diff --git a/test/MC/MachO/previous.s b/test/MC/MachO/previous.s new file mode 100644 index 0000000..41077cd --- /dev/null +++ b/test/MC/MachO/previous.s @@ -0,0 +1,13 @@ +// RUN: llvm-mc -triple i386-apple-darwin9 %s -o - | FileCheck %s + +.text +// CHECK: .section __TEXT,__text + +.data +// CHECK: .section __DATA,__data + +.previous +// CHECK: .section __TEXT,__text + +.previous +// CHECK: .section __DATA,__data diff --git a/test/MC/MachO/pushsection.s b/test/MC/MachO/pushsection.s new file mode 100644 index 0000000..6881323 --- /dev/null +++ b/test/MC/MachO/pushsection.s @@ -0,0 +1,16 @@ +// RUN: llvm-mc -triple i386-apple-darwin9 %s -o - | FileCheck %s + +.text +// CHECK: .section __TEXT,__text + +.pushsection __DATA, __data +// CHECK: .section __DATA,__data + +.pushsection __TEXT, initcode +// CHECK: .section __TEXT,initcode + +.popsection +// CHECK: .section __DATA,__data + +.popsection +// CHECK: .section __TEXT,__text diff --git a/test/MC/Mips/higher_highest.ll b/test/MC/Mips/higher_highest.ll new file mode 100644 index 0000000..81a89e3 --- /dev/null +++ b/test/MC/Mips/higher_highest.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 -force-mips-long-branch -filetype=obj < %s -o - | elf-dump --dump-section-data | FileCheck %s + +; Check that the R_MIPS_HIGHER and R_MIPS_HIGHEST relocations were created. 
+ +; CHECK: ('r_type', 0x1d) +; CHECK: ('r_type', 0x1d) +; CHECK: ('r_type', 0x1c) +; CHECK: ('r_type', 0x1c) + +@g0 = external global i32 + +define void @foo1(i32 %s) nounwind { +entry: + + %tobool = icmp eq i32 %s, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %0 = load i32* @g0, align 4 + %add = add nsw i32 %0, 12 + store i32 %add, i32* @g0, align 4 + br label %if.end + +if.end: ; preds = %entry, %if.then + ret void +} + diff --git a/test/MC/Mips/lea_64.ll b/test/MC/Mips/lea_64.ll new file mode 100644 index 0000000..2e7a37b --- /dev/null +++ b/test/MC/Mips/lea_64.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 %s -o - \ +; RUN: | llvm-objdump -disassemble -triple mips64el - \ +; RUN: | FileCheck %s + +@p = external global i32* + +define void @f1() nounwind { +entry: +; CHECK: .text: +; CHECK-NOT: addiu {{[0-9,a-f]+}}, {{[0-9,a-f]+}}, {{[0-9]+}} + + %a = alloca [10 x i32], align 4 + %arraydecay = getelementptr inbounds [10 x i32]* %a, i64 0, i64 0 + store i32* %arraydecay, i32** @p, align 8 + ret void + +; CHECK: jr $ra +} diff --git a/test/MC/Mips/sext_64_32.ll b/test/MC/Mips/sext_64_32.ll new file mode 100644 index 0000000..e5c57b8 --- /dev/null +++ b/test/MC/Mips/sext_64_32.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=mips64el -filetype=obj -mcpu=mips64r2 %s -o - | llvm-objdump -disassemble -triple mips64el - | FileCheck %s + +; Sign extend from 32 to 64 was creating nonsense opcodes + +; CHECK: sll ${{[0-9]+}}, ${{[0-9]+}}, 0 + +define i64 @foo(i32 %ival) nounwind readnone { +entry: + %conv = sext i32 %ival to i64 + ret i64 %conv +} + +; CHECK: dsll32 ${{[0-9]+}}, ${{[0-9]+}}, 0 + +define i64 @foo_2(i32 %ival_2) nounwind readnone { +entry: + %conv_2 = zext i32 %ival_2 to i64 + ret i64 %conv_2 +} + diff --git a/test/MC/X86/x86-32-avx.s b/test/MC/X86/x86-32-avx.s index 9a7a506..586f3fe 100644 --- a/test/MC/X86/x86-32-avx.s +++ b/test/MC/X86/x86-32-avx.s @@ -3103,21 +3103,21 @@ // CHECK: encoding: [0xc5,0xf8,0x77] vzeroupper -// CHECK: vcvtsd2si %xmm4, %ecx +// CHECK: vcvtsd2sil %xmm4, %ecx // CHECK: encoding: [0xc5,0xfb,0x2d,0xcc] - vcvtsd2si %xmm4, %ecx + vcvtsd2sil %xmm4, %ecx -// CHECK: vcvtsd2si (%ecx), %ecx +// CHECK: vcvtsd2sil (%ecx), %ecx // CHECK: encoding: [0xc5,0xfb,0x2d,0x09] - vcvtsd2si (%ecx), %ecx + vcvtsd2sil (%ecx), %ecx -// CHECK: vcvtsi2sdl (%ebp), %xmm0, %xmm7 +// CHECK: vcvtsi2sd (%ebp), %xmm0, %xmm7 // CHECK: encoding: [0xc5,0xfb,0x2a,0x7d,0x00] - vcvtsi2sdl (%ebp), %xmm0, %xmm7 + vcvtsi2sd (%ebp), %xmm0, %xmm7 -// CHECK: vcvtsi2sdl (%esp), %xmm0, %xmm7 +// CHECK: vcvtsi2sd (%esp), %xmm0, %xmm7 // CHECK: encoding: [0xc5,0xfb,0x2a,0x3c,0x24] - vcvtsi2sdl (%esp), %xmm0, %xmm7 + vcvtsi2sd (%esp), %xmm0, %xmm7 // CHECK: vlddqu (%eax), %ymm2 // CHECK: encoding: [0xc5,0xff,0xf0,0x10] diff --git a/test/MC/X86/x86_64-avx-encoding.s b/test/MC/X86/x86_64-avx-encoding.s index 930e33b..46ff9ea 100644 --- a/test/MC/X86/x86_64-avx-encoding.s +++ b/test/MC/X86/x86_64-avx-encoding.s @@ -3860,29 +3860,29 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 // CHECK: encoding: [0xc4,0x63,0x2d,0x06,0x18,0x07] vperm2f128 $7, (%rax), %ymm10, %ymm11 -// CHECK: vcvtsd2si %xmm8, %r8d +// CHECK: vcvtsd2sil %xmm8, %r8d // CHECK: encoding: [0xc4,0x41,0x7b,0x2d,0xc0] - vcvtsd2si %xmm8, %r8d + vcvtsd2sil %xmm8, %r8d -// CHECK: vcvtsd2si (%rcx), %ecx +// CHECK: vcvtsd2sil (%rcx), %ecx // CHECK: encoding: [0xc5,0xfb,0x2d,0x09] - vcvtsd2si (%rcx), %ecx + vcvtsd2sil (%rcx), %ecx -// CHECK: vcvtss2si %xmm4, %rcx +// CHECK: vcvtss2siq %xmm4, %rcx // 
CHECK: encoding: [0xc4,0xe1,0xfa,0x2d,0xcc] - vcvtss2si %xmm4, %rcx + vcvtss2siq %xmm4, %rcx -// CHECK: vcvtss2si (%rcx), %r8 +// CHECK: vcvtss2siq (%rcx), %r8 // CHECK: encoding: [0xc4,0x61,0xfa,0x2d,0x01] - vcvtss2si (%rcx), %r8 + vcvtss2siq (%rcx), %r8 -// CHECK: vcvtsi2sdl %r8d, %xmm8, %xmm15 +// CHECK: vcvtsi2sd %r8d, %xmm8, %xmm15 // CHECK: encoding: [0xc4,0x41,0x3b,0x2a,0xf8] - vcvtsi2sdl %r8d, %xmm8, %xmm15 + vcvtsi2sd %r8d, %xmm8, %xmm15 -// CHECK: vcvtsi2sdl (%rbp), %xmm8, %xmm15 +// CHECK: vcvtsi2sd (%rbp), %xmm8, %xmm15 // CHECK: encoding: [0xc5,0x3b,0x2a,0x7d,0x00] - vcvtsi2sdl (%rbp), %xmm8, %xmm15 + vcvtsi2sd (%rbp), %xmm8, %xmm15 // CHECK: vcvtsi2sdq %rcx, %xmm4, %xmm6 // CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0xf1] @@ -3900,21 +3900,21 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11 // CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0x31] vcvtsi2ssq (%rcx), %xmm4, %xmm6 -// CHECK: vcvttsd2si %xmm4, %rcx +// CHECK: vcvttsd2siq %xmm4, %rcx // CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0xcc] - vcvttsd2si %xmm4, %rcx + vcvttsd2siq %xmm4, %rcx -// CHECK: vcvttsd2si (%rcx), %rcx +// CHECK: vcvttsd2siq (%rcx), %rcx // CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0x09] - vcvttsd2si (%rcx), %rcx + vcvttsd2siq (%rcx), %rcx -// CHECK: vcvttss2si %xmm4, %rcx +// CHECK: vcvttss2siq %xmm4, %rcx // CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0xcc] - vcvttss2si %xmm4, %rcx + vcvttss2siq %xmm4, %rcx -// CHECK: vcvttss2si (%rcx), %rcx +// CHECK: vcvttss2siq (%rcx), %rcx // CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0x09] - vcvttss2si (%rcx), %rcx + vcvttss2siq (%rcx), %rcx // CHECK: vlddqu (%rax), %ymm12 // CHECK: encoding: [0xc5,0x7f,0xf0,0x20] diff --git a/test/Makefile b/test/Makefile index 483db23..9ddfabf 100644 --- a/test/Makefile +++ b/test/Makefile @@ -62,6 +62,15 @@ clang-site-cfg: FORCE $(MAKE) -C $(PROJ_OBJ_DIR)/../tools/clang/test lit.site.cfg Unit/lit.site.cfg extra-site-cfgs:: clang-site-cfg endif + +ifeq ($(shell test -f $(PROJ_OBJ_DIR)/../tools/clang/tools/extra/Makefile && echo OK), OK) +LIT_ALL_TESTSUITES += $(PROJ_OBJ_DIR)/../tools/clang/tools/extra/test + +# Force creation of Clang Tools' lit.site.cfg. 
+clang-tools-site-cfg: FORCE + $(MAKE) -C $(PROJ_OBJ_DIR)/../tools/clang/tools/extra/test lit.site.cfg +extra-site-cfgs:: clang-tools-site-cfg +endif endif endif @@ -122,7 +131,7 @@ lit.site.cfg: FORCE @$(ECHOPATH) s=@SHLIBDIR@=$(SharedLibDir)=g >> lit.tmp @$(ECHOPATH) s=@SHLIBEXT@=$(SHLIBEXT)=g >> lit.tmp @$(ECHOPATH) s=@PYTHON_EXECUTABLE@=python=g >> lit.tmp - @$(ECHOPATH) s=@OCAMLOPT@=$(OCAMLOPT) -cc \"$(CXX_FOR_OCAMLOPT)\" -I $(LibDir)/ocaml=g >> lit.tmp + @$(ECHOPATH) s,@OCAMLOPT@,$(OCAMLOPT) -cc \\\\\"$(CXX_FOR_OCAMLOPT)\\\\\" -I $(LibDir)/ocaml,g >> lit.tmp @$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp @$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp @$(ECHOPATH) s=@TARGETS_TO_BUILD@=$(TARGETS_TO_BUILD)=g >> lit.tmp diff --git a/test/Object/Inputs/COFF/i386.yaml b/test/Object/Inputs/COFF/i386.yaml new file mode 100644 index 0000000..ca90222 --- /dev/null +++ b/test/Object/Inputs/COFF/i386.yaml @@ -0,0 +1,83 @@ +header: !Header + Machine: IMAGE_FILE_MACHINE_I386 # (0x14c) + +sections: + - !Section + Name: .text + Characteristics: [IMAGE_SCN_CNT_CODE, IMAGE_SCN_ALIGN_16BYTES, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ, ] # 0x60500020 + SectionData: !hex "83EC0CC744240800000000C7042400000000E800000000E8000000008B44240883C40CC3" # |....D$.......$...............D$.....| + + Relocations: + - !Relocation + VirtualAddress: 0xe + SymbolTableIndex: 5 + Type: IMAGE_REL_I386_DIR32 + + - !Relocation + VirtualAddress: 0x13 + SymbolTableIndex: 6 + Type: IMAGE_REL_I386_REL32 + + - !Relocation + VirtualAddress: 0x18 + SymbolTableIndex: 7 + Type: IMAGE_REL_I386_REL32 + + - !Section + Name: .data + Characteristics: [IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_ALIGN_1BYTES, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE, ] # 0xc0100040 + SectionData: !hex "48656C6C6F20576F726C642100" # |Hello World!.| + +symbols: + - !Symbol + Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL # (0) + ComplexType: IMAGE_SYM_DTYPE_NULL # (0) + StorageClass: IMAGE_SYM_CLASS_STATIC # (3) + NumberOfAuxSymbols: 1 + AuxillaryData: !hex "240000000300000000000000010000000000" # |$.................| + + - !Symbol + Name: .data + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL # (0) + ComplexType: IMAGE_SYM_DTYPE_NULL # (0) + StorageClass: IMAGE_SYM_CLASS_STATIC # (3) + NumberOfAuxSymbols: 1 + AuxillaryData: !hex "0D0000000000000000000000020000000000" # |..................| + + - !Symbol + Name: _main + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL # (0) + ComplexType: IMAGE_SYM_DTYPE_FUNCTION # (2) + StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2) + + - !Symbol + Name: L_.str + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL # (0) + ComplexType: IMAGE_SYM_DTYPE_NULL # (0) + StorageClass: IMAGE_SYM_CLASS_STATIC # (3) + + - !Symbol + Name: _puts + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL # (0) + ComplexType: IMAGE_SYM_DTYPE_NULL # (0) + StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2) + + - !Symbol + Name: _SomeOtherFunction + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL # (0) + ComplexType: IMAGE_SYM_DTYPE_NULL # (0) + StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2) + diff --git a/test/Object/Inputs/COFF/x86-64.yaml b/test/Object/Inputs/COFF/x86-64.yaml new file mode 100644 index 0000000..0b1265f --- /dev/null +++ b/test/Object/Inputs/COFF/x86-64.yaml @@ -0,0 +1,83 @@ +header: !Header + Machine: IMAGE_FILE_MACHINE_AMD64 # (0x8664) + +sections: + - !Section + Name: .text + Characteristics: [IMAGE_SCN_CNT_CODE, 
IMAGE_SCN_ALIGN_16BYTES, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ, ] # 0x60500020 + SectionData: !hex "4883EC28C744242400000000488D0D00000000E800000000E8000000008B4424244883C428C3" # |H..(.D$$....H.................D$$H..(.| + + Relocations: + - !Relocation + VirtualAddress: 0xf + SymbolTableIndex: 5 + Type: IMAGE_REL_AMD64_REL32 + + - !Relocation + VirtualAddress: 0x14 + SymbolTableIndex: 6 + Type: IMAGE_REL_AMD64_REL32 + + - !Relocation + VirtualAddress: 0x19 + SymbolTableIndex: 7 + Type: IMAGE_REL_AMD64_REL32 + + - !Section + Name: .data + Characteristics: [IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_ALIGN_1BYTES, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE, ] # 0xc0100040 + SectionData: !hex "48656C6C6F20576F726C642100" # |Hello World!.| + +symbols: + - !Symbol + Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL # (0) + ComplexType: IMAGE_SYM_DTYPE_NULL # (0) + StorageClass: IMAGE_SYM_CLASS_STATIC # (3) + NumberOfAuxSymbols: 1 + AuxillaryData: !hex "260000000300000000000000010000000000" # |&.................| + + - !Symbol + Name: .data + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL # (0) + ComplexType: IMAGE_SYM_DTYPE_NULL # (0) + StorageClass: IMAGE_SYM_CLASS_STATIC # (3) + NumberOfAuxSymbols: 1 + AuxillaryData: !hex "0D0000000000000000000000020000000000" # |..................| + + - !Symbol + Name: main + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL # (0) + ComplexType: IMAGE_SYM_DTYPE_FUNCTION # (2) + StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2) + + - !Symbol + Name: L.str + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL # (0) + ComplexType: IMAGE_SYM_DTYPE_NULL # (0) + StorageClass: IMAGE_SYM_CLASS_STATIC # (3) + + - !Symbol + Name: puts + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL # (0) + ComplexType: IMAGE_SYM_DTYPE_NULL # (0) + StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2) + + - !Symbol + Name: SomeOtherFunction + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL # (0) + ComplexType: IMAGE_SYM_DTYPE_NULL # (0) + StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2) + diff --git a/test/Object/Inputs/trivial-object-test.coff-i386 b/test/Object/Inputs/trivial-object-test.coff-i386 Binary files differindex 8cfd994..d4ab63b 100644 --- a/test/Object/Inputs/trivial-object-test.coff-i386 +++ b/test/Object/Inputs/trivial-object-test.coff-i386 diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test index e5635ab..8fd1c04 100644 --- a/test/Object/nm-trivial-object.test +++ b/test/Object/nm-trivial-object.test @@ -1,7 +1,7 @@ -RUN: llvm-nm %p/Inputs/trivial-object-test.coff-i386 \ -RUN: | FileCheck %s -check-prefix COFF -RUN: llvm-nm %p/Inputs/trivial-object-test.coff-x86-64 \ +RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-nm \ RUN: | FileCheck %s -check-prefix COFF +RUN: yaml2obj %p/Inputs/COFF/x86-64.yaml | llvm-nm \ +RUN | FileCheck %s -check-prefix COFF RUN: llvm-nm %p/Inputs/trivial-object-test.elf-i386 \ RUN: | FileCheck %s -check-prefix ELF RUN: llvm-nm %p/Inputs/trivial-object-test.elf-x86-64 \ @@ -30,4 +30,4 @@ macho: 00000000 U _puts macho64: 00000028 s L_.str macho64: 00000000 u _SomeOtherFunction macho64: 00000000 s _main -macho64: 00000000 u _puts
\ No newline at end of file +macho64: 00000000 u _puts diff --git a/test/Object/objdump-file-header.test b/test/Object/objdump-file-header.test index 3fce3f4..a552113 100644 --- a/test/Object/objdump-file-header.test +++ b/test/Object/objdump-file-header.test @@ -1,5 +1,4 @@ -RUN: llvm-objdump -f %p/Inputs/trivial-object-test.coff-i386 \ -RUN: | FileCheck %s -check-prefix COFF-i386 +RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-objdump -f - | FileCheck %s -check-prefix COFF-i386 RUN: llvm-objdump -f %p/Inputs/trivial-object-test.elf-i386 \ RUN: | FileCheck %s -check-prefix ELF-i386 diff --git a/test/Object/objdump-section-content.test b/test/Object/objdump-section-content.test index 581e75e..f9c4f43 100644 --- a/test/Object/objdump-section-content.test +++ b/test/Object/objdump-section-content.test @@ -1,9 +1,8 @@ -RUN: llvm-objdump -s %p/Inputs/trivial-object-test.coff-i386 \ -RUN: | FileCheck %s -check-prefix COFF-i386 +RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-objdump -s - | FileCheck %s -check-prefix COFF-i386 RUN: llvm-objdump -s %p/Inputs/trivial-object-test.elf-i386 \ RUN: | FileCheck %s -check-prefix ELF-i386 -COFF-i386: trivial-object-test.coff-i386: file format +COFF-i386: file format COFF-i386: Contents of section .text: COFF-i386: 0000 83ec0cc7 44240800 000000c7 04240000 ....D$.......$.. COFF-i386: 0010 0000e800 000000e8 00000000 8b442408 .............D$. diff --git a/test/Object/objdump-symbol-table.test b/test/Object/objdump-symbol-table.test index 8a0f440..989ec04 100644 --- a/test/Object/objdump-symbol-table.test +++ b/test/Object/objdump-symbol-table.test @@ -1,17 +1,17 @@ -RUN: llvm-objdump -t %p/Inputs/trivial-object-test.coff-i386 \ +RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-objdump -t - \ RUN: | FileCheck %s -check-prefix COFF-i386 RUN: llvm-objdump -t %p/Inputs/trivial-object-test.elf-i386 \ RUN: | FileCheck %s -check-prefix ELF-i386 RUN: llvm-objdump -t %p/Inputs/trivial-object-test.macho-i386 \ RUN: | FileCheck %s -check-prefix macho-i386 -COFF-i386: trivial-object-test.coff-i386: file format +COFF-i386: file format COFF-i386: SYMBOL TABLE: COFF-i386: [ 0](sec 1)(fl 0x00)(ty 0)(scl 3) (nx 1) 0x00000000 .text COFF-i386: AUX scnlen 0x24 nreloc 3 nlnno 0 checksum 0x0 assoc 1 comdat 0 COFF-i386: [ 2](sec 2)(fl 0x00)(ty 0)(scl 3) (nx 1) 0x00000000 .data COFF-i386: AUX scnlen 0xd nreloc 0 nlnno 0 checksum 0x0 assoc 2 comdat 0 -COFF-i386: [ 4](sec 1)(fl 0x00)(ty 200)(scl 2) (nx 0) 0x00000000 _main +COFF-i386: [ 4](sec 1)(fl 0x00)(ty 20)(scl 2) (nx 0) 0x00000000 _main COFF-i386: [ 5](sec 2)(fl 0x00)(ty 0)(scl 3) (nx 0) 0x00000000 L_.str COFF-i386: [ 6](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x00000000 _puts COFF-i386: [ 7](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x00000000 _SomeOtherFunction @@ -30,4 +30,4 @@ macho-i386: trivial-object-test.macho-i386: file format Mach-O 32-bit i38 macho-i386: SYMBOL TABLE: macho-i386: 00000000 g F __TEXT,__text 00000024 _main macho-i386: 00000000 *UND* 00000000 _SomeOtherFunction -macho-i386: 00000000 *UND* 00000000 _puts
\ No newline at end of file +macho-i386: 00000000 *UND* 00000000 _puts diff --git a/test/Transforms/CodeGenPrepare/basic.ll b/test/Transforms/CodeGenPrepare/basic.ll index ebf10f0..c68e77e 100644 --- a/test/Transforms/CodeGenPrepare/basic.ll +++ b/test/Transforms/CodeGenPrepare/basic.ll @@ -5,7 +5,7 @@ target triple = "x86_64-apple-darwin10.0.0" ; CHECK: @test1 ; objectsize should fold to a constant, which causes the branch to fold to an -; uncond branch. +; uncond branch. Next, we fold the control flow alltogether. ; rdar://8785296 define i32 @test1(i8* %ptr) nounwind ssp noredzone align 2 { entry: @@ -13,8 +13,8 @@ entry: %1 = icmp ugt i64 %0, 3 br i1 %1, label %T, label %trap -; CHECK: entry: -; CHECK-NEXT: br label %T +; CHECK: T: +; CHECK-NOT: br label % trap: ; preds = %0, %entry tail call void @llvm.trap() noreturn nounwind diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll index a386206..7a8cdd5 100644 --- a/test/Transforms/DeadStoreElimination/simple.ll +++ b/test/Transforms/DeadStoreElimination/simple.ll @@ -276,3 +276,37 @@ define void @test22(i1 %i, i32 %k, i32 %m) nounwind { ; CHECK-NEXT: ret void ret void } + +; PR13547 +; CHECK: @test23 +; CHECK: store i8 97 +; CHECK: store i8 0 +declare noalias i8* @strdup(i8* nocapture) nounwind +define noalias i8* @test23() nounwind uwtable ssp { + %x = alloca [2 x i8], align 1 + %arrayidx = getelementptr inbounds [2 x i8]* %x, i64 0, i64 0 + store i8 97, i8* %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds [2 x i8]* %x, i64 0, i64 1 + store i8 0, i8* %arrayidx1, align 1 + %call = call i8* @strdup(i8* %arrayidx) nounwind + ret i8* %call +} + +; Make sure same sized store to later element is deleted +; CHECK: @test24 +; CHECK-NOT: store i32 0 +; CHECK-NOT: store i32 0 +; CHECK: store i32 %b +; CHECK: store i32 %c +; CHECK: ret void +define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind { + %1 = getelementptr inbounds [2 x i32]* %a, i64 0, i64 0 + store i32 0, i32* %1, align 4 + %2 = getelementptr inbounds [2 x i32]* %a, i64 0, i64 1 + store i32 0, i32* %2, align 4 + %3 = getelementptr inbounds [2 x i32]* %a, i64 0, i64 0 + store i32 %b, i32* %3, align 4 + %4 = getelementptr inbounds [2 x i32]* %a, i64 0, i64 1 + store i32 %c, i32* %4, align 4 + ret void +} diff --git a/test/Transforms/GVN/edge.ll b/test/Transforms/GVN/edge.ll new file mode 100644 index 0000000..32392f3 --- /dev/null +++ b/test/Transforms/GVN/edge.ll @@ -0,0 +1,60 @@ +; RUN: opt %s -gvn -S -o - | FileCheck %s + +define i32 @f1(i32 %x) { + ; CHECK: define i32 @f1( +bb0: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %bb2, label %bb1 +bb1: + br label %bb2 +bb2: + %cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ] + %foo = add i32 %cond, %x + ret i32 %foo + ; CHECK: bb2: + ; CHECK: ret i32 %x +} + +define i32 @f2(i32 %x) { + ; CHECK: define i32 @f2( +bb0: + %cmp = icmp ne i32 %x, 0 + br i1 %cmp, label %bb1, label %bb2 +bb1: + br label %bb2 +bb2: + %cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ] + %foo = add i32 %cond, %x + ret i32 %foo + ; CHECK: bb2: + ; CHECK: ret i32 %x +} + +define i32 @f3(i32 %x) { + ; CHECK: define i32 @f3( +bb0: + switch i32 %x, label %bb1 [ i32 0, label %bb2] +bb1: + br label %bb2 +bb2: + %cond = phi i32 [ %x, %bb0 ], [ 0, %bb1 ] + %foo = add i32 %cond, %x + ret i32 %foo + ; CHECK: bb2: + ; CHECK: ret i32 %x +} + +declare void @g(i1) +define void @f4(i8 * %x) { +; CHECK: define void @f4( +bb0: + %y = icmp eq i8* null, %x + br i1 %y, label %bb2, label %bb1 +bb1: + br label %bb2 +bb2: + %zed = icmp 
eq i8* null, %x + call void @g(i1 %zed) +; CHECK: call void @g(i1 %y) + ret void +} diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll index 9e08004..e764169 100644 --- a/test/Transforms/GVN/rle.ll +++ b/test/Transforms/GVN/rle.ll @@ -620,7 +620,7 @@ entry: ; CHECK-NOT: load ; CHECK: load i16* ; CHECK-NOT: load -; CHECK-ret i32 +; CHECK: ret i32 } define i32 @test_widening2() nounwind ssp noredzone { @@ -644,7 +644,7 @@ entry: ; CHECK-NOT: load ; CHECK: load i32* ; CHECK-NOT: load -; CHECK-ret i32 +; CHECK: ret i32 } declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind diff --git a/test/Transforms/GlobalOpt/cleanup-pointer-root-users.ll b/test/Transforms/GlobalOpt/cleanup-pointer-root-users.ll index f426120..a472f10 100644 --- a/test/Transforms/GlobalOpt/cleanup-pointer-root-users.ll +++ b/test/Transforms/GlobalOpt/cleanup-pointer-root-users.ll @@ -1,12 +1,12 @@ ; RUN: opt -globalopt -S -o - < %s | FileCheck %s -@test1 = internal global i8* null +@glbl = internal global i8* null define void @test1a() { ; CHECK: @test1a ; CHECK-NOT: store ; CHECK-NEXT: ret void - store i8* null, i8** @test1 + store i8* null, i8** @glbl ret void } @@ -14,6 +14,36 @@ define void @test1b(i8* %p) { ; CHECK: @test1b ; CHECK-NEXT: store ; CHECK-NEXT: ret void - store i8* %p, i8** @test1 + store i8* %p, i8** @glbl ret void } + +define void @test2() { +; CHECK: @test2 +; CHECK: alloca i8 + %txt = alloca i8 + call void @foo2(i8* %txt) + %call2 = call i8* @strdup(i8* %txt) + store i8* %call2, i8** @glbl + ret void +} +declare i8* @strdup(i8*) +declare void @foo2(i8*) + +define void @test3() uwtable { +; CHECK: @test3 +; CHECK-NOT: bb1: +; CHECK-NOT: bb2: +; CHECK: invoke + %ptr = invoke i8* @_Znwm(i64 1) + to label %bb1 unwind label %bb2 +bb1: + store i8* %ptr, i8** @glbl + unreachable +bb2: + %tmp1 = landingpad { i8*, i32 } personality i32 (i32, i64, i8*, i8*)* @__gxx_personality_v0 + cleanup + resume { i8*, i32 } %tmp1 +} +declare i32 @__gxx_personality_v0(i32, i64, i8*, i8*) +declare i8* @_Znwm(i64) diff --git a/test/Transforms/Inline/always-inline.ll b/test/Transforms/Inline/always-inline.ll index e0be41f..c918bc9 100644 --- a/test/Transforms/Inline/always-inline.ll +++ b/test/Transforms/Inline/always-inline.ll @@ -33,7 +33,6 @@ define void @outer2(i32 %N) { ; ; CHECK: @outer2 ; CHECK-NOT: call void @inner2 -; CHECK alloca i32, i32 %N ; CHECK-NOT: call void @inner2 ; CHECK: ret void diff --git a/test/Transforms/Inline/inline-byval-bonus.ll b/test/Transforms/Inline/inline-byval-bonus.ll new file mode 100644 index 0000000..f3ed819 --- /dev/null +++ b/test/Transforms/Inline/inline-byval-bonus.ll @@ -0,0 +1,193 @@ +; RUN: opt -S -inline -inline-threshold=275 < %s | FileCheck %s +; PR13095 + +; The performance of the c-ray benchmark largely depends on the inlining of a +; specific call to @ray_sphere. This test case is designed to verify that it's +; inlined at -O3. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +%struct.sphere = type { %struct.vec3, double, %struct.material, %struct.sphere* } +%struct.vec3 = type { double, double, double } +%struct.material = type { %struct.vec3, double, double } +%struct.ray = type { %struct.vec3, %struct.vec3 } +%struct.spoint = type { %struct.vec3, %struct.vec3, %struct.vec3, double } + +define i32 @caller(%struct.sphere* %i) { + %shadow_ray = alloca %struct.ray, align 8 + call void @fix(%struct.ray* %shadow_ray) + + %call = call i32 @ray_sphere(%struct.sphere* %i, %struct.ray* byval align 8 %shadow_ray, %struct.spoint* null) + ret i32 %call + +; CHECK: @caller +; CHECK-NOT: call i32 @ray_sphere +; CHECK: ret i32 +} + +declare void @fix(%struct.ray*) + +define i32 @ray_sphere(%struct.sphere* nocapture %sph, %struct.ray* nocapture byval align 8 %ray, %struct.spoint* %sp) nounwind uwtable ssp { + %1 = getelementptr inbounds %struct.ray* %ray, i64 0, i32 1, i32 0 + %2 = load double* %1, align 8 + %3 = fmul double %2, %2 + %4 = getelementptr inbounds %struct.ray* %ray, i64 0, i32 1, i32 1 + %5 = load double* %4, align 8 + %6 = fmul double %5, %5 + %7 = fadd double %3, %6 + %8 = getelementptr inbounds %struct.ray* %ray, i64 0, i32 1, i32 2 + %9 = load double* %8, align 8 + %10 = fmul double %9, %9 + %11 = fadd double %7, %10 + %12 = fmul double %2, 2.000000e+00 + %13 = getelementptr inbounds %struct.ray* %ray, i64 0, i32 0, i32 0 + %14 = load double* %13, align 8 + %15 = getelementptr inbounds %struct.sphere* %sph, i64 0, i32 0, i32 0 + %16 = load double* %15, align 8 + %17 = fsub double %14, %16 + %18 = fmul double %12, %17 + %19 = fmul double %5, 2.000000e+00 + %20 = getelementptr inbounds %struct.ray* %ray, i64 0, i32 0, i32 1 + %21 = load double* %20, align 8 + %22 = getelementptr inbounds %struct.sphere* %sph, i64 0, i32 0, i32 1 + %23 = load double* %22, align 8 + %24 = fsub double %21, %23 + %25 = fmul double %19, %24 + %26 = fadd double %18, %25 + %27 = fmul double %9, 2.000000e+00 + %28 = getelementptr inbounds %struct.ray* %ray, i64 0, i32 0, i32 2 + %29 = load double* %28, align 8 + %30 = getelementptr inbounds %struct.sphere* %sph, i64 0, i32 0, i32 2 + %31 = load double* %30, align 8 + %32 = fsub double %29, %31 + %33 = fmul double %27, %32 + %34 = fadd double %26, %33 + %35 = fmul double %16, %16 + %36 = fmul double %23, %23 + %37 = fadd double %35, %36 + %38 = fmul double %31, %31 + %39 = fadd double %37, %38 + %40 = fmul double %14, %14 + %41 = fadd double %40, %39 + %42 = fmul double %21, %21 + %43 = fadd double %42, %41 + %44 = fmul double %29, %29 + %45 = fadd double %44, %43 + %46 = fsub double -0.000000e+00, %16 + %47 = fmul double %14, %46 + %48 = fmul double %21, %23 + %49 = fsub double %47, %48 + %50 = fmul double %29, %31 + %51 = fsub double %49, %50 + %52 = fmul double %51, 2.000000e+00 + %53 = fadd double %52, %45 + %54 = getelementptr inbounds %struct.sphere* %sph, i64 0, i32 1 + %55 = load double* %54, align 8 + %56 = fmul double %55, %55 + %57 = fsub double %53, %56 + %58 = fmul double %34, %34 + %59 = fmul double %11, 4.000000e+00 + %60 = fmul double %59, %57 + %61 = fsub double %58, %60 + %62 = fcmp olt double %61, 0.000000e+00 + br i1 %62, label %130, label %63 + +; <label>:63 ; preds = %0 + %64 = tail call double @sqrt(double %61) nounwind readnone + %65 = fsub double -0.000000e+00, %34 + %66 = fsub double %64, %34 + 
%67 = fmul double %11, 2.000000e+00 + %68 = fdiv double %66, %67 + %69 = fsub double %65, %64 + %70 = fdiv double %69, %67 + %71 = fcmp olt double %68, 1.000000e-06 + %72 = fcmp olt double %70, 1.000000e-06 + %or.cond = and i1 %71, %72 + br i1 %or.cond, label %130, label %73 + +; <label>:73 ; preds = %63 + %74 = fcmp ogt double %68, 1.000000e+00 + %75 = fcmp ogt double %70, 1.000000e+00 + %or.cond1 = and i1 %74, %75 + br i1 %or.cond1, label %130, label %76 + +; <label>:76 ; preds = %73 + %77 = icmp eq %struct.spoint* %sp, null + br i1 %77, label %130, label %78 + +; <label>:78 ; preds = %76 + %t1.0 = select i1 %71, double %70, double %68 + %t2.0 = select i1 %72, double %t1.0, double %70 + %79 = fcmp olt double %t1.0, %t2.0 + %80 = select i1 %79, double %t1.0, double %t2.0 + %81 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 3 + store double %80, double* %81, align 8 + %82 = fmul double %80, %2 + %83 = fadd double %14, %82 + %84 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 0, i32 0 + store double %83, double* %84, align 8 + %85 = fmul double %5, %80 + %86 = fadd double %21, %85 + %87 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 0, i32 1 + store double %86, double* %87, align 8 + %88 = fmul double %9, %80 + %89 = fadd double %29, %88 + %90 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 0, i32 2 + store double %89, double* %90, align 8 + %91 = load double* %15, align 8 + %92 = fsub double %83, %91 + %93 = load double* %54, align 8 + %94 = fdiv double %92, %93 + %95 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 1, i32 0 + store double %94, double* %95, align 8 + %96 = load double* %22, align 8 + %97 = fsub double %86, %96 + %98 = load double* %54, align 8 + %99 = fdiv double %97, %98 + %100 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 1, i32 1 + store double %99, double* %100, align 8 + %101 = load double* %30, align 8 + %102 = fsub double %89, %101 + %103 = load double* %54, align 8 + %104 = fdiv double %102, %103 + %105 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 1, i32 2 + store double %104, double* %105, align 8 + %106 = fmul double %2, %94 + %107 = fmul double %5, %99 + %108 = fadd double %106, %107 + %109 = fmul double %9, %104 + %110 = fadd double %108, %109 + %111 = fmul double %110, 2.000000e+00 + %112 = fmul double %94, %111 + %113 = fsub double %112, %2 + %114 = fsub double -0.000000e+00, %113 + %115 = fmul double %99, %111 + %116 = fsub double %115, %5 + %117 = fsub double -0.000000e+00, %116 + %118 = fmul double %104, %111 + %119 = fsub double %118, %9 + %120 = fsub double -0.000000e+00, %119 + %.06 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 2, i32 0 + %.18 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 2, i32 1 + %.210 = getelementptr inbounds %struct.spoint* %sp, i64 0, i32 2, i32 2 + %121 = fmul double %113, %113 + %122 = fmul double %116, %116 + %123 = fadd double %121, %122 + %124 = fmul double %119, %119 + %125 = fadd double %123, %124 + %126 = tail call double @sqrt(double %125) nounwind readnone + %127 = fdiv double %114, %126 + store double %127, double* %.06, align 8 + %128 = fdiv double %117, %126 + store double %128, double* %.18, align 8 + %129 = fdiv double %120, %126 + store double %129, double* %.210, align 8 + br label %130 + +; <label>:130 ; preds = %78, %76, %73, %63, %0 + %.0 = phi i32 [ 0, %0 ], [ 0, %73 ], [ 0, %63 ], [ 1, %76 ], [ 1, %78 ] + ret i32 %.0 +} + +declare double @sqrt(double) nounwind readnone diff --git 
a/test/Transforms/Inline/inline_constprop.ll b/test/Transforms/Inline/inline_constprop.ll index dc35b60..0b48a72 100644 --- a/test/Transforms/Inline/inline_constprop.ll +++ b/test/Transforms/Inline/inline_constprop.ll @@ -110,3 +110,65 @@ bb.merge: bb.false: ret i32 %sub } + + +define i32 @PR13412.main() { +; This is a somewhat complicated three layer subprogram that was reported to +; compute the wrong value for a branch due to assuming that an argument +; mid-inline couldn't be equal to another pointer. +; +; After inlining, the branch should point directly to the exit block, not to +; the intermediate block. +; CHECK: @PR13412.main +; CHECK: br i1 true, label %[[TRUE_DEST:.*]], label %[[FALSE_DEST:.*]] +; CHECK: [[FALSE_DEST]]: +; CHECK-NEXT: call void @PR13412.fail() +; CHECK: [[TRUE_DEST]]: +; CHECK-NEXT: ret i32 0 + +entry: + %i1 = alloca i64 + store i64 0, i64* %i1 + %arraydecay = bitcast i64* %i1 to i32* + %call = call i1 @PR13412.first(i32* %arraydecay, i32* %arraydecay) + br i1 %call, label %cond.end, label %cond.false + +cond.false: + call void @PR13412.fail() + br label %cond.end + +cond.end: + ret i32 0 +} + +define internal i1 @PR13412.first(i32* %a, i32* %b) { +entry: + %call = call i32* @PR13412.second(i32* %a, i32* %b) + %cmp = icmp eq i32* %call, %b + ret i1 %cmp +} + +declare void @PR13412.fail() + +define internal i32* @PR13412.second(i32* %a, i32* %b) { +entry: + %sub.ptr.lhs.cast = ptrtoint i32* %b to i64 + %sub.ptr.rhs.cast = ptrtoint i32* %a to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + %sub.ptr.div = ashr exact i64 %sub.ptr.sub, 2 + %cmp = icmp ugt i64 %sub.ptr.div, 1 + br i1 %cmp, label %if.then, label %if.end3 + +if.then: + %0 = load i32* %a + %1 = load i32* %b + %cmp1 = icmp eq i32 %0, %1 + br i1 %cmp1, label %return, label %if.end3 + +if.end3: + br label %return + +return: + %retval.0 = phi i32* [ %b, %if.end3 ], [ %a, %if.then ] + ret i32* %retval.0 +} diff --git a/test/Transforms/InstCombine/2008-11-08-FCmp.ll b/test/Transforms/InstCombine/2008-11-08-FCmp.ll index c636288..f33a1f5 100644 --- a/test/Transforms/InstCombine/2008-11-08-FCmp.ll +++ b/test/Transforms/InstCombine/2008-11-08-FCmp.ll @@ -45,3 +45,12 @@ define i1 @test6(i32 %val) { ret i1 %2 ; CHECK: ret i1 false } + +; Check that optimizing unsigned >= comparisons correctly distinguishes +; positive and negative constants. 
<rdar://problem/12029145> +define i1 @test7(i32 %val) { + %1 = uitofp i32 %val to double + %2 = fcmp oge double %1, 3.200000e+00 + ret i1 %2 +; CHECK: icmp ugt i32 %val, 3 +} diff --git a/test/Transforms/InstCombine/2012-07-25-LoadPart.ll b/test/Transforms/InstCombine/2012-07-25-LoadPart.ll new file mode 100644 index 0000000..73e5a66 --- /dev/null +++ b/test/Transforms/InstCombine/2012-07-25-LoadPart.ll @@ -0,0 +1,12 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s +; PR13442 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" + +@test = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4] + +define i64 @foo() { + %ret = load i64* bitcast (i8* getelementptr (i8* bitcast ([4 x i32]* @test to i8*), i64 2) to i64*), align 1 + ret i64 %ret + ; CHECK: ret i64 844424930263040 +} diff --git a/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll b/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll new file mode 100644 index 0000000..6f3df5b --- /dev/null +++ b/test/Transforms/InstCombine/2012-07-30-addrsp-bitcast.ll @@ -0,0 +1,10 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s +; CHECK: bitcast + +@base = internal addrspace(3) unnamed_addr global [16 x i32] zeroinitializer, align 16 +declare void @foo(i32*) + +define void @test() nounwind { + call void @foo(i32* getelementptr (i32* bitcast ([16 x i32] addrspace(3)* @base to i32*), i64 2147483647)) nounwind + ret void +} diff --git a/test/Transforms/InstCombine/crash.ll b/test/Transforms/InstCombine/crash.ll index d5af532..2ef6ac6 100644 --- a/test/Transforms/InstCombine/crash.ll +++ b/test/Transforms/InstCombine/crash.ll @@ -132,12 +132,14 @@ define i32 @test5a() { } define void @test5() { - store i1 true, i1* undef - %1 = invoke i32 @test5a() to label %exit unwind label %exit + store i1 true, i1* undef + %r = invoke i32 @test5a() to label %exit unwind label %unwind +unwind: + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + br label %exit exit: - %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 - cleanup - ret void + ret void } diff --git a/test/Transforms/InstCombine/fcmp.ll b/test/Transforms/InstCombine/fcmp.ll index d08cbf5..fc1ced0 100644 --- a/test/Transforms/InstCombine/fcmp.ll +++ b/test/Transforms/InstCombine/fcmp.ll @@ -69,3 +69,93 @@ define float @test8(float %x) nounwind readnone optsize ssp { ; CHECK: @test8 ; CHECK-NEXT: fcmp olt float %x, 0.000000e+00 } + +declare double @fabs(double) nounwind readnone + +define i32 @test9(double %a) nounwind { + %call = tail call double @fabs(double %a) nounwind + %cmp = fcmp olt double %call, 0.000000e+00 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK: @test9 +; CHECK-NOT: fabs +; CHECK: ret i32 0 +} + +define i32 @test10(double %a) nounwind { + %call = tail call double @fabs(double %a) nounwind + %cmp = fcmp ole double %call, 0.000000e+00 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK: @test10 +; CHECK-NOT: fabs +; CHECK: fcmp oeq double %a, 0.000000e+00 +} + +define i32 @test11(double %a) nounwind { + %call = tail call double @fabs(double %a) nounwind + %cmp = fcmp ogt double %call, 0.000000e+00 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK: @test11 +; CHECK-NOT: fabs +; CHECK: fcmp one double %a, 0.000000e+00 +} + +define i32 @test12(double %a) nounwind { + %call = tail call double @fabs(double %a) nounwind + %cmp = fcmp oge double %call, 0.000000e+00 + %conv = zext i1 %cmp to i32 + ret i32 %conv 
+; CHECK: @test12 +; CHECK-NOT: fabs +; CHECK: fcmp ord double %a, 0.000000e+00 +} + +define i32 @test13(double %a) nounwind { + %call = tail call double @fabs(double %a) nounwind + %cmp = fcmp une double %call, 0.000000e+00 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK: @test13 +; CHECK-NOT: fabs +; CHECK: fcmp une double %a, 0.000000e+00 +} + +define i32 @test14(double %a) nounwind { + %call = tail call double @fabs(double %a) nounwind + %cmp = fcmp oeq double %call, 0.000000e+00 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK: @test14 +; CHECK-NOT: fabs +; CHECK: fcmp oeq double %a, 0.000000e+00 +} + +define i32 @test15(double %a) nounwind { + %call = tail call double @fabs(double %a) nounwind + %cmp = fcmp one double %call, 0.000000e+00 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK: @test15 +; CHECK-NOT: fabs +; CHECK: fcmp one double %a, 0.000000e+00 +} + +define i32 @test16(double %a) nounwind { + %call = tail call double @fabs(double %a) nounwind + %cmp = fcmp ueq double %call, 0.000000e+00 + %conv = zext i1 %cmp to i32 + ret i32 %conv +; CHECK: @test16 +; CHECK-NOT: fabs +; CHECK: fcmp ueq double %a, 0.000000e+00 +} + +; Don't crash. +define i32 @test17(double %a, double (double)* %p) nounwind { + %call = tail call double %p(double %a) nounwind + %cmp = fcmp ueq double %call, 0.000000e+00 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} diff --git a/test/Transforms/ScalarRepl/memcpy-from-global.ll b/test/Transforms/InstCombine/memcpy-from-global.ll index 5557a8f..83c893e 100644 --- a/test/Transforms/ScalarRepl/memcpy-from-global.ll +++ b/test/Transforms/InstCombine/memcpy-from-global.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -scalarrepl -S | FileCheck %s +; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" @C.0.1248 = internal constant [128 x float] [ float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, 
float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 ], align 32 ; <[128 x float]*> [#uses=1] @@ -6,13 +6,11 @@ define float @test1(i32 %hash, float %x, float %y, float %z, float %w) { entry: %lookupTable = alloca [128 x float], align 16 ; <[128 x float]*> [#uses=5] %lookupTable1 = bitcast [128 x float]* %lookupTable to i8* ; <i8*> [#uses=1] - call void @llvm.memcpy.i32( i8* %lookupTable1, i8* bitcast ([128 x float]* @C.0.1248 to i8*), i32 512, i32 16 ) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %lookupTable1, i8* bitcast ([128 x float]* @C.0.1248 to i8*), i64 512, i32 16, i1 false) ; CHECK: @test1 ; CHECK-NOT: alloca ; CHECK-NOT: call{{.*}}@llvm.memcpy -; CHECK: %lookupTable1 = bitcast [128 x float]* @C.0.1248 to i8* -; CHECK-NOT: call{{.*}}@llvm.memcpy %tmp3 = shl i32 %hash, 2 ; <i32> [#uses=1] %tmp5 = and i32 %tmp3, 124 ; <i32> [#uses=4] @@ -38,10 +36,6 @@ entry: ret float %tmp43 } -declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) - - - declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind %T = type { i8, [123 x i8] } @@ -59,10 +53,11 @@ define void @test2() { ; CHECK: @test2 ; %A alloca is deleted -; CHECK-NEXT: %B = alloca %T +; CHECK-NEXT: alloca [124 x i8] +; CHECK-NEXT: getelementptr inbounds [124 x i8]* ; use @G instead of %A -; CHECK-NEXT: %a = bitcast %T* @G to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* getelementptr inbounds (%T* @G, i64 0, i32 0) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 124, i32 4, i1 false) call void @bar(i8* %b) @@ -79,8 +74,7 @@ define void @test3() { call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false) call void @bar(i8* %a) readonly ; CHECK: @test3 -; CHECK-NEXT: %a = bitcast %T* @G to i8* -; CHECK-NEXT: call void @bar(i8* %a) +; CHECK-NEXT: call void @bar(i8* getelementptr inbounds (%T* @G, i64 0, i32 0)) ret void } @@ -90,8 +84,7 @@ define void @test4() { call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false) call void @baz(i8* byval %a) ; CHECK: @test4 -; CHECK-NEXT: %a = bitcast %T* @G to i8* -; CHECK-NEXT: call void @baz(i8* byval %a) +; CHECK-NEXT: call void @baz(i8* byval getelementptr inbounds (%T* @G, i64 0, i32 0)) ret void } @@ -103,8 +96,7 @@ define void @test5() { call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false) call void @baz(i8* byval %a) ; CHECK: @test5 -; CHECK-NEXT: %a = bitcast %T* @G to i8* -; 
CHECK-NEXT: call void @baz(i8* byval %a) +; CHECK-NEXT: call void @baz(i8* byval getelementptr inbounds (%T* @G, i64 0, i32 0)) ret void } @@ -118,8 +110,7 @@ define void @test6() { call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([2 x %U]* @H to i8*), i64 20, i32 16, i1 false) call void @bar(i8* %a) readonly ; CHECK: @test6 -; CHECK-NEXT: %a = bitcast -; CHECK-NEXT: call void @bar(i8* %a) +; CHECK-NEXT: call void @bar(i8* bitcast ([2 x %U]* @H to i8*)) ret void } @@ -129,8 +120,7 @@ define void @test7() { call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 0) to i8*), i64 20, i32 4, i1 false) call void @bar(i8* %a) readonly ; CHECK: @test7 -; CHECK-NEXT: %a = bitcast -; CHECK-NEXT: call void @bar(i8* %a) +; CHECK-NEXT: call void @bar(i8* bitcast ([2 x %U]* @H to i8*)) ret void } diff --git a/test/Transforms/InstCombine/memcpy.ll b/test/Transforms/InstCombine/memcpy.ll index 8a2e3aa..3a68ff9 100644 --- a/test/Transforms/InstCombine/memcpy.ll +++ b/test/Transforms/InstCombine/memcpy.ll @@ -1,6 +1,7 @@ ; RUN: opt < %s -instcombine -S | FileCheck %s declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind define void @test1(i8* %a) { tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 false) @@ -17,3 +18,10 @@ define void @test2(i8* %a) { ; CHECK: define void @test2 ; CHECK-NEXT: call void @llvm.memcpy } + +define void @test3(i8* %d, i8* %s) { + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 17179869184, i32 4, i1 false) + ret void +; CHECK: define void @test3 +; CHECK-NEXT: call void @llvm.memcpy +} diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll index f5a4581..31a3cb4 100644 --- a/test/Transforms/InstCombine/objsize.ll +++ b/test/Transforms/InstCombine/objsize.ll @@ -171,3 +171,88 @@ define i32 @test8(i8** %esc) { ; CHECK: ret i32 30 ret i32 %objsize } + +declare noalias i8* @strdup(i8* nocapture) nounwind +declare noalias i8* @strndup(i8* nocapture, i32) nounwind + +; CHECK: @test9 +define i32 @test9(i8** %esc) { + %call = tail call i8* @strdup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0)) nounwind + store i8* %call, i8** %esc, align 8 + %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true) +; CHECK: ret i32 8 + ret i32 %1 +} + +; CHECK: @test10 +define i32 @test10(i8** %esc) { + %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 3) nounwind + store i8* %call, i8** %esc, align 8 + %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true) +; CHECK: ret i32 4 + ret i32 %1 +} + +; CHECK: @test11 +define i32 @test11(i8** %esc) { + %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 7) nounwind + store i8* %call, i8** %esc, align 8 + %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true) +; CHECK: ret i32 8 + ret i32 %1 +} + +; CHECK: @test12 +define i32 @test12(i8** %esc) { + %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 8) nounwind + store i8* %call, i8** %esc, align 8 + %1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true) +; CHECK: ret i32 8 + ret i32 %1 +} + +; CHECK: @test13 +define i32 @test13(i8** %esc) { + %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8]* @.str, i64 0, i64 0), i32 57) nounwind + store i8* %call, i8** %esc, align 8 + 
%1 = tail call i32 @llvm.objectsize.i32(i8* %call, i1 true) +; CHECK: ret i32 8 + ret i32 %1 +} + +; CHECK: @PR13390 +define i32 @PR13390(i1 %bool, i8* %a) { +entry: + %cond = or i1 %bool, true + br i1 %cond, label %return, label %xpto + +xpto: + %select = select i1 %bool, i8* %select, i8* %a + %select2 = select i1 %bool, i8* %a, i8* %select2 + %0 = tail call i32 @llvm.objectsize.i32(i8* %select, i1 true) + %1 = tail call i32 @llvm.objectsize.i32(i8* %select2, i1 true) + %2 = add i32 %0, %1 +; CHECK: ret i32 undef + ret i32 %2 + +return: + ret i32 42 +} + +; CHECK: @PR13621 +define i32 @PR13621(i1 %bool) nounwind { +entry: + %cond = or i1 %bool, true + br i1 %cond, label %return, label %xpto + +; technically reachable, but this malformed IR may appear as a result of constant propagation +xpto: + %gep2 = getelementptr i8* %gep, i32 1 + %gep = getelementptr i8* %gep2, i32 1 + %o = call i32 @llvm.objectsize.i32(i8* %gep, i1 true) +; CHECK: ret i32 undef + ret i32 %o + +return: + ret i32 7 +} diff --git a/test/Transforms/InstCombine/select-crash.ll b/test/Transforms/InstCombine/select-crash.ll index 18af152..946ea2b 100644 --- a/test/Transforms/InstCombine/select-crash.ll +++ b/test/Transforms/InstCombine/select-crash.ll @@ -30,3 +30,20 @@ define <4 x float> @foo(i1 %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) { %sel = select i1 %b, <4 x float> %a, <4 x float> %sub ret <4 x float> %sel } + +; CHECK: @test3 +define i32 @test3(i1 %bool, i32 %a) { +entry: + %cond = or i1 %bool, true + br i1 %cond, label %return, label %xpto + +; technically reachable, but this malformed IR may appear as a result of constant propagation +xpto: + %select = select i1 %bool, i32 %a, i32 %select + %select2 = select i1 %bool, i32 %select2, i32 %a + %sum = add i32 %select, %select2 + ret i32 %sum + +return: + ret i32 7 +} diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll index cc63371..0019a57 100644 --- a/test/Transforms/InstCombine/vec_demanded_elts.ll +++ b/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -162,4 +162,51 @@ entry: ret <4 x float> %shuffle9.i } +define <2 x float> @test_fptrunc(double %f) { +; CHECK: @test_fptrunc +; CHECK: insertelement +; CHECK: insertelement +; CHECK-NOT: insertelement + %tmp9 = insertelement <4 x double> undef, double %f, i32 0 + %tmp10 = insertelement <4 x double> %tmp9, double 0.000000e+00, i32 1 + %tmp11 = insertelement <4 x double> %tmp10, double 0.000000e+00, i32 2 + %tmp12 = insertelement <4 x double> %tmp11, double 0.000000e+00, i32 3 + %tmp5 = fptrunc <4 x double> %tmp12 to <4 x float> + %ret = shufflevector <4 x float> %tmp5, <4 x float> undef, <2 x i32> <i32 0, i32 1> + ret <2 x float> %ret +} + +define <2 x double> @test_fpext(float %f) { +; CHECK: @test_fpext +; CHECK: insertelement +; CHECK: insertelement +; CHECK-NOT: insertelement + %tmp9 = insertelement <4 x float> undef, float %f, i32 0 + %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1 + %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2 + %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3 + %tmp5 = fpext <4 x float> %tmp12 to <4 x double> + %ret = shufflevector <4 x double> %tmp5, <4 x double> undef, <2 x i32> <i32 0, i32 1> + ret <2 x double> %ret +} + +define <4 x float> @test_select(float %f, float %g) { +; CHECK: @test_select +; CHECK: %a0 = insertelement <4 x float> undef, float %f, i32 0 +; CHECK-NOT: insertelement +; CHECK: %a3 = insertelement <4 x float> %a0, float 3.000000e+00, 
i32 3 +; CHECK-NOT: insertelement +; CHECK: %ret = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %a3, <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef> + %a0 = insertelement <4 x float> undef, float %f, i32 0 + %a1 = insertelement <4 x float> %a0, float 1.000000e+00, i32 1 + %a2 = insertelement <4 x float> %a1, float 2.000000e+00, i32 2 + %a3 = insertelement <4 x float> %a2, float 3.000000e+00, i32 3 + %b0 = insertelement <4 x float> undef, float %g, i32 0 + %b1 = insertelement <4 x float> %b0, float 4.000000e+00, i32 1 + %b2 = insertelement <4 x float> %b1, float 5.000000e+00, i32 2 + %b3 = insertelement <4 x float> %b2, float 6.000000e+00, i32 3 + %ret = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %a3, <4 x float> %b3 + ret <4 x float> %ret +} + diff --git a/test/Transforms/LICM/promote-order.ll b/test/Transforms/LICM/promote-order.ll new file mode 100644 index 0000000..b016265 --- /dev/null +++ b/test/Transforms/LICM/promote-order.ll @@ -0,0 +1,41 @@ +; RUN: opt -tbaa -basicaa -licm -S < %s | FileCheck %s + +; LICM should keep the stores in their original order when it sinks/promotes them. +; rdar://12045203 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@p = external global i8* + +define i32* @_Z4doiti(i32 %n, float* %tmp1, i32* %tmp3) nounwind { +entry: + %cmp1 = icmp slt i32 0, %n + br i1 %cmp1, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + store float 1.000000e+00, float* %tmp1, align 4, !tbaa !1 + store i32 1, i32* %tmp3, align 4, !tbaa !2 + %inc = add nsw i32 %i.02, 1 + %cmp = icmp slt i32 %inc, %n + br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge + +; CHECK: for.cond.for.end_crit_edge: +; CHECK: store float 1.000000e+00, float* %tmp1 +; CHECK: store i32 1, i32* %tmp3 +for.cond.for.end_crit_edge: ; preds = %for.body + %split = phi i32* [ %tmp3, %for.body ] + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + %r.0.lcssa = phi i32* [ %split, %for.cond.for.end_crit_edge ], [ undef, %entry ] + ret i32* %r.0.lcssa +} + +!0 = metadata !{metadata !"minimal TBAA"} +!1 = metadata !{metadata !"float", metadata !0} +!2 = metadata !{metadata !"int", metadata !0} diff --git a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll index 70ead33..b5124ea 100644 --- a/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll +++ b/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll @@ -44,7 +44,7 @@ declare %s* @getstruct() nounwind ; CHECK: @main ; Check that the loop preheader contains no address computation. 
-; CHECK: %entry +; CHECK: %end_of_chain ; CHECK-NOT: add{{.*}}lsl ; CHECK: ldr{{.*}}lsl #2 ; CHECK: ldr{{.*}}lsl #2 @@ -65,15 +65,15 @@ while.cond: while.body: %v3 = load i32* @ncol, align 4, !tbaa !0 - br label %while.cond.i + br label %end_of_chain -while.cond.i: +end_of_chain: %state.i = getelementptr inbounds %s* %call18, i32 0, i32 0 %v4 = load i32** %state.i, align 4, !tbaa !3 br label %while.cond.i.i while.cond.i.i: - %counter.0.i.i = phi i32 [ %v3, %while.cond.i ], [ %dec.i.i, %land.rhs.i.i ] + %counter.0.i.i = phi i32 [ %v3, %end_of_chain ], [ %dec.i.i, %land.rhs.i.i ] %dec.i.i = add nsw i32 %counter.0.i.i, -1 %tobool.i.i = icmp eq i32 %counter.0.i.i, 0 br i1 %tobool.i.i, label %where.exit, label %land.rhs.i.i diff --git a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll index a9b815f..c3b8b89 100644 --- a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll +++ b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll @@ -99,7 +99,6 @@ while.end: ; preds = %entry ; CHECK: %for.body3.lr.ph.us.i.loopexit ; CHECK-NEXT: in Loop: Header ; CHECK-NEXT: incq -; CHECK-NEXT: .align ; CHECK-NEXT: %for.body3.us.i ; CHECK-NEXT: Inner Loop ; CHECK: testb diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll index d9bb3f2..0a7ba5d 100644 --- a/test/Transforms/ObjCARC/basic.ll +++ b/test/Transforms/ObjCARC/basic.ll @@ -1272,7 +1272,7 @@ g: ; Delete retain,release pairs around loops. ; CHECK: define void @test39( -; CHECK_NOT: @objc_ +; CHECK-NOT: @objc_ ; CHECK: } define void @test39(i8* %p) { entry: @@ -1290,7 +1290,7 @@ exit: ; preds = %loop ; Delete retain,release pairs around loops containing uses. ; CHECK: define void @test39b( -; CHECK_NOT: @objc_ +; CHECK-NOT: @objc_ ; CHECK: } define void @test39b(i8* %p) { entry: @@ -1309,7 +1309,7 @@ exit: ; preds = %loop ; Delete retain,release pairs around loops containing potential decrements. ; CHECK: define void @test39c( -; CHECK_NOT: @objc_ +; CHECK-NOT: @objc_ ; CHECK: } define void @test39c(i8* %p) { entry: @@ -1329,7 +1329,7 @@ exit: ; preds = %loop ; the successors are in a different order. ; CHECK: define void @test40( -; CHECK_NOT: @objc_ +; CHECK-NOT: @objc_ ; CHECK: } define void @test40(i8* %p) { entry: diff --git a/test/Transforms/ObjCARC/invoke.ll b/test/Transforms/ObjCARC/invoke.ll index 76e82a5..1a58e34 100644 --- a/test/Transforms/ObjCARC/invoke.ll +++ b/test/Transforms/ObjCARC/invoke.ll @@ -76,12 +76,12 @@ done: ; CHECK: define void @test2() { ; CHECK: invoke.cont: ; CHECK-NEXT: call i8* @objc_retain -; CHEK-NOT: @objc +; CHECK-NOT: @objc_r ; CHECK: finally.cont: ; CHECK-NEXT: call void @objc_release -; CHEK-NOT: @objc +; CHECK-NOT: @objc ; CHECK: finally.rethrow: -; CHEK-NOT: @objc +; CHECK-NOT: @objc ; CHECK: } define void @test2() { entry: diff --git a/test/Transforms/PruneEH/2003-09-14-ExternalCall.ll b/test/Transforms/PruneEH/2003-09-14-ExternalCall.ll deleted file mode 100644 index 64aba46..0000000 --- a/test/Transforms/PruneEH/2003-09-14-ExternalCall.ll +++ /dev/null @@ -1,14 +0,0 @@ -; RUN: opt < %s -prune-eh -S | grep invoke - -declare void @External() - -define void @foo() { - invoke void @External( ) - to label %Cont unwind label %Cont -Cont: ; preds = %0, %0 - %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 - cleanup - ret void -} - -declare i32 @__gxx_personality_v0(...) 
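The ObjCARC hunks above correct misspelled FileCheck directives: CHECK_NOT and CHEK-NOT become CHECK-NOT. FileCheck only recognizes a directive when the exact check prefix is followed by a known suffix such as ':' or '-NOT:'; anything else is parsed as an ordinary comment, so the misspelled checks were silently skipped and those tests were asserting nothing. A minimal sketch of the difference, using a hypothetical test that is not part of this commit:

; RUN: opt < %s -instcombine -S | FileCheck %s

; The next directive is enforced: the run fails if a sub instruction
; survives in the output.
; CHECK-NOT: sub
; The next line is misspelled, so FileCheck treats it as a plain
; comment and never evaluates it.
; CHEK-NOT: sub
define i32 @f(i32 %x) {
  %y = sub i32 %x, 0
  ret i32 %y
}

Here instcombine folds the sub away and the well-spelled CHECK-NOT passes; if the fold ever regressed, only the correctly spelled directive would catch it.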
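The PruneEH test deleted above, like the SimplifyCFG invoke tests removed below, relied on an invoke whose normal and unwind edges name the same block (to label %Cont unwind label %Cont). Under the landingpad-based exception handling model, the verifier only allows a block containing a landingpad to be entered through the unwind edge of an invoke, so such IR no longer validates; that is presumably why these tests are deleted outright rather than updated. A minimal sketch of the shape the verifier does accept, with hypothetical functions that are not part of this commit:

declare void @may_throw()
declare i32 @__gxx_personality_v0(...)

define void @caller() {
entry:
; the normal and unwind destinations must be distinct blocks
  invoke void @may_throw()
          to label %cont unwind label %lpad

cont:                                   ; reached only along the normal edge
  ret void

lpad:                                   ; reached only along the unwind edge
  %exn = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
          cleanup
  resume { i8*, i32 } %exn
}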
diff --git a/test/Transforms/Reassociate/2012-05-08-UndefLeak.ll b/test/Transforms/Reassociate/2012-05-08-UndefLeak.ll
index 1e522e6..2f5a53e 100644
--- a/test/Transforms/Reassociate/2012-05-08-UndefLeak.ll
+++ b/test/Transforms/Reassociate/2012-05-08-UndefLeak.ll
@@ -1,6 +1,8 @@
 ; RUN: opt < %s -reassociate -S | FileCheck %s
 ; PR12169
 ; PR12764
+; XFAIL: *
+; Transform disabled until PR13021 is fixed.

 define i64 @f(i64 %x0) {
 ; CHECK: @f
diff --git a/test/Transforms/Reassociate/mightymul.ll b/test/Transforms/Reassociate/mightymul.ll
new file mode 100644
index 0000000..cfbc485
--- /dev/null
+++ b/test/Transforms/Reassociate/mightymul.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -reassociate
+; PR13021
+
+define i32 @foo(i32 %x) {
+ %t0 = mul i32 %x, %x
+ %t1 = mul i32 %t0, %t0
+ %t2 = mul i32 %t1, %t1
+ %t3 = mul i32 %t2, %t2
+ %t4 = mul i32 %t3, %t3
+ %t5 = mul i32 %t4, %t4
+ %t6 = mul i32 %t5, %t5
+ %t7 = mul i32 %t6, %t6
+ %t8 = mul i32 %t7, %t7
+ %t9 = mul i32 %t8, %t8
+ %t10 = mul i32 %t9, %t9
+ %t11 = mul i32 %t10, %t10
+ %t12 = mul i32 %t11, %t11
+ %t13 = mul i32 %t12, %t12
+ %t14 = mul i32 %t13, %t13
+ %t15 = mul i32 %t14, %t14
+ %t16 = mul i32 %t15, %t15
+ %t17 = mul i32 %t16, %t16
+ %t18 = mul i32 %t17, %t17
+ %t19 = mul i32 %t18, %t18
+ %t20 = mul i32 %t19, %t19
+ %t21 = mul i32 %t20, %t20
+ %t22 = mul i32 %t21, %t21
+ %t23 = mul i32 %t22, %t22
+ %t24 = mul i32 %t23, %t23
+ %t25 = mul i32 %t24, %t24
+ %t26 = mul i32 %t25, %t25
+ %t27 = mul i32 %t26, %t26
+ %t28 = mul i32 %t27, %t27
+ ret i32 %t28
+}
diff --git a/test/Transforms/ScalarRepl/crash.ll b/test/Transforms/ScalarRepl/crash.ll
index cd4dc32..58c5a3a 100644
--- a/test/Transforms/ScalarRepl/crash.ll
+++ b/test/Transforms/ScalarRepl/crash.ll
@@ -260,5 +260,27 @@ entry:
 ret void
 }

+; rdar://11861001 - The dynamic GEP here was incorrectly making all accesses
+; to the alloca think they were also dynamic. Inserts and extracts created to
+; access the vector were all based on the dynamic access, even in BBs
+; not dominated by the GEP.
+define fastcc void @test() optsize inlinehint ssp align 2 {
+entry:
+ %alloc.0.0 = alloca <4 x float>, align 16
+ %bitcast = bitcast <4 x float>* %alloc.0.0 to [4 x float]*
+ %idx3 = getelementptr inbounds [4 x float]* %bitcast, i32 0, i32 3
+ store float 0.000000e+00, float* %idx3, align 4
+ br label %for.body10
+
+for.body10: ; preds = %for.body10, %entry
+ %loopidx = phi i32 [ 0, %entry ], [ undef, %for.body10 ]
+ %unusedidx = getelementptr inbounds <4 x float>* %alloc.0.0, i32 0, i32 %loopidx
+ br i1 undef, label %for.end, label %for.body10
+
+for.end: ; preds = %for.body10
+ store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00>, <4 x float>* %alloc.0.0, align 16
+ ret void
+}
+
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/SimplifyCFG/2003-08-05-MishandleInvoke.ll b/test/Transforms/SimplifyCFG/2003-08-05-MishandleInvoke.ll
deleted file mode 100644
index bc61a75..0000000
--- a/test/Transforms/SimplifyCFG/2003-08-05-MishandleInvoke.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; Do not remove the invoke!
-; -; RUN: opt < %s -simplifycfg -S | grep invoke - -define i32 @test() { - invoke i32 @test( ) - to label %Ret unwind label %Ret ; <i32>:1 [#uses=0] -Ret: ; preds = %0, %0 - %val = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) - catch i8* null - %A = add i32 0, 1 ; <i32> [#uses=1] - ret i32 %A -} - -declare i32 @__gxx_personality_v0(...) diff --git a/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll b/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll deleted file mode 100644 index 27d9d8f..0000000 --- a/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll +++ /dev/null @@ -1,567 +0,0 @@ -; RUN: opt < %s -simplifycfg -disable-output -; END. - %struct..4._102 = type { %struct.QVectorData* } - %struct..5._125 = type { %struct.QMapData* } - %struct.QAbstractTextDocumentLayout = type { %struct.QObject } - %struct.QBasicAtomic = type { i32 } - %struct.QFont = type { %struct.QFontPrivate*, i32 } - %struct.QFontMetrics = type { %struct.QFontPrivate* } - %struct.QFontPrivate = type opaque - %"struct.QFragmentMap<QTextBlockData>" = type { %struct.QFragmentMapData } - %struct.QFragmentMapData = type { %"struct.QFragmentMapData::._154", i32 } - %"struct.QFragmentMapData::._154" = type { %"struct.QFragmentMapData::Header"* } - %"struct.QFragmentMapData::Header" = type { i32, i32, i32, i32, i32, i32, i32, i32 } - %"struct.QHash<uint,QHashDummyValue>" = type { %"struct.QHash<uint,QHashDummyValue>::._152" } - %"struct.QHash<uint,QHashDummyValue>::._152" = type { %struct.QHashData* } - %struct.QHashData = type { %"struct.QHashData::Node"*, %"struct.QHashData::Node"**, %struct.QBasicAtomic, i32, i32, i16, i16, i32, i8 } - %"struct.QHashData::Node" = type { %"struct.QHashData::Node"*, i32 } - %"struct.QList<QObject*>::._92" = type { %struct.QListData } - %"struct.QList<QPointer<QObject> >" = type { %"struct.QList<QObject*>::._92" } - %struct.QListData = type { %"struct.QListData::Data"* } - %"struct.QListData::Data" = type { %struct.QBasicAtomic, i32, i32, i32, i8, [1 x i8*] } - %"struct.QMap<QUrl,QVariant>" = type { %struct..5._125 } - %struct.QMapData = type { %"struct.QMapData::Node"*, [12 x %"struct.QMapData::Node"*], %struct.QBasicAtomic, i32, i32, i32, i8 } - %"struct.QMapData::Node" = type { %"struct.QMapData::Node"*, [1 x %"struct.QMapData::Node"*] } - %struct.QObject = type { i32 (...)**, %struct.QObjectData* } - %struct.QObjectData = type { i32 (...)**, %struct.QObject*, %struct.QObject*, %"struct.QList<QPointer<QObject> >", i8, [3 x i8], i32, i32 } - %struct.QObjectPrivate = type { %struct.QObjectData, i32, %struct.QObject*, %"struct.QList<QPointer<QObject> >", %"struct.QVector<QAbstractTextDocumentLayout::Selection>", %struct.QString } - %struct.QPaintDevice = type { i32 (...)**, i16 } - %struct.QPainter = type { %struct.QPainterPrivate* } - %struct.QPainterPrivate = type opaque - %struct.QPointF = type { double, double } - %struct.QPrinter = type { %struct.QPaintDevice, %struct.QPrinterPrivate* } - %struct.QPrinterPrivate = type opaque - %struct.QRectF = type { double, double, double, double } - %"struct.QSet<uint>" = type { %"struct.QHash<uint,QHashDummyValue>" } - %"struct.QSharedDataPointer<QTextFormatPrivate>" = type { %struct.QTextFormatPrivate* } - %struct.QString = type { %"struct.QString::Data"* } - %"struct.QString::Data" = type { %struct.QBasicAtomic, i32, i32, i16*, i8, i8, [1 x i16] } - %struct.QTextBlockFormat = type { %struct.QTextFormat } - %struct.QTextBlockGroup = type { %struct.QAbstractTextDocumentLayout } - 
%struct.QTextDocumentConfig = type { %struct.QString } - %struct.QTextDocumentPrivate = type { %struct.QObjectPrivate, %struct.QString, %"struct.QVector<QAbstractTextDocumentLayout::Selection>", i1, i32, i32, i1, i32, i32, i32, i32, i1, %struct.QTextFormatCollection, %struct.QTextBlockGroup*, %struct.QAbstractTextDocumentLayout*, %"struct.QFragmentMap<QTextBlockData>", %"struct.QFragmentMap<QTextBlockData>", i32, %"struct.QList<QPointer<QObject> >", %"struct.QList<QPointer<QObject> >", %"struct.QMap<QUrl,QVariant>", %"struct.QMap<QUrl,QVariant>", %"struct.QMap<QUrl,QVariant>", %struct.QTextDocumentConfig, i1, i1, %struct.QPointF } - %struct.QTextFormat = type { %"struct.QSharedDataPointer<QTextFormatPrivate>", i32 } - %struct.QTextFormatCollection = type { %"struct.QVector<QAbstractTextDocumentLayout::Selection>", %"struct.QVector<QAbstractTextDocumentLayout::Selection>", %"struct.QSet<uint>", %struct.QFont } - %struct.QTextFormatPrivate = type opaque - %"struct.QVector<QAbstractTextDocumentLayout::Selection>" = type { %struct..4._102 } - %struct.QVectorData = type { %struct.QBasicAtomic, i32, i32, i8 } - -define void @_ZNK13QTextDocument5printEP8QPrinter(%struct.QAbstractTextDocumentLayout* %this, %struct.QPrinter* %printer) { -entry: - %tmp = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=2] - %tmp.upgrd.1 = alloca %struct.QRectF, align 16 ; <%struct.QRectF*> [#uses=5] - %tmp2 = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=3] - %tmp.upgrd.2 = alloca %struct.QFontMetrics, align 16 ; <%struct.QFontMetrics*> [#uses=4] - %tmp.upgrd.3 = alloca %struct.QFont, align 16 ; <%struct.QFont*> [#uses=4] - %tmp3 = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=2] - %p = alloca %struct.QPainter, align 16 ; <%struct.QPainter*> [#uses=14] - %body = alloca %struct.QRectF, align 16 ; <%struct.QRectF*> [#uses=9] - %pageNumberPos = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=4] - %scaledPageSize = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=6] - %printerPageSize = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=3] - %fmt = alloca %struct.QTextBlockFormat, align 16 ; <%struct.QTextBlockFormat*> [#uses=5] - %font = alloca %struct.QFont, align 16 ; <%struct.QFont*> [#uses=5] - %tmp.upgrd.4 = call %struct.QTextDocumentPrivate* @_ZNK13QTextDocument6d_funcEv( %struct.QAbstractTextDocumentLayout* %this ) ; <%struct.QTextDocumentPrivate*> [#uses=5] - %tmp.upgrd.5 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0 ; <%struct.QPaintDevice*> [#uses=1] - call void @_ZN8QPainterC1EP12QPaintDevice( %struct.QPainter* %p, %struct.QPaintDevice* %tmp.upgrd.5 ) - %tmp.upgrd.6 = invoke i1 @_ZNK8QPainter8isActiveEv( %struct.QPainter* %p ) - to label %invcont unwind label %cleanup329 ; <i1> [#uses=1] -invcont: ; preds = %entry - br i1 %tmp.upgrd.6, label %cond_next, label %cleanup328 -cond_next: ; preds = %invcont - %tmp8 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv( %struct.QAbstractTextDocumentLayout* %this ) - to label %invcont7 unwind label %cleanup329 ; <%struct.QAbstractTextDocumentLayout*> [#uses=0] -invcont7: ; preds = %cond_next - %tmp10 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26 ; <%struct.QPointF*> [#uses=1] - call void @_ZN7QPointFC1Edd( %struct.QPointF* %tmp, double 0.000000e+00, double 0.000000e+00 ) - call void @_ZN6QRectFC1ERK7QPointFRK6QSizeF( %struct.QRectF* %body, %struct.QPointF* %tmp, %struct.QPointF* %tmp10 ) - call void @_ZN7QPointFC1Ev( 
%struct.QPointF* %pageNumberPos ) - %tmp12 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26 ; <%struct.QPointF*> [#uses=1] - %tmp13 = call i1 @_ZNK6QSizeF7isValidEv( %struct.QPointF* %tmp12 ) ; <i1> [#uses=1] - br i1 %tmp13, label %cond_next15, label %bb -cond_next15: ; preds = %invcont7 - %tmp17 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26 ; <%struct.QPointF*> [#uses=1] - %tmp.upgrd.7 = call double @_ZNK6QSizeF6heightEv( %struct.QPointF* %tmp17 ) ; <double> [#uses=1] - %tmp18 = fcmp oeq double %tmp.upgrd.7, 0x41DFFFFFFFC00000 ; <i1> [#uses=1] - br i1 %tmp18, label %bb, label %cond_next20 -cond_next20: ; preds = %cond_next15 - br label %bb21 -bb: ; preds = %cond_next15, %invcont7 - br label %bb21 -bb21: ; preds = %bb, %cond_next20 - %iftmp.406.0 = phi i1 [ false, %bb ], [ true, %cond_next20 ] ; <i1> [#uses=1] - br i1 %iftmp.406.0, label %cond_true24, label %cond_false -cond_true24: ; preds = %bb21 - %tmp.upgrd.8 = invoke i32 @_Z13qt_defaultDpiv( ) - to label %invcont25 unwind label %cleanup329 ; <i32> [#uses=1] -invcont25: ; preds = %cond_true24 - %tmp26 = sitofp i32 %tmp.upgrd.8 to double ; <double> [#uses=2] - %tmp30 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv( %struct.QAbstractTextDocumentLayout* %this ) - to label %invcont29 unwind label %cleanup329 ; <%struct.QAbstractTextDocumentLayout*> [#uses=1] -invcont29: ; preds = %invcont25 - %tmp32 = invoke %struct.QPaintDevice* @_ZNK27QAbstractTextDocumentLayout11paintDeviceEv( %struct.QAbstractTextDocumentLayout* %tmp30 ) - to label %invcont31 unwind label %cleanup329 ; <%struct.QPaintDevice*> [#uses=3] -invcont31: ; preds = %invcont29 - %tmp34 = icmp eq %struct.QPaintDevice* %tmp32, null ; <i1> [#uses=1] - br i1 %tmp34, label %cond_next42, label %cond_true35 -cond_true35: ; preds = %invcont31 - %tmp38 = invoke i32 @_ZNK12QPaintDevice11logicalDpiXEv( %struct.QPaintDevice* %tmp32 ) - to label %invcont37 unwind label %cleanup329 ; <i32> [#uses=1] -invcont37: ; preds = %cond_true35 - %tmp38.upgrd.9 = sitofp i32 %tmp38 to double ; <double> [#uses=1] - %tmp41 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp32 ) - to label %invcont40 unwind label %cleanup329 ; <i32> [#uses=1] -invcont40: ; preds = %invcont37 - %tmp41.upgrd.10 = sitofp i32 %tmp41 to double ; <double> [#uses=1] - br label %cond_next42 -cond_next42: ; preds = %invcont40, %invcont31 - %sourceDpiY.2 = phi double [ %tmp41.upgrd.10, %invcont40 ], [ %tmp26, %invcont31 ] ; <double> [#uses=1] - %sourceDpiX.2 = phi double [ %tmp38.upgrd.9, %invcont40 ], [ %tmp26, %invcont31 ] ; <double> [#uses=1] - %tmp44 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0 ; <%struct.QPaintDevice*> [#uses=1] - %tmp46 = invoke i32 @_ZNK12QPaintDevice11logicalDpiXEv( %struct.QPaintDevice* %tmp44 ) - to label %invcont45 unwind label %cleanup329 ; <i32> [#uses=1] -invcont45: ; preds = %cond_next42 - %tmp46.upgrd.11 = sitofp i32 %tmp46 to double ; <double> [#uses=1] - %tmp48 = fdiv double %tmp46.upgrd.11, %sourceDpiX.2 ; <double> [#uses=2] - %tmp50 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0 ; <%struct.QPaintDevice*> [#uses=1] - %tmp52 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp50 ) - to label %invcont51 unwind label %cleanup329 ; <i32> [#uses=1] -invcont51: ; preds = %invcont45 - %tmp52.upgrd.12 = sitofp i32 %tmp52 to double ; <double> [#uses=1] - %tmp54 = fdiv double %tmp52.upgrd.12, %sourceDpiY.2 ; <double> [#uses=2] - invoke void 
@_ZN8QPainter5scaleEdd( %struct.QPainter* %p, double %tmp48, double %tmp54 ) - to label %invcont57 unwind label %cleanup329 -invcont57: ; preds = %invcont51 - %tmp.upgrd.13 = getelementptr %struct.QPointF* %scaledPageSize, i32 0, i32 0 ; <double*> [#uses=1] - %tmp60 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26, i32 0 ; <double*> [#uses=1] - %tmp61 = load double* %tmp60 ; <double> [#uses=1] - store double %tmp61, double* %tmp.upgrd.13 - %tmp62 = getelementptr %struct.QPointF* %scaledPageSize, i32 0, i32 1 ; <double*> [#uses=1] - %tmp63 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26, i32 1 ; <double*> [#uses=1] - %tmp64 = load double* %tmp63 ; <double> [#uses=1] - store double %tmp64, double* %tmp62 - %tmp65 = call double* @_ZN6QSizeF6rwidthEv( %struct.QPointF* %scaledPageSize ) ; <double*> [#uses=2] - %tmp67 = load double* %tmp65 ; <double> [#uses=1] - %tmp69 = fmul double %tmp67, %tmp48 ; <double> [#uses=1] - store double %tmp69, double* %tmp65 - %tmp71 = call double* @_ZN6QSizeF7rheightEv( %struct.QPointF* %scaledPageSize ) ; <double*> [#uses=2] - %tmp73 = load double* %tmp71 ; <double> [#uses=1] - %tmp75 = fmul double %tmp73, %tmp54 ; <double> [#uses=1] - store double %tmp75, double* %tmp71 - %tmp78 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0 ; <%struct.QPaintDevice*> [#uses=1] - %tmp80 = invoke i32 @_ZNK12QPaintDevice6heightEv( %struct.QPaintDevice* %tmp78 ) - to label %invcont79 unwind label %cleanup329 ; <i32> [#uses=1] -invcont79: ; preds = %invcont57 - %tmp82 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0 ; <%struct.QPaintDevice*> [#uses=1] - %tmp84 = invoke i32 @_ZNK12QPaintDevice5widthEv( %struct.QPaintDevice* %tmp82 ) - to label %invcont83 unwind label %cleanup329 ; <i32> [#uses=1] -invcont83: ; preds = %invcont79 - %tmp80.upgrd.14 = sitofp i32 %tmp80 to double ; <double> [#uses=1] - %tmp84.upgrd.15 = sitofp i32 %tmp84 to double ; <double> [#uses=1] - call void @_ZN6QSizeFC1Edd( %struct.QPointF* %printerPageSize, double %tmp84.upgrd.15, double %tmp80.upgrd.14 ) - %tmp85 = call double @_ZNK6QSizeF6heightEv( %struct.QPointF* %printerPageSize ) ; <double> [#uses=1] - %tmp86 = call double @_ZNK6QSizeF6heightEv( %struct.QPointF* %scaledPageSize ) ; <double> [#uses=1] - %tmp87 = fdiv double %tmp85, %tmp86 ; <double> [#uses=1] - %tmp88 = call double @_ZNK6QSizeF5widthEv( %struct.QPointF* %printerPageSize ) ; <double> [#uses=1] - %tmp89 = call double @_ZNK6QSizeF5widthEv( %struct.QPointF* %scaledPageSize ) ; <double> [#uses=1] - %tmp90 = fdiv double %tmp88, %tmp89 ; <double> [#uses=1] - invoke void @_ZN8QPainter5scaleEdd( %struct.QPainter* %p, double %tmp90, double %tmp87 ) - to label %cond_next194 unwind label %cleanup329 -cond_false: ; preds = %bb21 - %tmp.upgrd.16 = getelementptr %struct.QAbstractTextDocumentLayout* %this, i32 0, i32 0 ; <%struct.QObject*> [#uses=1] - %tmp95 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument5cloneEP7QObject( %struct.QAbstractTextDocumentLayout* %this, %struct.QObject* %tmp.upgrd.16 ) - to label %invcont94 unwind label %cleanup329 ; <%struct.QAbstractTextDocumentLayout*> [#uses=9] -invcont94: ; preds = %cond_false - %tmp99 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv( %struct.QAbstractTextDocumentLayout* %tmp95 ) - to label %invcont98 unwind label %cleanup329 ; <%struct.QAbstractTextDocumentLayout*> [#uses=1] -invcont98: ; preds = %invcont94 - %tmp101 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( 
%struct.QPainter* %p ) - to label %invcont100 unwind label %cleanup329 ; <%struct.QPaintDevice*> [#uses=1] -invcont100: ; preds = %invcont98 - invoke void @_ZN27QAbstractTextDocumentLayout14setPaintDeviceEP12QPaintDevice( %struct.QAbstractTextDocumentLayout* %tmp99, %struct.QPaintDevice* %tmp101 ) - to label %invcont103 unwind label %cleanup329 -invcont103: ; preds = %invcont100 - %tmp105 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p ) - to label %invcont104 unwind label %cleanup329 ; <%struct.QPaintDevice*> [#uses=1] -invcont104: ; preds = %invcont103 - %tmp107 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp105 ) - to label %invcont106 unwind label %cleanup329 ; <i32> [#uses=1] -invcont106: ; preds = %invcont104 - %tmp108 = sitofp i32 %tmp107 to double ; <double> [#uses=1] - %tmp109 = fmul double %tmp108, 0x3FE93264C993264C ; <double> [#uses=1] - %tmp109.upgrd.17 = fptosi double %tmp109 to i32 ; <i32> [#uses=3] - %tmp.upgrd.18 = call %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv( %struct.QAbstractTextDocumentLayout* %tmp95 ) ; <%struct.QTextBlockGroup*> [#uses=1] - invoke void @_ZNK10QTextFrame11frameFormatEv( %struct.QTextBlockFormat* sret %fmt, %struct.QTextBlockGroup* %tmp.upgrd.18 ) - to label %invcont111 unwind label %cleanup329 -invcont111: ; preds = %invcont106 - %tmp112 = sitofp i32 %tmp109.upgrd.17 to double ; <double> [#uses=1] - invoke void @_ZN16QTextFrameFormat9setMarginEd( %struct.QTextBlockFormat* %fmt, double %tmp112 ) - to label %invcont114 unwind label %cleanup192 -invcont114: ; preds = %invcont111 - %tmp116 = call %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv( %struct.QAbstractTextDocumentLayout* %tmp95 ) ; <%struct.QTextBlockGroup*> [#uses=1] - invoke void @_ZN10QTextFrame14setFrameFormatERK16QTextFrameFormat( %struct.QTextBlockGroup* %tmp116, %struct.QTextBlockFormat* %fmt ) - to label %invcont117 unwind label %cleanup192 -invcont117: ; preds = %invcont114 - %tmp119 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p ) - to label %invcont118 unwind label %cleanup192 ; <%struct.QPaintDevice*> [#uses=1] -invcont118: ; preds = %invcont117 - %tmp121 = invoke i32 @_ZNK12QPaintDevice6heightEv( %struct.QPaintDevice* %tmp119 ) - to label %invcont120 unwind label %cleanup192 ; <i32> [#uses=1] -invcont120: ; preds = %invcont118 - %tmp121.upgrd.19 = sitofp i32 %tmp121 to double ; <double> [#uses=1] - %tmp123 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p ) - to label %invcont122 unwind label %cleanup192 ; <%struct.QPaintDevice*> [#uses=1] -invcont122: ; preds = %invcont120 - %tmp125 = invoke i32 @_ZNK12QPaintDevice5widthEv( %struct.QPaintDevice* %tmp123 ) - to label %invcont124 unwind label %cleanup192 ; <i32> [#uses=1] -invcont124: ; preds = %invcont122 - %tmp125.upgrd.20 = sitofp i32 %tmp125 to double ; <double> [#uses=1] - call void @_ZN6QRectFC1Edddd( %struct.QRectF* %tmp.upgrd.1, double 0.000000e+00, double 0.000000e+00, double %tmp125.upgrd.20, double %tmp121.upgrd.19 ) - %tmp126 = getelementptr %struct.QRectF* %body, i32 0, i32 0 ; <double*> [#uses=1] - %tmp127 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 0 ; <double*> [#uses=1] - %tmp128 = load double* %tmp127 ; <double> [#uses=1] - store double %tmp128, double* %tmp126 - %tmp129 = getelementptr %struct.QRectF* %body, i32 0, i32 1 ; <double*> [#uses=1] - %tmp130 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 1 ; <double*> [#uses=1] - %tmp131 = load double* %tmp130 
; <double> [#uses=1] - store double %tmp131, double* %tmp129 - %tmp132 = getelementptr %struct.QRectF* %body, i32 0, i32 2 ; <double*> [#uses=1] - %tmp133 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 2 ; <double*> [#uses=1] - %tmp134 = load double* %tmp133 ; <double> [#uses=1] - store double %tmp134, double* %tmp132 - %tmp135 = getelementptr %struct.QRectF* %body, i32 0, i32 3 ; <double*> [#uses=1] - %tmp136 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 3 ; <double*> [#uses=1] - %tmp137 = load double* %tmp136 ; <double> [#uses=1] - store double %tmp137, double* %tmp135 - %tmp138 = call double @_ZNK6QRectF6heightEv( %struct.QRectF* %body ) ; <double> [#uses=1] - %tmp139 = sitofp i32 %tmp109.upgrd.17 to double ; <double> [#uses=1] - %tmp140 = fsub double %tmp138, %tmp139 ; <double> [#uses=1] - %tmp142 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p ) - to label %invcont141 unwind label %cleanup192 ; <%struct.QPaintDevice*> [#uses=1] -invcont141: ; preds = %invcont124 - invoke void @_ZNK13QTextDocument11defaultFontEv( %struct.QFont* sret %tmp.upgrd.3, %struct.QAbstractTextDocumentLayout* %tmp95 ) - to label %invcont144 unwind label %cleanup192 -invcont144: ; preds = %invcont141 - invoke void @_ZN12QFontMetricsC1ERK5QFontP12QPaintDevice( %struct.QFontMetrics* %tmp.upgrd.2, %struct.QFont* %tmp.upgrd.3, %struct.QPaintDevice* %tmp142 ) - to label %invcont146 unwind label %cleanup173 -invcont146: ; preds = %invcont144 - %tmp149 = invoke i32 @_ZNK12QFontMetrics6ascentEv( %struct.QFontMetrics* %tmp.upgrd.2 ) - to label %invcont148 unwind label %cleanup168 ; <i32> [#uses=1] -invcont148: ; preds = %invcont146 - %tmp149.upgrd.21 = sitofp i32 %tmp149 to double ; <double> [#uses=1] - %tmp150 = fadd double %tmp140, %tmp149.upgrd.21 ; <double> [#uses=1] - %tmp152 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p ) - to label %invcont151 unwind label %cleanup168 ; <%struct.QPaintDevice*> [#uses=1] -invcont151: ; preds = %invcont148 - %tmp154 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp152 ) - to label %invcont153 unwind label %cleanup168 ; <i32> [#uses=1] -invcont153: ; preds = %invcont151 - %tmp155 = mul i32 %tmp154, 5 ; <i32> [#uses=1] - %tmp156 = sdiv i32 %tmp155, 72 ; <i32> [#uses=1] - %tmp156.upgrd.22 = sitofp i32 %tmp156 to double ; <double> [#uses=1] - %tmp157 = fadd double %tmp150, %tmp156.upgrd.22 ; <double> [#uses=1] - %tmp158 = call double @_ZNK6QRectF5widthEv( %struct.QRectF* %body ) ; <double> [#uses=1] - %tmp159 = sitofp i32 %tmp109.upgrd.17 to double ; <double> [#uses=1] - %tmp160 = fsub double %tmp158, %tmp159 ; <double> [#uses=1] - call void @_ZN7QPointFC1Edd( %struct.QPointF* %tmp2, double %tmp160, double %tmp157 ) - %tmp161 = getelementptr %struct.QPointF* %pageNumberPos, i32 0, i32 0 ; <double*> [#uses=1] - %tmp162 = getelementptr %struct.QPointF* %tmp2, i32 0, i32 0 ; <double*> [#uses=1] - %tmp163 = load double* %tmp162 ; <double> [#uses=1] - store double %tmp163, double* %tmp161 - %tmp164 = getelementptr %struct.QPointF* %pageNumberPos, i32 0, i32 1 ; <double*> [#uses=1] - %tmp165 = getelementptr %struct.QPointF* %tmp2, i32 0, i32 1 ; <double*> [#uses=1] - %tmp166 = load double* %tmp165 ; <double> [#uses=1] - store double %tmp166, double* %tmp164 - invoke void @_ZN12QFontMetricsD1Ev( %struct.QFontMetrics* %tmp.upgrd.2 ) - to label %cleanup171 unwind label %cleanup173 -cleanup168: ; preds = %invcont151, %invcont148, %invcont146 - %val168 = landingpad { i8*, i32 } personality i8* 
bitcast (i32 (...)* @__gxx_personality_v0 to i8*) - cleanup - invoke void @_ZN12QFontMetricsD1Ev( %struct.QFontMetrics* %tmp.upgrd.2 ) - to label %cleanup173 unwind label %cleanup173 -cleanup171: ; preds = %invcont153 - invoke void @_ZN5QFontD1Ev( %struct.QFont* %tmp.upgrd.3 ) - to label %finally170 unwind label %cleanup192 -cleanup173: ; preds = %cleanup168, %cleanup168, %invcont153, %invcont144 - %val173 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) - cleanup - invoke void @_ZN5QFontD1Ev( %struct.QFont* %tmp.upgrd.3 ) - to label %cleanup192 unwind label %cleanup192 -finally170: ; preds = %cleanup171 - invoke void @_ZNK13QTextDocument11defaultFontEv( %struct.QFont* sret %font, %struct.QAbstractTextDocumentLayout* %tmp95 ) - to label %invcont177 unwind label %cleanup192 -invcont177: ; preds = %finally170 - invoke void @_ZN5QFont12setPointSizeEi( %struct.QFont* %font, i32 10 ) - to label %invcont179 unwind label %cleanup187 -invcont179: ; preds = %invcont177 - invoke void @_ZN13QTextDocument14setDefaultFontERK5QFont( %struct.QAbstractTextDocumentLayout* %tmp95, %struct.QFont* %font ) - to label %invcont181 unwind label %cleanup187 -invcont181: ; preds = %invcont179 - call void @_ZNK6QRectF4sizeEv( %struct.QPointF* sret %tmp3, %struct.QRectF* %body ) - invoke void @_ZN13QTextDocument11setPageSizeERK6QSizeF( %struct.QAbstractTextDocumentLayout* %tmp95, %struct.QPointF* %tmp3 ) - to label %cleanup185 unwind label %cleanup187 -cleanup185: ; preds = %invcont181 - invoke void @_ZN5QFontD1Ev( %struct.QFont* %font ) - to label %cleanup190 unwind label %cleanup192 -cleanup187: ; preds = %invcont181, %invcont179, %invcont177 - %val187 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) - cleanup - invoke void @_ZN5QFontD1Ev( %struct.QFont* %font ) - to label %cleanup192 unwind label %cleanup192 -cleanup190: ; preds = %cleanup185 - invoke void @_ZN16QTextFrameFormatD1Ev( %struct.QTextBlockFormat* %fmt ) - to label %cond_next194 unwind label %cleanup329 -cleanup192: ; preds = %cleanup187, %cleanup187, %cleanup185, %finally170, %cleanup173, %cleanup173, %cleanup171, %invcont141, %invcont124, %invcont122, %invcont120, %invcont118, %invcont117, %invcont114, %invcont111 - %val192 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) - cleanup - invoke void @_ZN16QTextFrameFormatD1Ev( %struct.QTextBlockFormat* %fmt ) - to label %cleanup329 unwind label %cleanup329 -cond_next194: ; preds = %cleanup190, %invcont83 - %clonedDoc.1 = phi %struct.QAbstractTextDocumentLayout* [ null, %invcont83 ], [ %tmp95, %cleanup190 ] ; <%struct.QAbstractTextDocumentLayout*> [#uses=3] - %doc.1 = phi %struct.QAbstractTextDocumentLayout* [ %this, %invcont83 ], [ %tmp95, %cleanup190 ] ; <%struct.QAbstractTextDocumentLayout*> [#uses=2] - %tmp197 = invoke i1 @_ZNK8QPrinter13collateCopiesEv( %struct.QPrinter* %printer ) - to label %invcont196 unwind label %cleanup329 ; <i1> [#uses=1] -invcont196: ; preds = %cond_next194 - br i1 %tmp197, label %cond_true200, label %cond_false204 -cond_true200: ; preds = %invcont196 - %tmp203 = invoke i32 @_ZNK8QPrinter9numCopiesEv( %struct.QPrinter* %printer ) - to label %invcont202 unwind label %cleanup329 ; <i32> [#uses=1] -invcont202: ; preds = %cond_true200 - br label %cond_next208 -cond_false204: ; preds = %invcont196 - %tmp207 = invoke i32 @_ZNK8QPrinter9numCopiesEv( %struct.QPrinter* %printer ) - to label %invcont206 unwind label %cleanup329 ; <i32> [#uses=1] -invcont206: 
; preds = %cond_false204 - br label %cond_next208 -cond_next208: ; preds = %invcont206, %invcont202 - %pageCopies.0 = phi i32 [ %tmp203, %invcont202 ], [ 1, %invcont206 ] ; <i32> [#uses=2] - %docCopies.0 = phi i32 [ 1, %invcont202 ], [ %tmp207, %invcont206 ] ; <i32> [#uses=2] - %tmp211 = invoke i32 @_ZNK8QPrinter8fromPageEv( %struct.QPrinter* %printer ) - to label %invcont210 unwind label %cleanup329 ; <i32> [#uses=3] -invcont210: ; preds = %cond_next208 - %tmp214 = invoke i32 @_ZNK8QPrinter6toPageEv( %struct.QPrinter* %printer ) - to label %invcont213 unwind label %cleanup329 ; <i32> [#uses=3] -invcont213: ; preds = %invcont210 - %tmp216 = icmp eq i32 %tmp211, 0 ; <i1> [#uses=1] - br i1 %tmp216, label %cond_true217, label %cond_next225 -cond_true217: ; preds = %invcont213 - %tmp219 = icmp eq i32 %tmp214, 0 ; <i1> [#uses=1] - br i1 %tmp219, label %cond_true220, label %cond_next225 -cond_true220: ; preds = %cond_true217 - %tmp223 = invoke i32 @_ZNK13QTextDocument9pageCountEv( %struct.QAbstractTextDocumentLayout* %doc.1 ) - to label %invcont222 unwind label %cleanup329 ; <i32> [#uses=1] -invcont222: ; preds = %cond_true220 - br label %cond_next225 -cond_next225: ; preds = %invcont222, %cond_true217, %invcont213 - %toPage.1 = phi i32 [ %tmp223, %invcont222 ], [ %tmp214, %cond_true217 ], [ %tmp214, %invcont213 ] ; <i32> [#uses=2] - %fromPage.1 = phi i32 [ 1, %invcont222 ], [ %tmp211, %cond_true217 ], [ %tmp211, %invcont213 ] ; <i32> [#uses=2] - %tmp.page = invoke i32 @_ZNK8QPrinter9pageOrderEv( %struct.QPrinter* %printer ) - to label %invcont227 unwind label %cleanup329 ; <i32> [#uses=1] -invcont227: ; preds = %cond_next225 - %tmp228 = icmp eq i32 %tmp.page, 1 ; <i1> [#uses=1] - br i1 %tmp228, label %cond_true230, label %cond_next234 -cond_true230: ; preds = %invcont227 - br label %cond_next234 -cond_next234: ; preds = %cond_true230, %invcont227 - %ascending.1 = phi i1 [ false, %cond_true230 ], [ true, %invcont227 ] ; <i1> [#uses=1] - %toPage.2 = phi i32 [ %fromPage.1, %cond_true230 ], [ %toPage.1, %invcont227 ] ; <i32> [#uses=1] - %fromPage.2 = phi i32 [ %toPage.1, %cond_true230 ], [ %fromPage.1, %invcont227 ] ; <i32> [#uses=1] - br label %bb309 -bb237: ; preds = %cond_true313, %cond_next293 - %iftmp.410.4 = phi i1 [ %iftmp.410.5, %cond_true313 ], [ %iftmp.410.1, %cond_next293 ] ; <i1> [#uses=1] - %page.4 = phi i32 [ %fromPage.2, %cond_true313 ], [ %page.3, %cond_next293 ] ; <i32> [#uses=4] - br label %bb273 -invcont240: ; preds = %cond_true277 - %tmp242 = icmp eq i32 %tmp241, 2 ; <i1> [#uses=1] - br i1 %tmp242, label %bb252, label %cond_next244 -cond_next244: ; preds = %invcont240 - %tmp247 = invoke i32 @_ZNK8QPrinter12printerStateEv( %struct.QPrinter* %printer ) - to label %invcont246 unwind label %cleanup329 ; <i32> [#uses=1] -invcont246: ; preds = %cond_next244 - %tmp248 = icmp eq i32 %tmp247, 3 ; <i1> [#uses=1] - br i1 %tmp248, label %bb252, label %bb253 -bb252: ; preds = %invcont246, %invcont240 - br label %bb254 -bb253: ; preds = %invcont246 - br label %bb254 -bb254: ; preds = %bb253, %bb252 - %iftmp.410.0 = phi i1 [ true, %bb252 ], [ false, %bb253 ] ; <i1> [#uses=2] - br i1 %iftmp.410.0, label %UserCanceled, label %cond_next258 -cond_next258: ; preds = %bb254 - invoke fastcc void @_Z9printPageiP8QPainterPK13QTextDocumentRK6QRectFRK7QPointF( i32 %page.4, %struct.QPainter* %p, %struct.QAbstractTextDocumentLayout* %doc.1, %struct.QRectF* %body, %struct.QPointF* %pageNumberPos ) - to label %invcont261 unwind label %cleanup329 -invcont261: ; preds = %cond_next258 - %tmp263 = add i32 
%pageCopies.0, -1 ; <i32> [#uses=1] - %tmp265 = icmp sgt i32 %tmp263, %j.4 ; <i1> [#uses=1] - br i1 %tmp265, label %cond_true266, label %cond_next270 -cond_true266: ; preds = %invcont261 - %tmp269 = invoke i1 @_ZN8QPrinter7newPageEv( %struct.QPrinter* %printer ) - to label %cond_next270 unwind label %cleanup329 ; <i1> [#uses=0] -cond_next270: ; preds = %cond_true266, %invcont261 - %tmp272 = add i32 %j.4, 1 ; <i32> [#uses=1] - br label %bb273 -bb273: ; preds = %cond_next270, %bb237 - %iftmp.410.1 = phi i1 [ %iftmp.410.4, %bb237 ], [ %iftmp.410.0, %cond_next270 ] ; <i1> [#uses=2] - %j.4 = phi i32 [ 0, %bb237 ], [ %tmp272, %cond_next270 ] ; <i32> [#uses=3] - %tmp276 = icmp slt i32 %j.4, %pageCopies.0 ; <i1> [#uses=1] - br i1 %tmp276, label %cond_true277, label %bb280 -cond_true277: ; preds = %bb273 - %tmp241 = invoke i32 @_ZNK8QPrinter12printerStateEv( %struct.QPrinter* %printer ) - to label %invcont240 unwind label %cleanup329 ; <i32> [#uses=1] -bb280: ; preds = %bb273 - %tmp283 = icmp eq i32 %page.4, %toPage.2 ; <i1> [#uses=1] - br i1 %tmp283, label %bb297, label %cond_next285 -cond_next285: ; preds = %bb280 - br i1 %ascending.1, label %cond_true287, label %cond_false290 -cond_true287: ; preds = %cond_next285 - %tmp289 = add i32 %page.4, 1 ; <i32> [#uses=1] - br label %cond_next293 -cond_false290: ; preds = %cond_next285 - %tmp292 = add i32 %page.4, -1 ; <i32> [#uses=1] - br label %cond_next293 -cond_next293: ; preds = %cond_false290, %cond_true287 - %page.3 = phi i32 [ %tmp289, %cond_true287 ], [ %tmp292, %cond_false290 ] ; <i32> [#uses=1] - %tmp296 = invoke i1 @_ZN8QPrinter7newPageEv( %struct.QPrinter* %printer ) - to label %bb237 unwind label %cleanup329 ; <i1> [#uses=0] -bb297: ; preds = %bb280 - %tmp299 = add i32 %docCopies.0, -1 ; <i32> [#uses=1] - %tmp301 = icmp sgt i32 %tmp299, %i.1 ; <i1> [#uses=1] - br i1 %tmp301, label %cond_true302, label %cond_next306 -cond_true302: ; preds = %bb297 - %tmp305 = invoke i1 @_ZN8QPrinter7newPageEv( %struct.QPrinter* %printer ) - to label %cond_next306 unwind label %cleanup329 ; <i1> [#uses=0] -cond_next306: ; preds = %cond_true302, %bb297 - %tmp308 = add i32 %i.1, 1 ; <i32> [#uses=1] - br label %bb309 -bb309: ; preds = %cond_next306, %cond_next234 - %iftmp.410.5 = phi i1 [ undef, %cond_next234 ], [ %iftmp.410.1, %cond_next306 ] ; <i1> [#uses=1] - %i.1 = phi i32 [ 0, %cond_next234 ], [ %tmp308, %cond_next306 ] ; <i32> [#uses=3] - %tmp312 = icmp slt i32 %i.1, %docCopies.0 ; <i1> [#uses=1] - br i1 %tmp312, label %cond_true313, label %UserCanceled -cond_true313: ; preds = %bb309 - br label %bb237 -UserCanceled: ; preds = %bb309, %bb254 - %tmp318 = icmp eq %struct.QAbstractTextDocumentLayout* %clonedDoc.1, null ; <i1> [#uses=1] - br i1 %tmp318, label %cleanup327, label %cond_true319 -cond_true319: ; preds = %UserCanceled - %tmp.upgrd.23 = getelementptr %struct.QAbstractTextDocumentLayout* %clonedDoc.1, i32 0, i32 0, i32 0 ; <i32 (...)***> [#uses=1] - %tmp.upgrd.24 = load i32 (...)*** %tmp.upgrd.23 ; <i32 (...)**> [#uses=1] - %tmp322 = getelementptr i32 (...)** %tmp.upgrd.24, i32 4 ; <i32 (...)**> [#uses=1] - %tmp.upgrd.25 = load i32 (...)** %tmp322 ; <i32 (...)*> [#uses=1] - %tmp.upgrd.26 = bitcast i32 (...)* %tmp.upgrd.25 to void (%struct.QAbstractTextDocumentLayout*)* ; <void (%struct.QAbstractTextDocumentLayout*)*> [#uses=1] - invoke void %tmp.upgrd.26( %struct.QAbstractTextDocumentLayout* %clonedDoc.1 ) - to label %cleanup327 unwind label %cleanup329 -cleanup327: ; preds = %cond_true319, %UserCanceled - call void @_ZN8QPainterD1Ev( 
%struct.QPainter* %p ) - ret void -cleanup328: ; preds = %invcont - call void @_ZN8QPainterD1Ev( %struct.QPainter* %p ) - ret void -cleanup329: ; preds = %cond_true319, %cond_true302, %cond_next293, %cond_true277, %cond_true266, %cond_next258, %cond_next244, %cond_next225, %cond_true220, %invcont210, %cond_next208, %cond_false204, %cond_true200, %cond_next194, %cleanup192, %cleanup192, %cleanup190, %invcont106, %invcont104, %invcont103, %invcont100, %invcont98, %invcont94, %cond_false, %invcont83, %invcont79, %invcont57, %invcont51, %invcont45, %cond_next42, %invcont37, %cond_true35, %invcont29, %invcont25, %cond_true24, %cond_next, %entry - %val = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) - cleanup - call void @_ZN8QPainterD1Ev( %struct.QPainter* %p ) - resume { i8*, i32 } %val -} - -declare void @_ZN6QSizeFC1Edd(%struct.QPointF*, double, double) - -declare i1 @_ZNK6QSizeF7isValidEv(%struct.QPointF*) - -declare double @_ZNK6QSizeF5widthEv(%struct.QPointF*) - -declare double @_ZNK6QSizeF6heightEv(%struct.QPointF*) - -declare double* @_ZN6QSizeF6rwidthEv(%struct.QPointF*) - -declare double* @_ZN6QSizeF7rheightEv(%struct.QPointF*) - -declare %struct.QTextDocumentPrivate* @_ZNK13QTextDocument6d_funcEv(%struct.QAbstractTextDocumentLayout*) - -declare void @_ZN7QPointFC1Ev(%struct.QPointF*) - -declare void @_ZN7QPointFC1Edd(%struct.QPointF*, double, double) - -declare void @_ZN16QTextFrameFormat9setMarginEd(%struct.QTextBlockFormat*, double) - -declare void @_ZN6QRectFC1Edddd(%struct.QRectF*, double, double, double, double) - -declare void @_ZN6QRectFC1ERK7QPointFRK6QSizeF(%struct.QRectF*, %struct.QPointF*, %struct.QPointF*) - -declare double @_ZNK6QRectF5widthEv(%struct.QRectF*) - -declare double @_ZNK6QRectF6heightEv(%struct.QRectF*) - -declare void @_ZNK6QRectF4sizeEv(%struct.QPointF*, %struct.QRectF*) - -declare void @_ZN16QTextFrameFormatD1Ev(%struct.QTextBlockFormat*) - -declare void @_ZNK10QTextFrame11frameFormatEv(%struct.QTextBlockFormat*, %struct.QTextBlockGroup*) - -declare void @_ZN10QTextFrame14setFrameFormatERK16QTextFrameFormat(%struct.QTextBlockGroup*, %struct.QTextBlockFormat*) - -declare i32 @_ZNK12QPaintDevice5widthEv(%struct.QPaintDevice*) - -declare i32 @_ZNK12QPaintDevice6heightEv(%struct.QPaintDevice*) - -declare i32 @_ZNK12QPaintDevice11logicalDpiXEv(%struct.QPaintDevice*) - -declare i32 @_ZNK12QPaintDevice11logicalDpiYEv(%struct.QPaintDevice*) - -declare %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument5cloneEP7QObject(%struct.QAbstractTextDocumentLayout*, %struct.QObject*) - -declare void @_ZN5QFontD1Ev(%struct.QFont*) - -declare %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv(%struct.QAbstractTextDocumentLayout*) - -declare %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv(%struct.QAbstractTextDocumentLayout*) - -declare i32 @_ZNK13QTextDocument9pageCountEv(%struct.QAbstractTextDocumentLayout*) - -declare void @_ZNK13QTextDocument11defaultFontEv(%struct.QFont*, %struct.QAbstractTextDocumentLayout*) - -declare void @_ZN13QTextDocument14setDefaultFontERK5QFont(%struct.QAbstractTextDocumentLayout*, %struct.QFont*) - -declare void @_ZN13QTextDocument11setPageSizeERK6QSizeF(%struct.QAbstractTextDocumentLayout*, %struct.QPointF*) - -declare void @_Z9printPageiP8QPainterPK13QTextDocumentRK6QRectFRK7QPointF(i32, %struct.QPainter*, %struct.QAbstractTextDocumentLayout*, %struct.QRectF*, %struct.QPointF*) - -declare void @_ZN12QFontMetricsD1Ev(%struct.QFontMetrics*) - -declare void 
@_ZN8QPainterC1EP12QPaintDevice(%struct.QPainter*, %struct.QPaintDevice*) - -declare i1 @_ZNK8QPainter8isActiveEv(%struct.QPainter*) - -declare i32 @_Z13qt_defaultDpiv() - -declare %struct.QPaintDevice* @_ZNK27QAbstractTextDocumentLayout11paintDeviceEv(%struct.QAbstractTextDocumentLayout*) - -declare void @_ZN8QPainter5scaleEdd(%struct.QPainter*, double, double) - -declare %struct.QPaintDevice* @_ZNK8QPainter6deviceEv(%struct.QPainter*) - -declare void @_ZN27QAbstractTextDocumentLayout14setPaintDeviceEP12QPaintDevice(%struct.QAbstractTextDocumentLayout*, %struct.QPaintDevice*) - -declare void @_ZN12QFontMetricsC1ERK5QFontP12QPaintDevice(%struct.QFontMetrics*, %struct.QFont*, %struct.QPaintDevice*) - -declare i32 @_ZNK12QFontMetrics6ascentEv(%struct.QFontMetrics*) - -declare void @_ZN5QFont12setPointSizeEi(%struct.QFont*, i32) - -declare i1 @_ZNK8QPrinter13collateCopiesEv(%struct.QPrinter*) - -declare i32 @_ZNK8QPrinter9numCopiesEv(%struct.QPrinter*) - -declare i32 @_ZNK8QPrinter8fromPageEv(%struct.QPrinter*) - -declare i32 @_ZNK8QPrinter6toPageEv(%struct.QPrinter*) - -declare i32 @_ZNK8QPrinter9pageOrderEv(%struct.QPrinter*) - -declare i32 @_ZNK8QPrinter12printerStateEv(%struct.QPrinter*) - -declare i1 @_ZN8QPrinter7newPageEv(%struct.QPrinter*) - -declare void @_ZN8QPainterD1Ev(%struct.QPainter*) - -declare i32 @__gxx_personality_v0(...) diff --git a/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll b/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll deleted file mode 100644 index abf4455..0000000 --- a/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll +++ /dev/null @@ -1,569 +0,0 @@ -; RUN: opt < %s -simplifycfg -disable-output -; END. - %struct..4._102 = type { %struct.QVectorData* } - %struct..5._125 = type { %struct.QMapData* } - %struct.QAbstractTextDocumentLayout = type { %struct.QObject } - %struct.QBasicAtomic = type { i32 } - %struct.QFont = type { %struct.QFontPrivate*, i32 } - %struct.QFontMetrics = type { %struct.QFontPrivate* } - %struct.QFontPrivate = type opaque - %"struct.QFragmentMap<QTextBlockData>" = type { %struct.QFragmentMapData } - %struct.QFragmentMapData = type { %"struct.QFragmentMapData::._154", i32 } - %"struct.QFragmentMapData::._154" = type { %"struct.QFragmentMapData::Header"* } - %"struct.QFragmentMapData::Header" = type { i32, i32, i32, i32, i32, i32, i32, i32 } - %"struct.QHash<uint,QHashDummyValue>" = type { %"struct.QHash<uint,QHashDummyValue>::._152" } - %"struct.QHash<uint,QHashDummyValue>::._152" = type { %struct.QHashData* } - %struct.QHashData = type { %"struct.QHashData::Node"*, %"struct.QHashData::Node"**, %struct.QBasicAtomic, i32, i32, i16, i16, i32, i8 } - %"struct.QHashData::Node" = type { %"struct.QHashData::Node"*, i32 } - %"struct.QList<QObject*>::._92" = type { %struct.QListData } - %"struct.QList<QPointer<QObject> >" = type { %"struct.QList<QObject*>::._92" } - %struct.QListData = type { %"struct.QListData::Data"* } - %"struct.QListData::Data" = type { %struct.QBasicAtomic, i32, i32, i32, i8, [1 x i8*] } - %"struct.QMap<QUrl,QVariant>" = type { %struct..5._125 } - %struct.QMapData = type { %"struct.QMapData::Node"*, [12 x %"struct.QMapData::Node"*], %struct.QBasicAtomic, i32, i32, i32, i8 } - %"struct.QMapData::Node" = type { %"struct.QMapData::Node"*, [1 x %"struct.QMapData::Node"*] } - %struct.QObject = type { i32 (...)**, %struct.QObjectData* } - %struct.QObjectData = type { i32 (...)**, %struct.QObject*, %struct.QObject*, %"struct.QList<QPointer<QObject> >", i8, [3 x i8], i32, i32 } - %struct.QObjectPrivate = type { 
%struct.QObjectData, i32, %struct.QObject*, %"struct.QList<QPointer<QObject> >", %"struct.QVector<QAbstractTextDocumentLayout::Selection>", %struct.QString } - %struct.QPaintDevice = type { i32 (...)**, i16 } - %struct.QPainter = type { %struct.QPainterPrivate* } - %struct.QPainterPrivate = type opaque - %struct.QPointF = type { double, double } - %struct.QPrinter = type { %struct.QPaintDevice, %struct.QPrinterPrivate* } - %struct.QPrinterPrivate = type opaque - %struct.QRectF = type { double, double, double, double } - %"struct.QSet<uint>" = type { %"struct.QHash<uint,QHashDummyValue>" } - %"struct.QSharedDataPointer<QTextFormatPrivate>" = type { %struct.QTextFormatPrivate* } - %struct.QString = type { %"struct.QString::Data"* } - %"struct.QString::Data" = type { %struct.QBasicAtomic, i32, i32, i16*, i8, i8, [1 x i16] } - %struct.QTextBlockFormat = type { %struct.QTextFormat } - %struct.QTextBlockGroup = type { %struct.QAbstractTextDocumentLayout } - %struct.QTextDocumentConfig = type { %struct.QString } - %struct.QTextDocumentPrivate = type { %struct.QObjectPrivate, %struct.QString, %"struct.QVector<QAbstractTextDocumentLayout::Selection>", i1, i32, i32, i1, i32, i32, i32, i32, i1, %struct.QTextFormatCollection, %struct.QTextBlockGroup*, %struct.QAbstractTextDocumentLayout*, %"struct.QFragmentMap<QTextBlockData>", %"struct.QFragmentMap<QTextBlockData>", i32, %"struct.QList<QPointer<QObject> >", %"struct.QList<QPointer<QObject> >", %"struct.QMap<QUrl,QVariant>", %"struct.QMap<QUrl,QVariant>", %"struct.QMap<QUrl,QVariant>", %struct.QTextDocumentConfig, i1, i1, %struct.QPointF } - %struct.QTextFormat = type { %"struct.QSharedDataPointer<QTextFormatPrivate>", i32 } - %struct.QTextFormatCollection = type { %"struct.QVector<QAbstractTextDocumentLayout::Selection>", %"struct.QVector<QAbstractTextDocumentLayout::Selection>", %"struct.QSet<uint>", %struct.QFont } - %struct.QTextFormatPrivate = type opaque - %"struct.QVector<QAbstractTextDocumentLayout::Selection>" = type { %struct..4._102 } - %struct.QVectorData = type { %struct.QBasicAtomic, i32, i32, i8 } - -define void @_ZNK13QTextDocument5printEP8QPrinter(%struct.QAbstractTextDocumentLayout* %this, %struct.QPrinter* %printer) { -entry: - %tmp = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=2] - %tmp.upgrd.1 = alloca %struct.QRectF, align 16 ; <%struct.QRectF*> [#uses=5] - %tmp2 = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=3] - %tmp.upgrd.2 = alloca %struct.QFontMetrics, align 16 ; <%struct.QFontMetrics*> [#uses=4] - %tmp.upgrd.3 = alloca %struct.QFont, align 16 ; <%struct.QFont*> [#uses=4] - %tmp3 = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=2] - %p = alloca %struct.QPainter, align 16 ; <%struct.QPainter*> [#uses=14] - %body = alloca %struct.QRectF, align 16 ; <%struct.QRectF*> [#uses=9] - %foo = alloca double, align 8 - %bar = alloca double, align 8 - %pageNumberPos = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=4] - %scaledPageSize = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=6] - %printerPageSize = alloca %struct.QPointF, align 16 ; <%struct.QPointF*> [#uses=3] - %fmt = alloca %struct.QTextBlockFormat, align 16 ; <%struct.QTextBlockFormat*> [#uses=5] - %font = alloca %struct.QFont, align 16 ; <%struct.QFont*> [#uses=5] - %tmp.upgrd.4 = call %struct.QTextDocumentPrivate* @_ZNK13QTextDocument6d_funcEv( %struct.QAbstractTextDocumentLayout* %this ) ; <%struct.QTextDocumentPrivate*> [#uses=5] - %tmp.upgrd.5 = getelementptr %struct.QPrinter* %printer, i32 0, i32 
0 ; <%struct.QPaintDevice*> [#uses=1] - call void @_ZN8QPainterC1EP12QPaintDevice( %struct.QPainter* %p, %struct.QPaintDevice* %tmp.upgrd.5 ) - %tmp.upgrd.6 = invoke i1 @_ZNK8QPainter8isActiveEv( %struct.QPainter* %p ) - to label %invcont unwind label %cleanup329 ; <i1> [#uses=1] -invcont: ; preds = %entry - br i1 %tmp.upgrd.6, label %cond_next, label %cleanup328 -cond_next: ; preds = %invcont - %tmp8 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv( %struct.QAbstractTextDocumentLayout* %this ) - to label %invcont7 unwind label %cleanup329 ; <%struct.QAbstractTextDocumentLayout*> [#uses=0] -invcont7: ; preds = %cond_next - %tmp10 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26 ; <%struct.QPointF*> [#uses=1] - call void @_ZN7QPointFC1Edd( %struct.QPointF* %tmp, double 0.000000e+00, double 0.000000e+00 ) - call void @_ZN6QRectFC1ERK7QPointFRK6QSizeF( %struct.QRectF* %body, %struct.QPointF* %tmp, %struct.QPointF* %tmp10 ) - call void @_ZN7QPointFC1Ev( %struct.QPointF* %pageNumberPos ) - %tmp12 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26 ; <%struct.QPointF*> [#uses=1] - %tmp13 = call i1 @_ZNK6QSizeF7isValidEv( %struct.QPointF* %tmp12 ) ; <i1> [#uses=1] - br i1 %tmp13, label %cond_next15, label %bb -cond_next15: ; preds = %invcont7 - %tmp17 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26 ; <%struct.QPointF*> [#uses=1] - %tmp.upgrd.7 = call double @_ZNK6QSizeF6heightEv( %struct.QPointF* %tmp17 ) ; <double> [#uses=1] - %tmp18 = fcmp oeq double %tmp.upgrd.7, 0x41DFFFFFFFC00000 ; <i1> [#uses=1] - br i1 %tmp18, label %bb, label %cond_next20 -cond_next20: ; preds = %cond_next15 - br label %bb21 -bb: ; preds = %cond_next15, %invcont7 - br label %bb21 -bb21: ; preds = %bb, %cond_next20 - %iftmp.406.0 = phi i1 [ false, %bb ], [ true, %cond_next20 ] ; <i1> [#uses=1] - br i1 %iftmp.406.0, label %cond_true24, label %cond_false -cond_true24: ; preds = %bb21 - %tmp.upgrd.8 = invoke i32 @_Z13qt_defaultDpiv( ) - to label %invcont25 unwind label %cleanup329 ; <i32> [#uses=1] -invcont25: ; preds = %cond_true24 - %tmp26 = sitofp i32 %tmp.upgrd.8 to double ; <double> [#uses=2] - %tmp30 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv( %struct.QAbstractTextDocumentLayout* %this ) - to label %invcont29 unwind label %cleanup329 ; <%struct.QAbstractTextDocumentLayout*> [#uses=1] -invcont29: ; preds = %invcont25 - %tmp32 = invoke %struct.QPaintDevice* @_ZNK27QAbstractTextDocumentLayout11paintDeviceEv( %struct.QAbstractTextDocumentLayout* %tmp30 ) - to label %invcont31 unwind label %cleanup329 ; <%struct.QPaintDevice*> [#uses=3] -invcont31: ; preds = %invcont29 - %tmp34 = icmp eq %struct.QPaintDevice* %tmp32, null ; <i1> [#uses=1] - br i1 %tmp34, label %cond_next42, label %cond_true35 -cond_true35: ; preds = %invcont31 - %tmp38 = invoke i32 @_ZNK12QPaintDevice11logicalDpiXEv( %struct.QPaintDevice* %tmp32 ) - to label %invcont37 unwind label %cleanup329 ; <i32> [#uses=1] -invcont37: ; preds = %cond_true35 - %tmp38.upgrd.9 = sitofp i32 %tmp38 to double ; <double> [#uses=1] - %tmp41 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp32 ) - to label %invcont40 unwind label %cleanup329 ; <i32> [#uses=1] -invcont40: ; preds = %invcont37 - %tmp41.upgrd.10 = sitofp i32 %tmp41 to double ; <double> [#uses=1] - br label %cond_next42 -cond_next42: ; preds = %invcont40, %invcont31 - %sourceDpiY.2 = phi double [ %tmp41.upgrd.10, %invcont40 ], [ %tmp26, 
%invcont31 ] ; <double> [#uses=1] - %sourceDpiX.2 = phi double [ %tmp38.upgrd.9, %invcont40 ], [ %tmp26, %invcont31 ] ; <double> [#uses=1] - %tmp44 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0 ; <%struct.QPaintDevice*> [#uses=1] - %tmp46 = invoke i32 @_ZNK12QPaintDevice11logicalDpiXEv( %struct.QPaintDevice* %tmp44 ) - to label %invcont45 unwind label %cleanup329 ; <i32> [#uses=1] -invcont45: ; preds = %cond_next42 - %tmp46.upgrd.11 = sitofp i32 %tmp46 to double ; <double> [#uses=1] - %tmp48 = fdiv double %tmp46.upgrd.11, %sourceDpiX.2 ; <double> [#uses=2] - %tmp50 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0 ; <%struct.QPaintDevice*> [#uses=1] - %tmp52 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp50 ) - to label %invcont51 unwind label %cleanup329 ; <i32> [#uses=1] -invcont51: ; preds = %invcont45 - %tmp52.upgrd.12 = sitofp i32 %tmp52 to double ; <double> [#uses=1] - %tmp54 = fdiv double %tmp52.upgrd.12, %sourceDpiY.2 ; <double> [#uses=2] - invoke void @_ZN8QPainter5scaleEdd( %struct.QPainter* %p, double %tmp48, double %tmp54 ) - to label %invcont57 unwind label %cleanup329 -invcont57: ; preds = %invcont51 - %tmp.upgrd.13 = getelementptr %struct.QPointF* %scaledPageSize, i32 0, i32 0 ; <double*> [#uses=1] - %tmp60 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26, i32 0 ; <double*> [#uses=1] - %tmp61 = load double* %tmp60 ; <double> [#uses=1] - store double %tmp61, double* %tmp.upgrd.13 - %tmp62 = getelementptr %struct.QPointF* %scaledPageSize, i32 0, i32 1 ; <double*> [#uses=1] - %tmp63 = getelementptr %struct.QTextDocumentPrivate* %tmp.upgrd.4, i32 0, i32 26, i32 1 ; <double*> [#uses=1] - %tmp64 = load double* %tmp63 ; <double> [#uses=1] - store double %tmp64, double* %tmp62 - %tmp65 = call double* @_ZN6QSizeF6rwidthEv( %struct.QPointF* %scaledPageSize ) ; <double*> [#uses=2] - %tmp67 = load double* %tmp65 ; <double> [#uses=1] - %tmp69 = fmul double %tmp67, %tmp48 ; <double> [#uses=1] - store double %tmp69, double* %tmp65 - %tmp71 = call double* @_ZN6QSizeF7rheightEv( %struct.QPointF* %scaledPageSize ) ; <double*> [#uses=2] - %tmp73 = load double* %tmp71 ; <double> [#uses=1] - %tmp75 = fmul double %tmp73, %tmp54 ; <double> [#uses=1] - store double %tmp75, double* %tmp71 - %tmp78 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0 ; <%struct.QPaintDevice*> [#uses=1] - %tmp80 = invoke i32 @_ZNK12QPaintDevice6heightEv( %struct.QPaintDevice* %tmp78 ) - to label %invcont79 unwind label %cleanup329 ; <i32> [#uses=1] -invcont79: ; preds = %invcont57 - %tmp82 = getelementptr %struct.QPrinter* %printer, i32 0, i32 0 ; <%struct.QPaintDevice*> [#uses=1] - %tmp84 = invoke i32 @_ZNK12QPaintDevice5widthEv( %struct.QPaintDevice* %tmp82 ) - to label %invcont83 unwind label %cleanup329 ; <i32> [#uses=1] -invcont83: ; preds = %invcont79 - %tmp80.upgrd.14 = sitofp i32 %tmp80 to double ; <double> [#uses=1] - %tmp84.upgrd.15 = sitofp i32 %tmp84 to double ; <double> [#uses=1] - call void @_ZN6QSizeFC1Edd( %struct.QPointF* %printerPageSize, double %tmp84.upgrd.15, double %tmp80.upgrd.14 ) - %tmp85 = call double @_ZNK6QSizeF6heightEv( %struct.QPointF* %printerPageSize ) ; <double> [#uses=1] - %tmp86 = call double @_ZNK6QSizeF6heightEv( %struct.QPointF* %scaledPageSize ) ; <double> [#uses=1] - %tmp87 = fdiv double %tmp85, %tmp86 ; <double> [#uses=1] - %tmp88 = call double @_ZNK6QSizeF5widthEv( %struct.QPointF* %printerPageSize ) ; <double> [#uses=1] - %tmp89 = call double @_ZNK6QSizeF5widthEv( %struct.QPointF* %scaledPageSize ) ; 
<double> [#uses=1] - %tmp90 = fdiv double %tmp88, %tmp89 ; <double> [#uses=1] - invoke void @_ZN8QPainter5scaleEdd( %struct.QPainter* %p, double %tmp90, double %tmp87 ) - to label %cond_next194 unwind label %cleanup329 -cond_false: ; preds = %bb21 - %tmp.upgrd.16 = getelementptr %struct.QAbstractTextDocumentLayout* %this, i32 0, i32 0 ; <%struct.QObject*> [#uses=1] - %tmp95 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument5cloneEP7QObject( %struct.QAbstractTextDocumentLayout* %this, %struct.QObject* %tmp.upgrd.16 ) - to label %invcont94 unwind label %cleanup329 ; <%struct.QAbstractTextDocumentLayout*> [#uses=9] -invcont94: ; preds = %cond_false - %tmp99 = invoke %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv( %struct.QAbstractTextDocumentLayout* %tmp95 ) - to label %invcont98 unwind label %cleanup329 ; <%struct.QAbstractTextDocumentLayout*> [#uses=1] -invcont98: ; preds = %invcont94 - %tmp101 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p ) - to label %invcont100 unwind label %cleanup329 ; <%struct.QPaintDevice*> [#uses=1] -invcont100: ; preds = %invcont98 - invoke void @_ZN27QAbstractTextDocumentLayout14setPaintDeviceEP12QPaintDevice( %struct.QAbstractTextDocumentLayout* %tmp99, %struct.QPaintDevice* %tmp101 ) - to label %invcont103 unwind label %cleanup329 -invcont103: ; preds = %invcont100 - %tmp105 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p ) - to label %invcont104 unwind label %cleanup329 ; <%struct.QPaintDevice*> [#uses=1] -invcont104: ; preds = %invcont103 - %tmp107 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp105 ) - to label %invcont106 unwind label %cleanup329 ; <i32> [#uses=1] -invcont106: ; preds = %invcont104 - %tmp108 = sitofp i32 %tmp107 to double ; <double> [#uses=1] - %tmp109 = fmul double %tmp108, 0x3FE93264C993264C ; <double> [#uses=1] - %tmp109.upgrd.17 = fptosi double %tmp109 to i32 ; <i32> [#uses=3] - %tmp.upgrd.18 = call %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv( %struct.QAbstractTextDocumentLayout* %tmp95 ) ; <%struct.QTextBlockGroup*> [#uses=1] - invoke void @_ZNK10QTextFrame11frameFormatEv( %struct.QTextBlockFormat* sret %fmt, %struct.QTextBlockGroup* %tmp.upgrd.18 ) - to label %invcont111 unwind label %cleanup329 -invcont111: ; preds = %invcont106 - %tmp112 = sitofp i32 %tmp109.upgrd.17 to double ; <double> [#uses=1] - invoke void @_ZN16QTextFrameFormat9setMarginEd( %struct.QTextBlockFormat* %fmt, double %tmp112 ) - to label %invcont114 unwind label %cleanup192 -invcont114: ; preds = %invcont111 - %tmp116 = call %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv( %struct.QAbstractTextDocumentLayout* %tmp95 ) ; <%struct.QTextBlockGroup*> [#uses=1] - invoke void @_ZN10QTextFrame14setFrameFormatERK16QTextFrameFormat( %struct.QTextBlockGroup* %tmp116, %struct.QTextBlockFormat* %fmt ) - to label %invcont117 unwind label %cleanup192 -invcont117: ; preds = %invcont114 - %tmp119 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p ) - to label %invcont118 unwind label %cleanup192 ; <%struct.QPaintDevice*> [#uses=1] -invcont118: ; preds = %invcont117 - %tmp121 = invoke i32 @_ZNK12QPaintDevice6heightEv( %struct.QPaintDevice* %tmp119 ) - to label %invcont120 unwind label %cleanup192 ; <i32> [#uses=1] -invcont120: ; preds = %invcont118 - %tmp121.upgrd.19 = sitofp i32 %tmp121 to double ; <double> [#uses=1] - %tmp123 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p 
) - to label %invcont122 unwind label %cleanup192 ; <%struct.QPaintDevice*> [#uses=1] -invcont122: ; preds = %invcont120 - %tmp125 = invoke i32 @_ZNK12QPaintDevice5widthEv( %struct.QPaintDevice* %tmp123 ) - to label %invcont124 unwind label %cleanup192 ; <i32> [#uses=1] -invcont124: ; preds = %invcont122 - %tmp125.upgrd.20 = sitofp i32 %tmp125 to double ; <double> [#uses=1] - call void @_ZN6QRectFC1Edddd( %struct.QRectF* %tmp.upgrd.1, double 0.000000e+00, double 0.000000e+00, double %tmp125.upgrd.20, double %tmp121.upgrd.19 ) - %tmp126 = getelementptr %struct.QRectF* %body, i32 0, i32 0 ; <double*> [#uses=1] - %tmp127 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 0 ; <double*> [#uses=1] - %tmp128 = load double* %tmp127 ; <double> [#uses=1] - store double %tmp128, double* %tmp126 - %tmp129 = getelementptr %struct.QRectF* %body, i32 0, i32 1 ; <double*> [#uses=1] - %tmp130 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 1 ; <double*> [#uses=1] - %tmp131 = load double* %tmp130 ; <double> [#uses=1] - store double %tmp131, double* %tmp129 - %tmp132 = getelementptr %struct.QRectF* %body, i32 0, i32 2 ; <double*> [#uses=1] - %tmp133 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 2 ; <double*> [#uses=1] - %tmp134 = load double* %tmp133 ; <double> [#uses=1] - store double %tmp134, double* %tmp132 - %tmp135 = getelementptr %struct.QRectF* %body, i32 0, i32 3 ; <double*> [#uses=1] - %tmp136 = getelementptr %struct.QRectF* %tmp.upgrd.1, i32 0, i32 3 ; <double*> [#uses=1] - %tmp137 = load double* %tmp136 ; <double> [#uses=1] - store double %tmp137, double* %tmp135 - %tmp138 = call double @_ZNK6QRectF6heightEv( %struct.QRectF* %body ) ; <double> [#uses=1] - %tmp139 = sitofp i32 %tmp109.upgrd.17 to double ; <double> [#uses=1] - %tmp140 = fsub double %tmp138, %tmp139 ; <double> [#uses=1] - %tmp142 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p ) - to label %invcont141 unwind label %cleanup192 ; <%struct.QPaintDevice*> [#uses=1] -invcont141: ; preds = %invcont124 - invoke void @_ZNK13QTextDocument11defaultFontEv( %struct.QFont* sret %tmp.upgrd.3, %struct.QAbstractTextDocumentLayout* %tmp95 ) - to label %invcont144 unwind label %cleanup192 -invcont144: ; preds = %invcont141 - invoke void @_ZN12QFontMetricsC1ERK5QFontP12QPaintDevice( %struct.QFontMetrics* %tmp.upgrd.2, %struct.QFont* %tmp.upgrd.3, %struct.QPaintDevice* %tmp142 ) - to label %invcont146 unwind label %cleanup173 -invcont146: ; preds = %invcont144 - %tmp149 = invoke i32 @_ZNK12QFontMetrics6ascentEv( %struct.QFontMetrics* %tmp.upgrd.2 ) - to label %invcont148 unwind label %cleanup168 ; <i32> [#uses=1] -invcont148: ; preds = %invcont146 - %tmp149.upgrd.21 = sitofp i32 %tmp149 to double ; <double> [#uses=1] - %tmp150 = fadd double %tmp140, %tmp149.upgrd.21 ; <double> [#uses=1] - %tmp152 = invoke %struct.QPaintDevice* @_ZNK8QPainter6deviceEv( %struct.QPainter* %p ) - to label %invcont151 unwind label %cleanup168 ; <%struct.QPaintDevice*> [#uses=1] -invcont151: ; preds = %invcont148 - %tmp154 = invoke i32 @_ZNK12QPaintDevice11logicalDpiYEv( %struct.QPaintDevice* %tmp152 ) - to label %invcont153 unwind label %cleanup168 ; <i32> [#uses=1] -invcont153: ; preds = %invcont151 - %tmp155 = mul i32 %tmp154, 5 ; <i32> [#uses=1] - %tmp156 = sdiv i32 %tmp155, 72 ; <i32> [#uses=1] - %tmp156.upgrd.22 = sitofp i32 %tmp156 to double ; <double> [#uses=1] - %tmp157 = fadd double %tmp150, %tmp156.upgrd.22 ; <double> [#uses=1] - %tmp158 = call double @_ZNK6QRectF5widthEv( %struct.QRectF* %body ) ; <double> 
[#uses=1] - %tmp159 = sitofp i32 %tmp109.upgrd.17 to double ; <double> [#uses=1] - %tmp160 = fsub double %tmp158, %tmp159 ; <double> [#uses=1] - call void @_ZN7QPointFC1Edd( %struct.QPointF* %tmp2, double %tmp160, double %tmp157 ) - %tmp161 = getelementptr %struct.QPointF* %pageNumberPos, i32 0, i32 0 ; <double*> [#uses=1] - %tmp162 = getelementptr %struct.QPointF* %tmp2, i32 0, i32 0 ; <double*> [#uses=1] - %tmp163 = load double* %tmp162 ; <double> [#uses=1] - store double %tmp163, double* %tmp161 - %tmp164 = getelementptr %struct.QPointF* %pageNumberPos, i32 0, i32 1 ; <double*> [#uses=1] - %tmp165 = getelementptr %struct.QPointF* %tmp2, i32 0, i32 1 ; <double*> [#uses=1] - %tmp166 = load double* %tmp165 ; <double> [#uses=1] - store double %tmp166, double* %tmp164 - invoke void @_ZN12QFontMetricsD1Ev( %struct.QFontMetrics* %tmp.upgrd.2 ) - to label %cleanup171 unwind label %cleanup173 -cleanup168: ; preds = %invcont151, %invcont148, %invcont146 - %val168 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) - cleanup - invoke void @_ZN12QFontMetricsD1Ev( %struct.QFontMetrics* %tmp.upgrd.2 ) - to label %cleanup173 unwind label %cleanup173 -cleanup171: ; preds = %invcont153 - invoke void @_ZN5QFontD1Ev( %struct.QFont* %tmp.upgrd.3 ) - to label %finally170 unwind label %cleanup192 -cleanup173: ; preds = %cleanup168, %cleanup168, %invcont153, %invcont144 - %val173 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) - cleanup - invoke void @_ZN5QFontD1Ev( %struct.QFont* %tmp.upgrd.3 ) - to label %cleanup192 unwind label %cleanup192 -finally170: ; preds = %cleanup171 - invoke void @_ZNK13QTextDocument11defaultFontEv( %struct.QFont* sret %font, %struct.QAbstractTextDocumentLayout* %tmp95 ) - to label %invcont177 unwind label %cleanup192 -invcont177: ; preds = %finally170 - invoke void @_ZN5QFont12setPointSizeEi( %struct.QFont* %font, i32 10 ) - to label %invcont179 unwind label %cleanup187 -invcont179: ; preds = %invcont177 - invoke void @_ZN13QTextDocument14setDefaultFontERK5QFont( %struct.QAbstractTextDocumentLayout* %tmp95, %struct.QFont* %font ) - to label %invcont181 unwind label %cleanup187 -invcont181: ; preds = %invcont179 - call void @_ZNK6QRectF4sizeEv( %struct.QPointF* sret %tmp3, %struct.QRectF* %body ) - invoke void @_ZN13QTextDocument11setPageSizeERK6QSizeF( %struct.QAbstractTextDocumentLayout* %tmp95, %struct.QPointF* %tmp3 ) - to label %cleanup185 unwind label %cleanup187 -cleanup185: ; preds = %invcont181 - invoke void @_ZN5QFontD1Ev( %struct.QFont* %font ) - to label %cleanup190 unwind label %cleanup192 -cleanup187: ; preds = %invcont181, %invcont179, %invcont177 - %val187 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) - cleanup - invoke void @_ZN5QFontD1Ev( %struct.QFont* %font ) - to label %cleanup192 unwind label %cleanup192 -cleanup190: ; preds = %cleanup185 - invoke void @_ZN16QTextFrameFormatD1Ev( %struct.QTextBlockFormat* %fmt ) - to label %cond_next194 unwind label %cleanup329 -cleanup192: ; preds = %cleanup187, %cleanup187, %cleanup185, %finally170, %cleanup173, %cleanup173, %cleanup171, %invcont141, %invcont124, %invcont122, %invcont120, %invcont118, %invcont117, %invcont114, %invcont111 - %val192 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) - cleanup - invoke void @_ZN16QTextFrameFormatD1Ev( %struct.QTextBlockFormat* %fmt ) - to label %cleanup329 unwind label %cleanup329 -cond_next194: ; preds = 
%cleanup190, %invcont83 - %clonedDoc.1 = phi %struct.QAbstractTextDocumentLayout* [ null, %invcont83 ], [ %tmp95, %cleanup190 ] ; <%struct.QAbstractTextDocumentLayout*> [#uses=3] - %doc.1 = phi %struct.QAbstractTextDocumentLayout* [ %this, %invcont83 ], [ %tmp95, %cleanup190 ] ; <%struct.QAbstractTextDocumentLayout*> [#uses=2] - %tmp197 = invoke i1 @_ZNK8QPrinter13collateCopiesEv( %struct.QPrinter* %printer ) - to label %invcont196 unwind label %cleanup329 ; <i1> [#uses=1] -invcont196: ; preds = %cond_next194 - br i1 %tmp197, label %cond_true200, label %cond_false204 -cond_true200: ; preds = %invcont196 - %tmp2000 = load double* %foo - store double %tmp2000, double* %bar - %tmp203 = invoke i32 @_ZNK8QPrinter9numCopiesEv( %struct.QPrinter* %printer ) - to label %cond_next208 unwind label %cleanup329 ; <i32> [#uses=1] -cond_false204: ; preds = %invcont196 - %tmp2001 = load double* %foo - store double %tmp2001, double* %bar - %tmp207 = invoke i32 @_ZNK8QPrinter9numCopiesEv( %struct.QPrinter* %printer ) - to label %cond_next208 unwind label %cleanup329 ; <i32> [#uses=1] -cond_next208: ; preds = %invcont206, %invcont202 - %pageCopies.0 = phi i32 [ %tmp203, %cond_true200 ], [ 1, %cond_false204 ] ; <i32> [#uses=2] - %docCopies.0 = phi i32 [ 1, %cond_true200 ], [ %tmp207, %cond_false204 ] ; <i32> [#uses=2] - %tmp211 = invoke i32 @_ZNK8QPrinter8fromPageEv( %struct.QPrinter* %printer ) - to label %invcont210 unwind label %cleanup329 ; <i32> [#uses=3] -invcont210: ; preds = %cond_next208 - %tmp214 = invoke i32 @_ZNK8QPrinter6toPageEv( %struct.QPrinter* %printer ) - to label %invcont213 unwind label %cleanup329 ; <i32> [#uses=3] -invcont213: ; preds = %invcont210 - %tmp216 = icmp eq i32 %tmp211, 0 ; <i1> [#uses=1] - br i1 %tmp216, label %cond_true217, label %cond_next225 -cond_true217: ; preds = %invcont213 - %tmp219 = icmp eq i32 %tmp214, 0 ; <i1> [#uses=1] - br i1 %tmp219, label %cond_true220, label %cond_next225 -cond_true220: ; preds = %cond_true217 - %tmp223 = invoke i32 @_ZNK13QTextDocument9pageCountEv( %struct.QAbstractTextDocumentLayout* %doc.1 ) - to label %invcont222 unwind label %cleanup329 ; <i32> [#uses=1] -invcont222: ; preds = %cond_true220 - br label %cond_next225 -cond_next225: ; preds = %invcont222, %cond_true217, %invcont213 - %toPage.1 = phi i32 [ %tmp223, %invcont222 ], [ %tmp214, %cond_true217 ], [ %tmp214, %invcont213 ] ; <i32> [#uses=2] - %fromPage.1 = phi i32 [ 1, %invcont222 ], [ %tmp211, %cond_true217 ], [ %tmp211, %invcont213 ] ; <i32> [#uses=2] - %tmp.page = invoke i32 @_ZNK8QPrinter9pageOrderEv( %struct.QPrinter* %printer ) - to label %invcont227 unwind label %cleanup329 ; <i32> [#uses=1] -invcont227: ; preds = %cond_next225 - %tmp228 = icmp eq i32 %tmp.page, 1 ; <i1> [#uses=1] - br i1 %tmp228, label %cond_true230, label %cond_next234 -cond_true230: ; preds = %invcont227 - br label %cond_next234 -cond_next234: ; preds = %cond_true230, %invcont227 - %ascending.1 = phi i1 [ false, %cond_true230 ], [ true, %invcont227 ] ; <i1> [#uses=1] - %toPage.2 = phi i32 [ %fromPage.1, %cond_true230 ], [ %toPage.1, %invcont227 ] ; <i32> [#uses=1] - %fromPage.2 = phi i32 [ %toPage.1, %cond_true230 ], [ %fromPage.1, %invcont227 ] ; <i32> [#uses=1] - br label %bb309 -bb237: ; preds = %cond_true313, %cond_next293 - %iftmp.410.4 = phi i1 [ %iftmp.410.5, %cond_true313 ], [ %iftmp.410.1, %cond_next293 ] ; <i1> [#uses=1] - %page.4 = phi i32 [ %fromPage.2, %cond_true313 ], [ %page.3, %cond_next293 ] ; <i32> [#uses=4] - br label %bb273 -invcont240: ; preds = %cond_true277 - %tmp242 = icmp eq i32 
%tmp241, 2 ; <i1> [#uses=1] - br i1 %tmp242, label %bb252, label %cond_next244 -cond_next244: ; preds = %invcont240 - %tmp247 = invoke i32 @_ZNK8QPrinter12printerStateEv( %struct.QPrinter* %printer ) - to label %invcont246 unwind label %cleanup329 ; <i32> [#uses=1] -invcont246: ; preds = %cond_next244 - %tmp248 = icmp eq i32 %tmp247, 3 ; <i1> [#uses=1] - br i1 %tmp248, label %bb252, label %bb253 -bb252: ; preds = %invcont246, %invcont240 - br label %bb254 -bb253: ; preds = %invcont246 - br label %bb254 -bb254: ; preds = %bb253, %bb252 - %iftmp.410.0 = phi i1 [ true, %bb252 ], [ false, %bb253 ] ; <i1> [#uses=2] - br i1 %iftmp.410.0, label %UserCanceled, label %cond_next258 -cond_next258: ; preds = %bb254 - invoke fastcc void @_Z9printPageiP8QPainterPK13QTextDocumentRK6QRectFRK7QPointF( i32 %page.4, %struct.QPainter* %p, %struct.QAbstractTextDocumentLayout* %doc.1, %struct.QRectF* %body, %struct.QPointF* %pageNumberPos ) - to label %invcont261 unwind label %cleanup329 -invcont261: ; preds = %cond_next258 - %tmp263 = add i32 %pageCopies.0, -1 ; <i32> [#uses=1] - %tmp265 = icmp sgt i32 %tmp263, %j.4 ; <i1> [#uses=1] - br i1 %tmp265, label %cond_true266, label %cond_next270 -cond_true266: ; preds = %invcont261 - %tmp269 = invoke i1 @_ZN8QPrinter7newPageEv( %struct.QPrinter* %printer ) - to label %cond_next270 unwind label %cleanup329 ; <i1> [#uses=0] -cond_next270: ; preds = %cond_true266, %invcont261 - %tmp272 = add i32 %j.4, 1 ; <i32> [#uses=1] - br label %bb273 -bb273: ; preds = %cond_next270, %bb237 - %iftmp.410.1 = phi i1 [ %iftmp.410.4, %bb237 ], [ %iftmp.410.0, %cond_next270 ] ; <i1> [#uses=2] - %j.4 = phi i32 [ 0, %bb237 ], [ %tmp272, %cond_next270 ] ; <i32> [#uses=3] - %tmp276 = icmp slt i32 %j.4, %pageCopies.0 ; <i1> [#uses=1] - br i1 %tmp276, label %cond_true277, label %bb280 -cond_true277: ; preds = %bb273 - %tmp241 = invoke i32 @_ZNK8QPrinter12printerStateEv( %struct.QPrinter* %printer ) - to label %invcont240 unwind label %cleanup329 ; <i32> [#uses=1] -bb280: ; preds = %bb273 - %tmp283 = icmp eq i32 %page.4, %toPage.2 ; <i1> [#uses=1] - br i1 %tmp283, label %bb297, label %cond_next285 -cond_next285: ; preds = %bb280 - br i1 %ascending.1, label %cond_true287, label %cond_false290 -cond_true287: ; preds = %cond_next285 - %tmp289 = add i32 %page.4, 1 ; <i32> [#uses=1] - br label %cond_next293 -cond_false290: ; preds = %cond_next285 - %tmp292 = add i32 %page.4, -1 ; <i32> [#uses=1] - br label %cond_next293 -cond_next293: ; preds = %cond_false290, %cond_true287 - %page.3 = phi i32 [ %tmp289, %cond_true287 ], [ %tmp292, %cond_false290 ] ; <i32> [#uses=1] - %tmp296 = invoke i1 @_ZN8QPrinter7newPageEv( %struct.QPrinter* %printer ) - to label %bb237 unwind label %cleanup329 ; <i1> [#uses=0] -bb297: ; preds = %bb280 - %tmp299 = add i32 %docCopies.0, -1 ; <i32> [#uses=1] - %tmp301 = icmp sgt i32 %tmp299, %i.1 ; <i1> [#uses=1] - br i1 %tmp301, label %cond_true302, label %cond_next306 -cond_true302: ; preds = %bb297 - %tmp305 = invoke i1 @_ZN8QPrinter7newPageEv( %struct.QPrinter* %printer ) - to label %cond_next306 unwind label %cleanup329 ; <i1> [#uses=0] -cond_next306: ; preds = %cond_true302, %bb297 - %tmp308 = add i32 %i.1, 1 ; <i32> [#uses=1] - br label %bb309 -bb309: ; preds = %cond_next306, %cond_next234 - %iftmp.410.5 = phi i1 [ undef, %cond_next234 ], [ %iftmp.410.1, %cond_next306 ] ; <i1> [#uses=1] - %i.1 = phi i32 [ 0, %cond_next234 ], [ %tmp308, %cond_next306 ] ; <i32> [#uses=3] - %tmp312 = icmp slt i32 %i.1, %docCopies.0 ; <i1> [#uses=1] - br i1 %tmp312, label %cond_true313, label 
%UserCanceled -cond_true313: ; preds = %bb309 - br label %bb237 -UserCanceled: ; preds = %bb309, %bb254 - %tmp318 = icmp eq %struct.QAbstractTextDocumentLayout* %clonedDoc.1, null ; <i1> [#uses=1] - br i1 %tmp318, label %cleanup327, label %cond_true319 -cond_true319: ; preds = %UserCanceled - %tmp.upgrd.23 = getelementptr %struct.QAbstractTextDocumentLayout* %clonedDoc.1, i32 0, i32 0, i32 0 ; <i32 (...)***> [#uses=1] - %tmp.upgrd.24 = load i32 (...)*** %tmp.upgrd.23 ; <i32 (...)**> [#uses=1] - %tmp322 = getelementptr i32 (...)** %tmp.upgrd.24, i32 4 ; <i32 (...)**> [#uses=1] - %tmp.upgrd.25 = load i32 (...)** %tmp322 ; <i32 (...)*> [#uses=1] - %tmp.upgrd.26 = bitcast i32 (...)* %tmp.upgrd.25 to void (%struct.QAbstractTextDocumentLayout*)* ; <void (%struct.QAbstractTextDocumentLayout*)*> [#uses=1] - invoke void %tmp.upgrd.26( %struct.QAbstractTextDocumentLayout* %clonedDoc.1 ) - to label %cleanup327 unwind label %cleanup329 -cleanup327: ; preds = %cond_true319, %UserCanceled - call void @_ZN8QPainterD1Ev( %struct.QPainter* %p ) - ret void -cleanup328: ; preds = %invcont - call void @_ZN8QPainterD1Ev( %struct.QPainter* %p ) - ret void -cleanup329: ; preds = %cond_true319, %cond_true302, %cond_next293, %cond_true277, %cond_true266, %cond_next258, %cond_next244, %cond_next225, %cond_true220, %invcont210, %cond_next208, %cond_false204, %cond_true200, %cond_next194, %cleanup192, %cleanup192, %cleanup190, %invcont106, %invcont104, %invcont103, %invcont100, %invcont98, %invcont94, %cond_false, %invcont83, %invcont79, %invcont57, %invcont51, %invcont45, %cond_next42, %invcont37, %cond_true35, %invcont29, %invcont25, %cond_true24, %cond_next, %entry - %val329 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) - cleanup - call void @_ZN8QPainterD1Ev( %struct.QPainter* %p ) - resume { i8*, i32 } %val329 -} - -declare void @_ZN6QSizeFC1Edd(%struct.QPointF*, double, double) - -declare i1 @_ZNK6QSizeF7isValidEv(%struct.QPointF*) - -declare double @_ZNK6QSizeF5widthEv(%struct.QPointF*) - -declare double @_ZNK6QSizeF6heightEv(%struct.QPointF*) - -declare double* @_ZN6QSizeF6rwidthEv(%struct.QPointF*) - -declare double* @_ZN6QSizeF7rheightEv(%struct.QPointF*) - -declare %struct.QTextDocumentPrivate* @_ZNK13QTextDocument6d_funcEv(%struct.QAbstractTextDocumentLayout*) - -declare void @_ZN7QPointFC1Ev(%struct.QPointF*) - -declare void @_ZN7QPointFC1Edd(%struct.QPointF*, double, double) - -declare void @_ZN16QTextFrameFormat9setMarginEd(%struct.QTextBlockFormat*, double) - -declare void @_ZN6QRectFC1Edddd(%struct.QRectF*, double, double, double, double) - -declare void @_ZN6QRectFC1ERK7QPointFRK6QSizeF(%struct.QRectF*, %struct.QPointF*, %struct.QPointF*) - -declare double @_ZNK6QRectF5widthEv(%struct.QRectF*) - -declare double @_ZNK6QRectF6heightEv(%struct.QRectF*) - -declare void @_ZNK6QRectF4sizeEv(%struct.QPointF*, %struct.QRectF*) - -declare void @_ZN16QTextFrameFormatD1Ev(%struct.QTextBlockFormat*) - -declare void @_ZNK10QTextFrame11frameFormatEv(%struct.QTextBlockFormat*, %struct.QTextBlockGroup*) - -declare void @_ZN10QTextFrame14setFrameFormatERK16QTextFrameFormat(%struct.QTextBlockGroup*, %struct.QTextBlockFormat*) - -declare i32 @_ZNK12QPaintDevice5widthEv(%struct.QPaintDevice*) - -declare i32 @_ZNK12QPaintDevice6heightEv(%struct.QPaintDevice*) - -declare i32 @_ZNK12QPaintDevice11logicalDpiXEv(%struct.QPaintDevice*) - -declare i32 @_ZNK12QPaintDevice11logicalDpiYEv(%struct.QPaintDevice*) - -declare %struct.QAbstractTextDocumentLayout* 
@_ZNK13QTextDocument5cloneEP7QObject(%struct.QAbstractTextDocumentLayout*, %struct.QObject*) - -declare void @_ZN5QFontD1Ev(%struct.QFont*) - -declare %struct.QAbstractTextDocumentLayout* @_ZNK13QTextDocument14documentLayoutEv(%struct.QAbstractTextDocumentLayout*) - -declare %struct.QTextBlockGroup* @_ZNK13QTextDocument9rootFrameEv(%struct.QAbstractTextDocumentLayout*) - -declare i32 @_ZNK13QTextDocument9pageCountEv(%struct.QAbstractTextDocumentLayout*) - -declare void @_ZNK13QTextDocument11defaultFontEv(%struct.QFont*, %struct.QAbstractTextDocumentLayout*) - -declare void @_ZN13QTextDocument14setDefaultFontERK5QFont(%struct.QAbstractTextDocumentLayout*, %struct.QFont*) - -declare void @_ZN13QTextDocument11setPageSizeERK6QSizeF(%struct.QAbstractTextDocumentLayout*, %struct.QPointF*) - -declare void @_Z9printPageiP8QPainterPK13QTextDocumentRK6QRectFRK7QPointF(i32, %struct.QPainter*, %struct.QAbstractTextDocumentLayout*, %struct.QRectF*, %struct.QPointF*) - -declare void @_ZN12QFontMetricsD1Ev(%struct.QFontMetrics*) - -declare void @_ZN8QPainterC1EP12QPaintDevice(%struct.QPainter*, %struct.QPaintDevice*) - -declare i1 @_ZNK8QPainter8isActiveEv(%struct.QPainter*) - -declare i32 @_Z13qt_defaultDpiv() - -declare %struct.QPaintDevice* @_ZNK27QAbstractTextDocumentLayout11paintDeviceEv(%struct.QAbstractTextDocumentLayout*) - -declare void @_ZN8QPainter5scaleEdd(%struct.QPainter*, double, double) - -declare %struct.QPaintDevice* @_ZNK8QPainter6deviceEv(%struct.QPainter*) - -declare void @_ZN27QAbstractTextDocumentLayout14setPaintDeviceEP12QPaintDevice(%struct.QAbstractTextDocumentLayout*, %struct.QPaintDevice*) - -declare void @_ZN12QFontMetricsC1ERK5QFontP12QPaintDevice(%struct.QFontMetrics*, %struct.QFont*, %struct.QPaintDevice*) - -declare i32 @_ZNK12QFontMetrics6ascentEv(%struct.QFontMetrics*) - -declare void @_ZN5QFont12setPointSizeEi(%struct.QFont*, i32) - -declare i1 @_ZNK8QPrinter13collateCopiesEv(%struct.QPrinter*) - -declare i32 @_ZNK8QPrinter9numCopiesEv(%struct.QPrinter*) - -declare i32 @_ZNK8QPrinter8fromPageEv(%struct.QPrinter*) - -declare i32 @_ZNK8QPrinter6toPageEv(%struct.QPrinter*) - -declare i32 @_ZNK8QPrinter9pageOrderEv(%struct.QPrinter*) - -declare i32 @_ZNK8QPrinter12printerStateEv(%struct.QPrinter*) - -declare i1 @_ZN8QPrinter7newPageEv(%struct.QPrinter*) - -declare void @_ZN8QPainterD1Ev(%struct.QPainter*) - -declare i32 @__gxx_personality_v0(...) 
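
The file removed above is dominated by pre-3.1-style C++ exception-handling IR: every potentially-throwing call is an invoke whose unwind edge funnels into landingpad/cleanup blocks that run destructors before the exception resumes. A minimal sketch of the kind of source that lowers to this shape; the type and function names here are hypothetical, not taken from the deleted test:

    // Illustrative only: each call that may throw is compiled as an
    // 'invoke ... unwind label %cleanup', and the cleanup landing pad
    // runs ~Font() before 'resume' re-raises the exception.
    struct Font { ~Font(); };   // hypothetical RAII type, dtor defined elsewhere
    void mayThrow();            // may throw; definition elsewhere

    void printPages() {
      Font f;       // destructor must also run on the unwind path
      mayThrow();   // invoke #1, unwind edge -> cleanup landing pad
      mayThrow();   // invoke #2, sharing the same cleanup block
    }               // normal exit and landing pad both call ~Font()
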
diff --git a/test/Transforms/SimplifyLibCalls/floor.ll b/test/Transforms/SimplifyLibCalls/floor.ll
index 03dcdf5..93c62c2 100644
--- a/test/Transforms/SimplifyLibCalls/floor.ll
+++ b/test/Transforms/SimplifyLibCalls/floor.ll
@@ -9,6 +9,8 @@
 ; DO-SIMPLIFY: call float @ceilf(
 ; DO-SIMPLIFY: call float @roundf(
 ; DO-SIMPLIFY: call float @nearbyintf(
+; DO-SIMPLIFY: call float @truncf(
+; DO-SIMPLIFY: call float @fabsf(
 
 ; C89-SIMPLIFY: call float @floorf(
 ; C89-SIMPLIFY: call float @ceilf(
@@ -19,6 +21,8 @@
 ; DONT-SIMPLIFY: call double @ceil(
 ; DONT-SIMPLIFY: call double @round(
 ; DONT-SIMPLIFY: call double @nearbyint(
+; DONT-SIMPLIFY: call double @trunc(
+; DONT-SIMPLIFY: call double @fabs(
 
 declare double @floor(double)
 
@@ -28,6 +32,10 @@ declare double @round(double)
 
 declare double @nearbyint(double)
 
+declare double @trunc(double)
+
+declare double @fabs(double)
+
 define float @test_floor(float %C) {
   %D = fpext float %C to double   ; <double> [#uses=1]
   ; --> floorf
@@ -60,3 +68,18 @@ define float @test_nearbyint(float %C) {
   ret float %F
 }
 
+define float @test_trunc(float %C) {
+  %D = fpext float %C to double
+  ; --> truncf
+  %E = call double @trunc(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+define float @test_fabs(float %C) {
+  %D = fpext float %C to double
+  ; --> fabsf
+  %E = call double @fabs(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
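
The added FileCheck lines extend the libcall-shrinking coverage to trunc and fabs: when a float is widened to double only to feed a double-precision libm call whose result is immediately narrowed back, the pass may substitute the float-precision variant. A rough C++ rendering of the pattern the IR above encodes (illustrative only, not part of the patch):

    #include <cmath>

    // With libcall simplification enabled, an optimizer may rewrite each
    // widen/call/narrow sequence below to the float-precision libcall.
    float shrink_trunc(float x) {
      return static_cast<float>(std::trunc(static_cast<double>(x)));  // -> truncf(x)
    }

    float shrink_fabs(float x) {
      return static_cast<float>(std::fabs(static_cast<double>(x)));   // -> fabsf(x)
    }
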
diff --git a/test/Verifier/invoke.ll b/test/Verifier/invoke.ll
index 06f40f0..c2750bb 100644
--- a/test/Verifier/invoke.ll
+++ b/test/Verifier/invoke.ll
@@ -19,7 +19,6 @@ L2:            ; preds = %0
   br label %L
 L:             ; preds = %L2, %L1, %L1
 ; CHECK: The unwind destination does not have a landingpad instruction
-; CHECK: Instruction does not dominate all uses
   ret i32 %A
 }
 
@@ -34,9 +33,12 @@ define void @f1() {
 entry:
 ; OK
   invoke void @llvm.donothing()
-          to label %cont unwind label %cont
+          to label %conta unwind label %contb
 
-cont:
+conta:
+  ret void
+
+contb:
   %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
           filter [0 x i8*] zeroinitializer
   ret void
@@ -63,3 +65,15 @@ entry:
   %call = call i32 @fn(i8 (i8, i8)* @llvm.expect.i8)
   ret i32 %call
 }
+
+define void @f4() {
+entry:
+  invoke void @llvm.donothing()
+          to label %cont unwind label %cont
+
+cont:
+; CHECK: Block containing LandingPadInst must be jumped to only by the unwind edge of an invoke.
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          filter [0 x i8*] zeroinitializer
+  ret void
+}
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 004763f..81f297f 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -35,6 +35,7 @@
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include <memory>
 using namespace llvm;
@@ -214,6 +215,11 @@ DontPlaceZerosInBSS("nozero-initialized-in-bss",
   cl::init(false));
 
 static cl::opt<bool>
+DisableSimplifyLibCalls("disable-simplify-libcalls",
+  cl::desc("Disable simplify-libcalls"),
+  cl::init(false));
+
+static cl::opt<bool>
 EnableGuaranteedTailCallOpt("tailcallopt",
   cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
   cl::init(false));
@@ -262,6 +268,11 @@ static cl::opt<std::string> StartAfter("start-after",
   cl::value_desc("pass-name"),
   cl::init(""));
 
+static cl::opt<unsigned>
+SSPBufferSize("stack-protector-buffer-size", cl::init(8),
+  cl::desc("Lower bound for a buffer to be considered for "
+           "stack protection"));
+
 // GetFileNameRoot - Helper function to get the basename of a filename.
 static inline std::string
 GetFileNameRoot(const std::string &InputFilename) {
@@ -453,6 +464,7 @@ int main(int argc, char **argv) {
   Options.PositionIndependentExecutable = EnablePIE;
   Options.EnableSegmentedStacks = SegmentedStacks;
   Options.UseInitArray = UseInitArray;
+  Options.SSPBufferSize = SSPBufferSize;
 
   std::auto_ptr<TargetMachine>
     target(TheTarget->createTargetMachine(TheTriple.getTriple(),
@@ -487,6 +499,12 @@ int main(int argc, char **argv) {
   // Build up all of the passes that we want to do to the module.
   PassManager PM;
 
+  // Add an appropriate TargetLibraryInfo pass for the module's triple.
+  TargetLibraryInfo *TLI = new TargetLibraryInfo(TheTriple);
+  if (DisableSimplifyLibCalls)
+    TLI->disableAllFunctions();
+  PM.add(TLI);
+
   // Add the target data from the target machine, if it exists, or the module.
   if (const TargetData *TD = Target.getTargetData())
     PM.add(new TargetData(*TD));
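
The llc change above is the general recipe for any tool that runs a codegen pipeline: register a TargetLibraryInfo immutable pass keyed to the module's triple, and disable every entry when -disable-simplify-libcalls is given so optimizations stop treating calls such as floor() as known C library functions. A sketch of the same wiring in a hypothetical standalone driver, using the LLVM-3.1-era API this patch targets; the helper name is made up:

    #include "llvm/PassManager.h"
    #include "llvm/Target/TargetLibraryInfo.h"
    #include "llvm/ADT/Triple.h"

    // Hypothetical helper: set up a pass manager that respects a
    // -disable-simplify-libcalls style flag the same way llc now does.
    static void addLibraryInfo(llvm::PassManager &PM, const llvm::Triple &T,
                               bool DisableSimplifyLibCalls) {
      // TargetLibraryInfo is an immutable analysis: passes query it to decide
      // whether a call may be treated as the libm/libc function it names.
      llvm::TargetLibraryInfo *TLI = new llvm::TargetLibraryInfo(T);
      if (DisableSimplifyLibCalls)
        TLI->disableAllFunctions();  // every libcall becomes "unknown"
      PM.add(TLI);                   // the pass manager takes ownership
    }
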
diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp
index c1c8b24..7c53701 100644
--- a/tools/llvm-ar/llvm-ar.cpp
+++ b/tools/llvm-ar/llvm-ar.cpp
@@ -50,7 +50,7 @@ static cl::extrahelp MoreHelp(
   "  m[abiSs]     - move file(s) in the archive\n"
   "  p[kN]        - print file(s) found in the archive\n"
   "  q[ufsS]      - quick append file(s) to the archive\n"
-  "  r[abfiuzRsS] - replace or insert file(s) into the archive\n"
+  "  r[abfiuRsS]  - replace or insert file(s) into the archive\n"
   "  t            - display contents of archive\n"
   "  x[No]        - extract file(s) from the archive\n"
   "\nMODIFIERS (operation specific):\n"
@@ -66,7 +66,6 @@ static cl::extrahelp MoreHelp(
   "  [s] - create an archive index (cf. ranlib)\n"
   "  [S] - do not build a symbol table\n"
   "  [u] - update only files newer than archive contents\n"
-  "  [z] - compress files before inserting/extracting\n"
   "\nMODIFIERS (generic):\n"
   "  [c] - do not warn if the library had to be created\n"
   "  [v] - be verbose about actions taken\n"
@@ -101,7 +100,6 @@ bool SymTable = true;       ///< 's' & 'S' modifiers
 bool OnlyUpdate = false;    ///< 'u' modifier
 bool Verbose = false;       ///< 'v' modifier
 bool ReallyVerbose = false; ///< 'V' modifier
-bool Compression = false;   ///< 'z' modifier
 
 // Relative Positional Argument (for insert/move). This variable holds
 // the name of the archive member to which the 'a', 'b' or 'i' modifier
@@ -208,7 +206,6 @@ ArchiveOperation parseCommandLine() {
     case 'u': OnlyUpdate = true; break;
     case 'v': Verbose = true; break;
     case 'V': Verbose = ReallyVerbose = true; break;
-    case 'z': Compression = true; break;
     case 'a':
       getRelPos();
       AddAfter = true;
@@ -260,8 +257,6 @@ ArchiveOperation parseCommandLine() {
     throw "The 'f' modifier is only applicable to the 'q' and 'r' operations";
   if (OnlyUpdate && Operation != ReplaceOrInsert)
     throw "The 'u' modifier is only applicable to the 'r' operation";
-  if (Compression && Operation!=ReplaceOrInsert && Operation!=Extract)
-    throw "The 'z' modifier is only applicable to the 'r' and 'x' operations";
   if (Count > 1 && Members.size() > 1)
     throw "Only one member name may be specified with the 'N' modifier";
 
@@ -413,8 +408,6 @@ doDisplayTable(std::string* ErrMsg) {
       // Zrw-r--r-- 500/ 500    525 Nov 8 17:42 2004 Makefile
       if (I->isBitcode())
         outs() << "b";
-      else if (I->isCompressed())
-        outs() << "Z";
       else
         outs() << " ";
       unsigned mode = I->getMode();
@@ -437,7 +430,7 @@ doDisplayTable(std::string* ErrMsg) {
 }
 
 // doExtract - Implement the 'x' operation. This function extracts files back to
-// the file system, making sure to uncompress any that were compressed
+// the file system.
 bool
 doExtract(std::string* ErrMsg) {
   if (buildPaths(false, ErrMsg))
@@ -503,7 +496,7 @@ doDelete(std::string* ErrMsg) {
   }
 
   // We're done editting, reconstruct the archive.
-  if (TheArchive->writeToDisk(SymTable,TruncateNames,Compression,ErrMsg))
+  if (TheArchive->writeToDisk(SymTable,TruncateNames,ErrMsg))
     return true;
   if (ReallyVerbose)
     printSymbolTable();
@@ -558,7 +551,7 @@ doMove(std::string* ErrMsg) {
   }
 
   // We're done editting, reconstruct the archive.
-  if (TheArchive->writeToDisk(SymTable,TruncateNames,Compression,ErrMsg))
+  if (TheArchive->writeToDisk(SymTable,TruncateNames,ErrMsg))
     return true;
   if (ReallyVerbose)
     printSymbolTable();
@@ -583,7 +576,7 @@ doQuickAppend(std::string* ErrMsg) {
   }
 
   // We're done editting, reconstruct the archive.
-  if (TheArchive->writeToDisk(SymTable,TruncateNames,Compression,ErrMsg))
+  if (TheArchive->writeToDisk(SymTable,TruncateNames,ErrMsg))
     return true;
   if (ReallyVerbose)
     printSymbolTable();
@@ -681,7 +674,7 @@ doReplaceOrInsert(std::string* ErrMsg) {
   }
 
   // We're done editting, reconstruct the archive.
-  if (TheArchive->writeToDisk(SymTable,TruncateNames,Compression,ErrMsg))
+  if (TheArchive->writeToDisk(SymTable,TruncateNames,ErrMsg))
    return true;
   if (ReallyVerbose)
     printSymbolTable();
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index 9afbd4d..eb52acc 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -256,7 +256,6 @@ static void DumpSymbolNameForGlobalValue(GlobalValue &GV) {
   if (GV.hasPrivateLinkage() ||
       GV.hasLinkerPrivateLinkage() ||
       GV.hasLinkerPrivateWeakLinkage() ||
-      GV.hasLinkerPrivateWeakDefAutoLinkage() ||
       GV.hasAvailableExternallyLinkage())
     return;
   char TypeChar = TypeCharForSymbol(GV);
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 7aaebb2..b431c76 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -104,7 +104,7 @@ static bool error(error_code ec) {
   return true;
 }
 
-static const Target *GetTarget(const ObjectFile *Obj = NULL) {
+static const Target *getTarget(const ObjectFile *Obj = NULL) {
   // Figure out the target triple.
llvm::Triple TheTriple("unknown-unknown-unknown"); if (TripleName.empty()) { @@ -163,11 +163,11 @@ static bool RelocAddressLess(RelocationRef a, RelocationRef b) { } static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { - const Target *TheTarget = GetTarget(Obj); - if (!TheTarget) { - // GetTarget prints out stuff. + const Target *TheTarget = getTarget(Obj); + // getTarget() will have already issued a diagnostic if necessary, so + // just bail here if it failed. + if (!TheTarget) return; - } error_code ec; for (section_iterator i = Obj->begin_sections(), @@ -206,7 +206,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) { if (InlineRelocs) { for (relocation_iterator ri = i->begin_relocations(), re = i->end_relocations(); - ri != re; ri.increment(ec)) { + ri != re; ri.increment(ec)) { if (error(ec)) break; Rels.push_back(*ri); } @@ -463,9 +463,8 @@ static void PrintCOFFSymbolTable(const COFFObjectFile *coff) { << format("assoc %d comdat %d\n" , unsigned(asd->Number) , unsigned(asd->Selection)); - } else { + } else outs() << "AUX Unknown\n"; - } } else { StringRef name; if (error(coff->getSymbol(i, symbol))) return; @@ -609,13 +608,12 @@ static void DumpInput(StringRef file) { return; } - if (Archive *a = dyn_cast<Archive>(binary.get())) { + if (Archive *a = dyn_cast<Archive>(binary.get())) DumpArchive(a); - } else if (ObjectFile *o = dyn_cast<ObjectFile>(binary.get())) { + else if (ObjectFile *o = dyn_cast<ObjectFile>(binary.get())) DumpObject(o); - } else { + else errs() << ToolName << ": '" << file << "': " << "Unrecognized file type.\n"; - } } int main(int argc, char **argv) { diff --git a/tools/llvm-ranlib/llvm-ranlib.cpp b/tools/llvm-ranlib/llvm-ranlib.cpp index 64f795f..4006765 100644 --- a/tools/llvm-ranlib/llvm-ranlib.cpp +++ b/tools/llvm-ranlib/llvm-ranlib.cpp @@ -81,7 +81,7 @@ int main(int argc, char **argv) { if (!TheArchive) throw err_msg; - if (TheArchive->writeToDisk(true, false, false, &err_msg )) + if (TheArchive->writeToDisk(true, false, &err_msg )) throw err_msg; if (Verbose) diff --git a/tools/llvm-shlib/Makefile b/tools/llvm-shlib/Makefile index 75bee07..6d6c6e9 100644 --- a/tools/llvm-shlib/Makefile +++ b/tools/llvm-shlib/Makefile @@ -63,7 +63,7 @@ ifeq ($(HOST_OS),Darwin) endif endif -ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux FreeBSD OpenBSD GNU)) +ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux FreeBSD OpenBSD GNU Bitrig)) # Include everything from the .a's into the shared library. 
LLVMLibsOptions := -Wl,--whole-archive $(LLVMLibsOptions) \ -Wl,--no-whole-archive diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp index 0813947..b80bc34 100644 --- a/tools/lto/LTOCodeGenerator.cpp +++ b/tools/lto/LTOCodeGenerator.cpp @@ -46,10 +46,12 @@ #include "llvm/ADT/StringExtras.h" using namespace llvm; -static cl::opt<bool> DisableInline("disable-inlining", cl::init(false), +static cl::opt<bool> +DisableInline("disable-inlining", cl::init(false), cl::desc("Do not run the inliner pass")); -static cl::opt<bool> DisableGVNLoadPRE("disable-gvn-loadpre", cl::init(false), +static cl::opt<bool> +DisableGVNLoadPRE("disable-gvn-loadpre", cl::init(false), cl::desc("Do not run the GVN load PRE pass")); const char* LTOCodeGenerator::getVersionString() { @@ -152,7 +154,7 @@ bool LTOCodeGenerator::writeMergedModules(const char *path, bool LTOCodeGenerator::compile_to_file(const char** name, std::string& errMsg) { // make unique temp .o file to put generated object file sys::PathWithStatus uniqueObjPath("lto-llvm.o"); - if ( uniqueObjPath.createTemporaryFileOnDisk(false, &errMsg) ) { + if (uniqueObjPath.createTemporaryFileOnDisk(false, &errMsg)) { uniqueObjPath.eraseFromDisk(); return true; } @@ -172,7 +174,7 @@ bool LTOCodeGenerator::compile_to_file(const char** name, std::string& errMsg) { } objFile.keep(); - if ( genResult ) { + if (genResult) { uniqueObjPath.eraseFromDisk(); return true; } @@ -202,47 +204,49 @@ const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg) { sys::Path(_nativeObjectPath).eraseFromDisk(); // return buffer, unless error - if ( _nativeObjectFile == NULL ) + if (_nativeObjectFile == NULL) return NULL; *length = _nativeObjectFile->getBufferSize(); return _nativeObjectFile->getBufferStart(); } bool LTOCodeGenerator::determineTarget(std::string& errMsg) { - if ( _target == NULL ) { - std::string Triple = _linker.getModule()->getTargetTriple(); - if (Triple.empty()) - Triple = sys::getDefaultTargetTriple(); - - // create target machine from info for merged modules - const Target *march = TargetRegistry::lookupTarget(Triple, errMsg); - if ( march == NULL ) - return true; - - // The relocation model is actually a static member of TargetMachine and - // needs to be set before the TargetMachine is instantiated. - Reloc::Model RelocModel = Reloc::Default; - switch( _codeModel ) { - case LTO_CODEGEN_PIC_MODEL_STATIC: - RelocModel = Reloc::Static; - break; - case LTO_CODEGEN_PIC_MODEL_DYNAMIC: - RelocModel = Reloc::PIC_; - break; - case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC: - RelocModel = Reloc::DynamicNoPIC; - break; - } - - // construct LTOModule, hand over ownership of module and target - SubtargetFeatures Features; - Features.getDefaultSubtargetFeatures(llvm::Triple(Triple)); - std::string FeatureStr = Features.getString(); - TargetOptions Options; - _target = march->createTargetMachine(Triple, _mCpu, FeatureStr, Options, - RelocModel, CodeModel::Default, - CodeGenOpt::Aggressive); + if (_target != NULL) + return false; + + std::string Triple = _linker.getModule()->getTargetTriple(); + if (Triple.empty()) + Triple = sys::getDefaultTargetTriple(); + + // create target machine from info for merged modules + const Target *march = TargetRegistry::lookupTarget(Triple, errMsg); + if (march == NULL) + return true; + + // The relocation model is actually a static member of TargetMachine and + // needs to be set before the TargetMachine is instantiated. 
+ Reloc::Model RelocModel = Reloc::Default; + switch (_codeModel) { + case LTO_CODEGEN_PIC_MODEL_STATIC: + RelocModel = Reloc::Static; + break; + case LTO_CODEGEN_PIC_MODEL_DYNAMIC: + RelocModel = Reloc::PIC_; + break; + case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC: + RelocModel = Reloc::DynamicNoPIC; + break; } + + // construct LTOModule, hand over ownership of module and target + SubtargetFeatures Features; + Features.getDefaultSubtargetFeatures(llvm::Triple(Triple)); + std::string FeatureStr = Features.getString(); + TargetOptions Options; + LTOModule::getTargetOptions(Options); + _target = march->createTargetMachine(Triple, _mCpu, FeatureStr, Options, + RelocModel, CodeModel::Default, + CodeGenOpt::Aggressive); return false; } @@ -334,13 +338,13 @@ void LTOCodeGenerator::applyScopeRestrictions() { /// Optimize merged modules using various IPO passes bool LTOCodeGenerator::generateObjectFile(raw_ostream &out, std::string &errMsg) { - if ( this->determineTarget(errMsg) ) + if (this->determineTarget(errMsg)) return true; Module* mergedModule = _linker.getModule(); // if options were requested, set them - if ( !_codegenOptions.empty() ) + if (!_codegenOptions.empty()) cl::ParseCommandLineOptions(_codegenOptions.size(), const_cast<char **>(&_codegenOptions[0])); @@ -402,7 +406,7 @@ void LTOCodeGenerator::setCodeGenDebugOptions(const char *options) { !o.first.empty(); o = getToken(o.second)) { // ParseCommandLineOptions() expects argv[0] to be program name. Lazily add // that. - if ( _codegenOptions.empty() ) + if (_codegenOptions.empty()) _codegenOptions.push_back(strdup("libLTO")); _codegenOptions.push_back(strdup(o.first.str().c_str())); } diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp index 97b5889..d588f6a 100644 --- a/tools/lto/LTOModule.cpp +++ b/tools/lto/LTOModule.cpp @@ -26,6 +26,7 @@ #include "llvm/MC/SubtargetFeature.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" @@ -37,6 +38,123 @@ #include "llvm/ADT/Triple.h" using namespace llvm; +static cl::opt<bool> +EnableFPMAD("enable-fp-mad", + cl::desc("Enable less precise MAD instructions to be generated"), + cl::init(false)); + +static cl::opt<bool> +DisableFPElim("disable-fp-elim", + cl::desc("Disable frame pointer elimination optimization"), + cl::init(false)); + +static cl::opt<bool> +DisableFPElimNonLeaf("disable-non-leaf-fp-elim", + cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"), + cl::init(false)); + +static cl::opt<bool> +EnableUnsafeFPMath("enable-unsafe-fp-math", + cl::desc("Enable optimizations that may decrease FP precision"), + cl::init(false)); + +static cl::opt<bool> +EnableNoInfsFPMath("enable-no-infs-fp-math", + cl::desc("Enable FP math optimizations that assume no +-Infs"), + cl::init(false)); + +static cl::opt<bool> +EnableNoNaNsFPMath("enable-no-nans-fp-math", + cl::desc("Enable FP math optimizations that assume no NaNs"), + cl::init(false)); + +static cl::opt<bool> +EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math", + cl::Hidden, + cl::desc("Force codegen to assume rounding mode can change dynamically"), + cl::init(false)); + +static cl::opt<bool> +GenerateSoftFloatCalls("soft-float", + cl::desc("Generate software floating point library calls"), + cl::init(false)); + +static cl::opt<llvm::FloatABI::ABIType> +FloatABIForCalls("float-abi", + cl::desc("Choose float 
ABI type"),
+  cl::init(FloatABI::Default),
+  cl::values(
+    clEnumValN(FloatABI::Default, "default",
+               "Target default float ABI type"),
+    clEnumValN(FloatABI::Soft, "soft",
+               "Soft float ABI (implied by -soft-float)"),
+    clEnumValN(FloatABI::Hard, "hard",
+               "Hard float ABI (uses FP registers)"),
+    clEnumValEnd));
+
+static cl::opt<llvm::FPOpFusion::FPOpFusionMode>
+FuseFPOps("fp-contract",
+  cl::desc("Enable aggressive formation of fused FP ops"),
+  cl::init(FPOpFusion::Standard),
+  cl::values(
+    clEnumValN(FPOpFusion::Fast, "fast",
+               "Fuse FP ops whenever profitable"),
+    clEnumValN(FPOpFusion::Standard, "on",
+               "Only fuse 'blessed' FP ops."),
+    clEnumValN(FPOpFusion::Strict, "off",
+               "Only fuse FP ops when the result won't be affected."),
+    clEnumValEnd));
+
+static cl::opt<bool>
+DontPlaceZerosInBSS("nozero-initialized-in-bss",
+  cl::desc("Don't place zero-initialized symbols into bss section"),
+  cl::init(false));
+
+static cl::opt<bool>
+EnableGuaranteedTailCallOpt("tailcallopt",
+  cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
+  cl::init(false));
+
+static cl::opt<bool>
+DisableTailCalls("disable-tail-calls",
+  cl::desc("Never emit tail calls"),
+  cl::init(false));
+
+static cl::opt<unsigned>
+OverrideStackAlignment("stack-alignment",
+  cl::desc("Override default stack alignment"),
+  cl::init(0));
+
+static cl::opt<bool>
+EnableRealignStack("realign-stack",
+  cl::desc("Realign stack if needed"),
+  cl::init(true));
+
+static cl::opt<std::string>
+TrapFuncName("trap-func", cl::Hidden,
+  cl::desc("Emit a call to trap function rather than a trap instruction"),
+  cl::init(""));
+
+static cl::opt<bool>
+EnablePIE("enable-pie",
+  cl::desc("Assume the creation of a position independent executable."),
+  cl::init(false));
+
+static cl::opt<bool>
+SegmentedStacks("segmented-stacks",
+  cl::desc("Use segmented stacks if possible."),
+  cl::init(false));
+
+static cl::opt<bool>
+UseInitArray("use-init-array",
+  cl::desc("Use .init_array instead of .ctors."),
+  cl::init(false));
+
+static cl::opt<unsigned>
+SSPBufferSize("stack-protector-buffer-size", cl::init(8),
+  cl::desc("Lower bound for a buffer to be considered for "
+           "stack protection"));
+
 LTOModule::LTOModule(llvm::Module *m, llvm::TargetMachine *t)
   : _module(m), _target(t),
     _context(*_target->getMCAsmInfo(), *_target->getRegisterInfo(), NULL),
@@ -117,6 +235,31 @@ LTOModule *LTOModule::makeLTOModule(const void *mem, size_t length,
   return makeLTOModule(buffer.take(), errMsg);
 }
 
+void LTOModule::getTargetOptions(TargetOptions &Options) {
+  Options.LessPreciseFPMADOption = EnableFPMAD;
+  Options.NoFramePointerElim = DisableFPElim;
+  Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf;
+  Options.AllowFPOpFusion = FuseFPOps;
+  Options.UnsafeFPMath = EnableUnsafeFPMath;
+  Options.NoInfsFPMath = EnableNoInfsFPMath;
+  Options.NoNaNsFPMath = EnableNoNaNsFPMath;
+  Options.HonorSignDependentRoundingFPMathOption =
+    EnableHonorSignDependentRoundingFPMath;
+  Options.UseSoftFloat = GenerateSoftFloatCalls;
+  if (FloatABIForCalls != FloatABI::Default)
+    Options.FloatABIType = FloatABIForCalls;
+  Options.NoZerosInBSS = DontPlaceZerosInBSS;
+  Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt;
+  Options.DisableTailCalls = DisableTailCalls;
+  Options.StackAlignmentOverride = OverrideStackAlignment;
+  Options.RealignStack = EnableRealignStack;
+  Options.TrapFuncName = TrapFuncName;
+  Options.PositionIndependentExecutable = EnablePIE;
+  Options.EnableSegmentedStacks = SegmentedStacks;
+  Options.UseInitArray =
UseInitArray; + Options.SSPBufferSize = SSPBufferSize; +} + LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, std::string &errMsg) { static bool Initialized = false; @@ -150,6 +293,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, std::string FeatureStr = Features.getString(); std::string CPU; TargetOptions Options; + getTargetOptions(Options); TargetMachine *target = march->createTargetMachine(Triple, CPU, FeatureStr, Options); LTOModule *Ret = new LTOModule(m.take(), target); @@ -271,6 +415,9 @@ void LTOModule::addDefinedDataSymbol(GlobalValue *v) { // Add to list of defined symbols. addDefinedSymbol(v, false); + if (!v->hasSection() /* || !isTargetDarwin */) + return; + // Special case i386/ppc ObjC data structures in magic sections: // The issue is that the old ObjC object format did some strange // contortions to avoid real linker symbols. For instance, the @@ -290,26 +437,25 @@ void LTOModule::addDefinedDataSymbol(GlobalValue *v) { // a class was missing. // The following synthesizes the implicit .objc_* symbols for the linker // from the ObjC data structures generated by the front end. - if (v->hasSection() /* && isTargetDarwin */) { - // special case if this data blob is an ObjC class definition - if (v->getSection().compare(0, 15, "__OBJC,__class,") == 0) { - if (GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) { - addObjCClass(gv); - } + + // special case if this data blob is an ObjC class definition + if (v->getSection().compare(0, 15, "__OBJC,__class,") == 0) { + if (GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) { + addObjCClass(gv); } + } - // special case if this data blob is an ObjC category definition - else if (v->getSection().compare(0, 18, "__OBJC,__category,") == 0) { - if (GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) { - addObjCCategory(gv); - } + // special case if this data blob is an ObjC category definition + else if (v->getSection().compare(0, 18, "__OBJC,__category,") == 0) { + if (GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) { + addObjCCategory(gv); } + } - // special case if this data blob is the list of referenced classes - else if (v->getSection().compare(0, 18, "__OBJC,__cls_refs,") == 0) { - if (GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) { - addObjCClassRef(gv); - } + // special case if this data blob is the list of referenced classes + else if (v->getSection().compare(0, 18, "__OBJC,__cls_refs,") == 0) { + if (GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) { + addObjCClassRef(gv); } } } @@ -347,8 +493,7 @@ void LTOModule::addDefinedSymbol(GlobalValue *def, bool isFunction) { // set definition part if (def->hasWeakLinkage() || def->hasLinkOnceLinkage() || - def->hasLinkerPrivateWeakLinkage() || - def->hasLinkerPrivateWeakDefAutoLinkage()) + def->hasLinkerPrivateWeakLinkage()) attr |= LTO_SYMBOL_DEFINITION_WEAK; else if (def->hasCommonLinkage()) attr |= LTO_SYMBOL_DEFINITION_TENTATIVE; @@ -364,7 +509,7 @@ void LTOModule::addDefinedSymbol(GlobalValue *def, bool isFunction) { def->hasLinkOnceLinkage() || def->hasCommonLinkage() || def->hasLinkerPrivateWeakLinkage()) attr |= LTO_SYMBOL_SCOPE_DEFAULT; - else if (def->hasLinkerPrivateWeakDefAutoLinkage()) + else if (def->hasLinkOnceODRAutoHideLinkage()) attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN; else attr |= LTO_SYMBOL_SCOPE_INTERNAL; @@ -658,21 +803,20 @@ bool LTOModule::addAsmGlobalSymbols(std::string &errMsg) { OwningPtr<MCAsmParser> Parser(createMCAsmParser(SrcMgr, _context, *Streamer, *_target->getMCAsmInfo())); - OwningPtr<MCSubtargetInfo> 
STI(_target->getTarget(). - createMCSubtargetInfo(_target->getTargetTriple(), - _target->getTargetCPU(), - _target->getTargetFeatureString())); - OwningPtr<MCTargetAsmParser> - TAP(_target->getTarget().createMCAsmParser(*STI, *Parser.get())); + const Target &T = _target->getTarget(); + OwningPtr<MCSubtargetInfo> + STI(T.createMCSubtargetInfo(_target->getTargetTriple(), + _target->getTargetCPU(), + _target->getTargetFeatureString())); + OwningPtr<MCTargetAsmParser> TAP(T.createMCAsmParser(*STI, *Parser.get())); if (!TAP) { - errMsg = "target " + std::string(_target->getTarget().getName()) + - " does not define AsmParser."; + errMsg = "target " + std::string(T.getName()) + + " does not define AsmParser."; return true; } Parser->setTargetParser(*TAP); - int Res = Parser->Run(false); - if (Res) + if (Parser->Run(false)) return true; for (RecordStreamer::const_iterator i = Streamer->begin(), @@ -687,6 +831,7 @@ bool LTOModule::addAsmGlobalSymbols(std::string &errMsg) { Value == RecordStreamer::Used) addAsmGlobalSymbolUndef(Key.data()); } + return false; } @@ -694,8 +839,10 @@ bool LTOModule::addAsmGlobalSymbols(std::string &errMsg) { static bool isDeclaration(const GlobalValue &V) { if (V.hasAvailableExternallyLinkage()) return true; + if (V.isMaterializable()) return false; + return V.isDeclaration(); } diff --git a/tools/lto/LTOModule.h b/tools/lto/LTOModule.h index cafb927..8e52206 100644 --- a/tools/lto/LTOModule.h +++ b/tools/lto/LTOModule.h @@ -29,6 +29,7 @@ namespace llvm { class Function; class GlobalValue; class MemoryBuffer; + class TargetOptions; class Value; } @@ -126,6 +127,10 @@ public: return _asm_undefines; } + /// getTargetOptions - Fill the TargetOptions object with the options + /// specified on the command line. + static void getTargetOptions(llvm::TargetOptions &Options); + private: /// parseSymbols - Parse the symbols from the module and model-level ASM and /// add them to either the defined or undefined lists. 
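
The new static LTOModule::getTargetOptions hook gives LTOModule::makeLTOModule and LTOCodeGenerator::determineTarget one shared place to translate libLTO's cl::opt flags into a TargetOptions struct before a TargetMachine is constructed. A condensed sketch of the call pattern both sites now follow, with the triple/CPU/feature computation assumed to happen elsewhere (LLVM-3.1-era API; the wrapper function itself is made up):

    #include "LTOModule.h"
    #include "llvm/Support/TargetRegistry.h"
    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Target/TargetOptions.h"

    // Sketch: create a TargetMachine that honors the codegen flags parsed
    // by LTOModule's command-line options.
    static llvm::TargetMachine *makeMachine(const llvm::Target *T,
                                            const std::string &Triple,
                                            const std::string &CPU,
                                            const std::string &Features) {
      llvm::TargetOptions Options;
      LTOModule::getTargetOptions(Options);  // fill from cl::opt flags
      return T->createTargetMachine(Triple, CPU, Features, Options);
    }
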
diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp index cc207f7..00b62fe 100644 --- a/unittests/ADT/APFloatTest.cpp +++ b/unittests/ADT/APFloatTest.cpp @@ -648,6 +648,49 @@ TEST(APFloatTest, exactInverse) { EXPECT_FALSE(APFloat(1.40129846e-45f).getExactInverse(0)); } +TEST(APFloatTest, roundToIntegral) { + APFloat T(-0.5), S(3.14), R(APFloat::getLargest(APFloat::IEEEdouble)), P(0.0); + + P = T; + P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_EQ(-0.0, P.convertToDouble()); + P = T; + P.roundToIntegral(APFloat::rmTowardNegative); + EXPECT_EQ(-1.0, P.convertToDouble()); + P = T; + P.roundToIntegral(APFloat::rmTowardPositive); + EXPECT_EQ(-0.0, P.convertToDouble()); + P = T; + P.roundToIntegral(APFloat::rmNearestTiesToEven); + EXPECT_EQ(-0.0, P.convertToDouble()); + + P = S; + P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_EQ(3.0, P.convertToDouble()); + P = S; + P.roundToIntegral(APFloat::rmTowardNegative); + EXPECT_EQ(3.0, P.convertToDouble()); + P = S; + P.roundToIntegral(APFloat::rmTowardPositive); + EXPECT_EQ(4.0, P.convertToDouble()); + P = S; + P.roundToIntegral(APFloat::rmNearestTiesToEven); + EXPECT_EQ(3.0, P.convertToDouble()); + + P = R; + P.roundToIntegral(APFloat::rmTowardZero); + EXPECT_EQ(R.convertToDouble(), P.convertToDouble()); + P = R; + P.roundToIntegral(APFloat::rmTowardNegative); + EXPECT_EQ(R.convertToDouble(), P.convertToDouble()); + P = R; + P.roundToIntegral(APFloat::rmTowardPositive); + EXPECT_EQ(R.convertToDouble(), P.convertToDouble()); + P = R; + P.roundToIntegral(APFloat::rmNearestTiesToEven); + EXPECT_EQ(R.convertToDouble(), P.convertToDouble()); +} + TEST(APFloatTest, getLargest) { EXPECT_EQ(3.402823466e+38f, APFloat::getLargest(APFloat::IEEEsingle).convertToFloat()); EXPECT_EQ(1.7976931348623158e+308, APFloat::getLargest(APFloat::IEEEdouble).convertToDouble()); diff --git a/unittests/ADT/CMakeLists.txt b/unittests/ADT/CMakeLists.txt index 690ff78..d272b09 100644 --- a/unittests/ADT/CMakeLists.txt +++ b/unittests/ADT/CMakeLists.txt @@ -26,6 +26,7 @@ add_llvm_unittest(ADTTests SparseSetTest.cpp StringMapTest.cpp StringRefTest.cpp + TinyPtrVectorTest.cpp TripleTest.cpp TwineTest.cpp VariadicFunctionTest.cpp diff --git a/unittests/ADT/SmallVectorTest.cpp b/unittests/ADT/SmallVectorTest.cpp index d35e5bc..7fd71f5 100644 --- a/unittests/ADT/SmallVectorTest.cpp +++ b/unittests/ADT/SmallVectorTest.cpp @@ -88,18 +88,17 @@ int Constructable::numDestructorCalls; int Constructable::numAssignmentCalls; // Test fixture class +template <typename VectorT> class SmallVectorTest : public testing::Test { protected: - typedef SmallVector<Constructable, 4> VectorType; - - VectorType theVector; - VectorType otherVector; + VectorT theVector; + VectorT otherVector; void SetUp() { Constructable::reset(); } - void assertEmpty(VectorType & v) { + void assertEmpty(VectorT & v) { // Size tests EXPECT_EQ(0u, v.size()); EXPECT_TRUE(v.empty()); @@ -109,7 +108,7 @@ protected: } // Assert that theVector contains the specified values, in order. - void assertValuesInOrder(VectorType & v, size_t size, ...) { + void assertValuesInOrder(VectorT & v, size_t size, ...) { EXPECT_EQ(size, v.size()); va_list ap; @@ -123,300 +122,327 @@ protected: } // Generate a sequence of values to initialize the vector. 
- void makeSequence(VectorType & v, int start, int end) { + void makeSequence(VectorT & v, int start, int end) { for (int i = start; i <= end; ++i) { v.push_back(Constructable(i)); } } }; +typedef ::testing::Types<SmallVector<Constructable, 0>, + SmallVector<Constructable, 1>, + SmallVector<Constructable, 2>, + SmallVector<Constructable, 4> + > SmallVectorTestTypes; +TYPED_TEST_CASE(SmallVectorTest, SmallVectorTestTypes); + // New vector test. -TEST_F(SmallVectorTest, EmptyVectorTest) { +TYPED_TEST(SmallVectorTest, EmptyVectorTest) { SCOPED_TRACE("EmptyVectorTest"); - assertEmpty(theVector); - EXPECT_TRUE(theVector.rbegin() == theVector.rend()); + this->assertEmpty(this->theVector); + EXPECT_TRUE(this->theVector.rbegin() == this->theVector.rend()); EXPECT_EQ(0, Constructable::getNumConstructorCalls()); EXPECT_EQ(0, Constructable::getNumDestructorCalls()); } // Simple insertions and deletions. -TEST_F(SmallVectorTest, PushPopTest) { +TYPED_TEST(SmallVectorTest, PushPopTest) { SCOPED_TRACE("PushPopTest"); + // Track whether the vector will potentially have to grow. + bool RequiresGrowth = this->theVector.capacity() < 3; + // Push an element - theVector.push_back(Constructable(1)); + this->theVector.push_back(Constructable(1)); // Size tests - assertValuesInOrder(theVector, 1u, 1); - EXPECT_FALSE(theVector.begin() == theVector.end()); - EXPECT_FALSE(theVector.empty()); + this->assertValuesInOrder(this->theVector, 1u, 1); + EXPECT_FALSE(this->theVector.begin() == this->theVector.end()); + EXPECT_FALSE(this->theVector.empty()); // Push another element - theVector.push_back(Constructable(2)); - assertValuesInOrder(theVector, 2u, 1, 2); + this->theVector.push_back(Constructable(2)); + this->assertValuesInOrder(this->theVector, 2u, 1, 2); // Insert at beginning - theVector.insert(theVector.begin(), theVector[1]); - assertValuesInOrder(theVector, 3u, 2, 1, 2); + this->theVector.insert(this->theVector.begin(), this->theVector[1]); + this->assertValuesInOrder(this->theVector, 3u, 2, 1, 2); // Pop one element - theVector.pop_back(); - assertValuesInOrder(theVector, 2u, 2, 1); + this->theVector.pop_back(); + this->assertValuesInOrder(this->theVector, 2u, 2, 1); // Pop remaining elements - theVector.pop_back(); - theVector.pop_back(); - assertEmpty(theVector); + this->theVector.pop_back(); + this->theVector.pop_back(); + this->assertEmpty(this->theVector); // Check number of constructor calls. Should be 2 for each list element, // one for the argument to push_back, one for the argument to insert, // and one for the list element itself. - EXPECT_EQ(5, Constructable::getNumConstructorCalls()); - EXPECT_EQ(5, Constructable::getNumDestructorCalls()); + if (!RequiresGrowth) { + EXPECT_EQ(5, Constructable::getNumConstructorCalls()); + EXPECT_EQ(5, Constructable::getNumDestructorCalls()); + } else { + // If we had to grow the vector, these only have a lower bound, but should + // always be equal. + EXPECT_LE(5, Constructable::getNumConstructorCalls()); + EXPECT_EQ(Constructable::getNumConstructorCalls(), + Constructable::getNumDestructorCalls()); + } } // Clear test. 
-TEST_F(SmallVectorTest, ClearTest) { +TYPED_TEST(SmallVectorTest, ClearTest) { SCOPED_TRACE("ClearTest"); - makeSequence(theVector, 1, 2); - theVector.clear(); + this->theVector.reserve(2); + this->makeSequence(this->theVector, 1, 2); + this->theVector.clear(); - assertEmpty(theVector); + this->assertEmpty(this->theVector); EXPECT_EQ(4, Constructable::getNumConstructorCalls()); EXPECT_EQ(4, Constructable::getNumDestructorCalls()); } // Resize smaller test. -TEST_F(SmallVectorTest, ResizeShrinkTest) { +TYPED_TEST(SmallVectorTest, ResizeShrinkTest) { SCOPED_TRACE("ResizeShrinkTest"); - makeSequence(theVector, 1, 3); - theVector.resize(1); + this->theVector.reserve(3); + this->makeSequence(this->theVector, 1, 3); + this->theVector.resize(1); - assertValuesInOrder(theVector, 1u, 1); + this->assertValuesInOrder(this->theVector, 1u, 1); EXPECT_EQ(6, Constructable::getNumConstructorCalls()); EXPECT_EQ(5, Constructable::getNumDestructorCalls()); } // Resize bigger test. -TEST_F(SmallVectorTest, ResizeGrowTest) { +TYPED_TEST(SmallVectorTest, ResizeGrowTest) { SCOPED_TRACE("ResizeGrowTest"); - theVector.resize(2); + this->theVector.resize(2); // The extra constructor/destructor calls come from the temporary object used // to initialize the contents of the resized array (via copy construction). EXPECT_EQ(3, Constructable::getNumConstructorCalls()); EXPECT_EQ(1, Constructable::getNumDestructorCalls()); - EXPECT_EQ(2u, theVector.size()); + EXPECT_EQ(2u, this->theVector.size()); } // Resize with fill value. -TEST_F(SmallVectorTest, ResizeFillTest) { +TYPED_TEST(SmallVectorTest, ResizeFillTest) { SCOPED_TRACE("ResizeFillTest"); - theVector.resize(3, Constructable(77)); - assertValuesInOrder(theVector, 3u, 77, 77, 77); + this->theVector.resize(3, Constructable(77)); + this->assertValuesInOrder(this->theVector, 3u, 77, 77, 77); } // Overflow past fixed size. -TEST_F(SmallVectorTest, OverflowTest) { +TYPED_TEST(SmallVectorTest, OverflowTest) { SCOPED_TRACE("OverflowTest"); // Push more elements than the fixed size. - makeSequence(theVector, 1, 10); + this->makeSequence(this->theVector, 1, 10); // Test size and values. - EXPECT_EQ(10u, theVector.size()); + EXPECT_EQ(10u, this->theVector.size()); for (int i = 0; i < 10; ++i) { - EXPECT_EQ(i+1, theVector[i].getValue()); + EXPECT_EQ(i+1, this->theVector[i].getValue()); } // Now resize back to fixed size. - theVector.resize(1); + this->theVector.resize(1); - assertValuesInOrder(theVector, 1u, 1); + this->assertValuesInOrder(this->theVector, 1u, 1); } // Iteration tests. 
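
The mechanical change running through these SmallVectorTest hunks (and the iteration tests just below) is gtest's typed-test machinery: the fixture becomes a class template instantiated over several SmallVector widths, TEST_F becomes TYPED_TEST, and every fixture member now needs an explicit this-> because it lives in a dependent base class. A minimal sketch of the pattern, with illustrative names:

    template <typename VectorT>
    class ExampleVectorTest : public ::testing::Test {
    protected:
      VectorT V; // dependent member: tests must say this->V
    };

    typedef ::testing::Types<SmallVector<int, 0>, SmallVector<int, 4> >
        ExampleVectorTypes;
    TYPED_TEST_CASE(ExampleVectorTest, ExampleVectorTypes);

    // Runs once per type in the list; TypeParam names the current type.
    TYPED_TEST(ExampleVectorTest, StartsEmpty) {
      EXPECT_TRUE(this->V.empty());
      typename TypeParam::iterator I = this->V.begin(); // dependent type
      EXPECT_TRUE(I == this->V.end());
    }
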
-TEST_F(SmallVectorTest, IterationTest) { - makeSequence(theVector, 1, 2); +TYPED_TEST(SmallVectorTest, IterationTest) { + this->makeSequence(this->theVector, 1, 2); // Forward Iteration - VectorType::iterator it = theVector.begin(); - EXPECT_TRUE(*it == theVector.front()); - EXPECT_TRUE(*it == theVector[0]); + typename TypeParam::iterator it = this->theVector.begin(); + EXPECT_TRUE(*it == this->theVector.front()); + EXPECT_TRUE(*it == this->theVector[0]); EXPECT_EQ(1, it->getValue()); ++it; - EXPECT_TRUE(*it == theVector[1]); - EXPECT_TRUE(*it == theVector.back()); + EXPECT_TRUE(*it == this->theVector[1]); + EXPECT_TRUE(*it == this->theVector.back()); EXPECT_EQ(2, it->getValue()); ++it; - EXPECT_TRUE(it == theVector.end()); + EXPECT_TRUE(it == this->theVector.end()); --it; - EXPECT_TRUE(*it == theVector[1]); + EXPECT_TRUE(*it == this->theVector[1]); EXPECT_EQ(2, it->getValue()); --it; - EXPECT_TRUE(*it == theVector[0]); + EXPECT_TRUE(*it == this->theVector[0]); EXPECT_EQ(1, it->getValue()); // Reverse Iteration - VectorType::reverse_iterator rit = theVector.rbegin(); - EXPECT_TRUE(*rit == theVector[1]); + typename TypeParam::reverse_iterator rit = this->theVector.rbegin(); + EXPECT_TRUE(*rit == this->theVector[1]); EXPECT_EQ(2, rit->getValue()); ++rit; - EXPECT_TRUE(*rit == theVector[0]); + EXPECT_TRUE(*rit == this->theVector[0]); EXPECT_EQ(1, rit->getValue()); ++rit; - EXPECT_TRUE(rit == theVector.rend()); + EXPECT_TRUE(rit == this->theVector.rend()); --rit; - EXPECT_TRUE(*rit == theVector[0]); + EXPECT_TRUE(*rit == this->theVector[0]); EXPECT_EQ(1, rit->getValue()); --rit; - EXPECT_TRUE(*rit == theVector[1]); + EXPECT_TRUE(*rit == this->theVector[1]); EXPECT_EQ(2, rit->getValue()); } // Swap test. -TEST_F(SmallVectorTest, SwapTest) { +TYPED_TEST(SmallVectorTest, SwapTest) { SCOPED_TRACE("SwapTest"); - makeSequence(theVector, 1, 2); - std::swap(theVector, otherVector); + this->makeSequence(this->theVector, 1, 2); + std::swap(this->theVector, this->otherVector); - assertEmpty(theVector); - assertValuesInOrder(otherVector, 2u, 1, 2); + this->assertEmpty(this->theVector); + this->assertValuesInOrder(this->otherVector, 2u, 1, 2); } // Append test -TEST_F(SmallVectorTest, AppendTest) { +TYPED_TEST(SmallVectorTest, AppendTest) { SCOPED_TRACE("AppendTest"); - makeSequence(otherVector, 2, 3); + this->makeSequence(this->otherVector, 2, 3); - theVector.push_back(Constructable(1)); - theVector.append(otherVector.begin(), otherVector.end()); + this->theVector.push_back(Constructable(1)); + this->theVector.append(this->otherVector.begin(), this->otherVector.end()); - assertValuesInOrder(theVector, 3u, 1, 2, 3); + this->assertValuesInOrder(this->theVector, 3u, 1, 2, 3); } // Append repeated test -TEST_F(SmallVectorTest, AppendRepeatedTest) { +TYPED_TEST(SmallVectorTest, AppendRepeatedTest) { SCOPED_TRACE("AppendRepeatedTest"); - theVector.push_back(Constructable(1)); - theVector.append(2, Constructable(77)); - assertValuesInOrder(theVector, 3u, 1, 77, 77); + this->theVector.push_back(Constructable(1)); + this->theVector.append(2, Constructable(77)); + this->assertValuesInOrder(this->theVector, 3u, 1, 77, 77); } // Assign test -TEST_F(SmallVectorTest, AssignTest) { +TYPED_TEST(SmallVectorTest, AssignTest) { SCOPED_TRACE("AssignTest"); - theVector.push_back(Constructable(1)); - theVector.assign(2, Constructable(77)); - assertValuesInOrder(theVector, 2u, 77, 77); + this->theVector.push_back(Constructable(1)); + this->theVector.assign(2, Constructable(77)); + this->assertValuesInOrder(this->theVector, 2u, 
77, 77); } // Erase a single element -TEST_F(SmallVectorTest, EraseTest) { +TYPED_TEST(SmallVectorTest, EraseTest) { SCOPED_TRACE("EraseTest"); - makeSequence(theVector, 1, 3); - theVector.erase(theVector.begin()); - assertValuesInOrder(theVector, 2u, 2, 3); + this->makeSequence(this->theVector, 1, 3); + this->theVector.erase(this->theVector.begin()); + this->assertValuesInOrder(this->theVector, 2u, 2, 3); } // Erase a range of elements -TEST_F(SmallVectorTest, EraseRangeTest) { +TYPED_TEST(SmallVectorTest, EraseRangeTest) { SCOPED_TRACE("EraseRangeTest"); - makeSequence(theVector, 1, 3); - theVector.erase(theVector.begin(), theVector.begin() + 2); - assertValuesInOrder(theVector, 1u, 3); + this->makeSequence(this->theVector, 1, 3); + this->theVector.erase(this->theVector.begin(), this->theVector.begin() + 2); + this->assertValuesInOrder(this->theVector, 1u, 3); } // Insert a single element. -TEST_F(SmallVectorTest, InsertTest) { +TYPED_TEST(SmallVectorTest, InsertTest) { SCOPED_TRACE("InsertTest"); - makeSequence(theVector, 1, 3); - VectorType::iterator I = - theVector.insert(theVector.begin() + 1, Constructable(77)); - EXPECT_EQ(theVector.begin() + 1, I); - assertValuesInOrder(theVector, 4u, 1, 77, 2, 3); + this->makeSequence(this->theVector, 1, 3); + typename TypeParam::iterator I = + this->theVector.insert(this->theVector.begin() + 1, Constructable(77)); + EXPECT_EQ(this->theVector.begin() + 1, I); + this->assertValuesInOrder(this->theVector, 4u, 1, 77, 2, 3); } // Insert repeated elements. -TEST_F(SmallVectorTest, InsertRepeatedTest) { +TYPED_TEST(SmallVectorTest, InsertRepeatedTest) { SCOPED_TRACE("InsertRepeatedTest"); - makeSequence(theVector, 10, 15); - VectorType::iterator I = - theVector.insert(theVector.begin() + 1, 2, Constructable(16)); - EXPECT_EQ(theVector.begin() + 1, I); - assertValuesInOrder(theVector, 8u, 10, 16, 16, 11, 12, 13, 14, 15); + this->makeSequence(this->theVector, 10, 15); + typename TypeParam::iterator I = + this->theVector.insert(this->theVector.begin() + 1, 2, Constructable(16)); + EXPECT_EQ(this->theVector.begin() + 1, I); + this->assertValuesInOrder(this->theVector, 8u, + 10, 16, 16, 11, 12, 13, 14, 15); // Insert at end. - I = theVector.insert(theVector.end(), 2, Constructable(16)); - EXPECT_EQ(theVector.begin() + 8, I); - assertValuesInOrder(theVector, 10u, 10, 16, 16, 11, 12, 13, 14, 15, 16, 16); + I = this->theVector.insert(this->theVector.end(), 2, Constructable(16)); + EXPECT_EQ(this->theVector.begin() + 8, I); + this->assertValuesInOrder(this->theVector, 10u, + 10, 16, 16, 11, 12, 13, 14, 15, 16, 16); // Empty insert. - EXPECT_EQ(theVector.end(), - theVector.insert(theVector.end(), 0, Constructable(42))); - EXPECT_EQ(theVector.begin() + 1, - theVector.insert(theVector.begin() + 1, 0, Constructable(42))); + EXPECT_EQ(this->theVector.end(), + this->theVector.insert(this->theVector.end(), + 0, Constructable(42))); + EXPECT_EQ(this->theVector.begin() + 1, + this->theVector.insert(this->theVector.begin() + 1, + 0, Constructable(42))); } // Insert range. 
-TEST_F(SmallVectorTest, InsertRangeTest) { +TYPED_TEST(SmallVectorTest, InsertRangeTest) { SCOPED_TRACE("InsertRangeTest"); Constructable Arr[3] = { Constructable(77), Constructable(77), Constructable(77) }; - makeSequence(theVector, 1, 3); - VectorType::iterator I = - theVector.insert(theVector.begin() + 1, Arr, Arr+3); - EXPECT_EQ(theVector.begin() + 1, I); - assertValuesInOrder(theVector, 6u, 1, 77, 77, 77, 2, 3); + this->makeSequence(this->theVector, 1, 3); + typename TypeParam::iterator I = + this->theVector.insert(this->theVector.begin() + 1, Arr, Arr+3); + EXPECT_EQ(this->theVector.begin() + 1, I); + this->assertValuesInOrder(this->theVector, 6u, 1, 77, 77, 77, 2, 3); // Insert at end. - I = theVector.insert(theVector.end(), Arr, Arr+3); - EXPECT_EQ(theVector.begin() + 6, I); - assertValuesInOrder(theVector, 9u, 1, 77, 77, 77, 2, 3, 77, 77, 77); + I = this->theVector.insert(this->theVector.end(), Arr, Arr+3); + EXPECT_EQ(this->theVector.begin() + 6, I); + this->assertValuesInOrder(this->theVector, 9u, + 1, 77, 77, 77, 2, 3, 77, 77, 77); // Empty insert. - EXPECT_EQ(theVector.end(), theVector.insert(theVector.end(), - theVector.begin(), - theVector.begin())); - EXPECT_EQ(theVector.begin() + 1, theVector.insert(theVector.begin() + 1, - theVector.begin(), - theVector.begin())); + EXPECT_EQ(this->theVector.end(), + this->theVector.insert(this->theVector.end(), + this->theVector.begin(), + this->theVector.begin())); + EXPECT_EQ(this->theVector.begin() + 1, + this->theVector.insert(this->theVector.begin() + 1, + this->theVector.begin(), + this->theVector.begin())); } // Comparison tests. -TEST_F(SmallVectorTest, ComparisonTest) { +TYPED_TEST(SmallVectorTest, ComparisonTest) { SCOPED_TRACE("ComparisonTest"); - makeSequence(theVector, 1, 3); - makeSequence(otherVector, 1, 3); + this->makeSequence(this->theVector, 1, 3); + this->makeSequence(this->otherVector, 1, 3); - EXPECT_TRUE(theVector == otherVector); - EXPECT_FALSE(theVector != otherVector); + EXPECT_TRUE(this->theVector == this->otherVector); + EXPECT_FALSE(this->theVector != this->otherVector); - otherVector.clear(); - makeSequence(otherVector, 2, 4); + this->otherVector.clear(); + this->makeSequence(this->otherVector, 2, 4); - EXPECT_FALSE(theVector == otherVector); - EXPECT_TRUE(theVector != otherVector); + EXPECT_FALSE(this->theVector == this->otherVector); + EXPECT_TRUE(this->theVector != this->otherVector); } // Constant vector tests. -TEST_F(SmallVectorTest, ConstVectorTest) { - VectorType constVector; +TYPED_TEST(SmallVectorTest, ConstVectorTest) { + const TypeParam constVector; EXPECT_EQ(0u, constVector.size()); EXPECT_TRUE(constVector.empty()); @@ -424,26 +450,27 @@ TEST_F(SmallVectorTest, ConstVectorTest) { } // Direct array access. 
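
The insert tests above (and the direct-array test that follows) rely on SmallVector honoring the std::vector contract for insert: it returns an iterator to the first element inserted, and an empty insertion hands back the position argument unchanged. A small illustration, not from the patch:

    SmallVector<int, 4> SV;
    SV.push_back(1);
    SV.push_back(3);
    SmallVector<int, 4>::iterator I = SV.insert(SV.begin() + 1, 2);
    // I points at the freshly inserted 2; SV now holds {1, 2, 3}.
    I = SV.insert(SV.begin() + 1, 0, 99); // zero-element insert
    // Nothing was inserted, and I is simply SV.begin() + 1 again.
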
-TEST_F(SmallVectorTest, DirectVectorTest) { - EXPECT_EQ(0u, theVector.size()); - EXPECT_LE(4u, theVector.capacity()); +TYPED_TEST(SmallVectorTest, DirectVectorTest) { + EXPECT_EQ(0u, this->theVector.size()); + this->theVector.reserve(4); + EXPECT_LE(4u, this->theVector.capacity()); EXPECT_EQ(0, Constructable::getNumConstructorCalls()); - theVector.end()[0] = 1; - theVector.end()[1] = 2; - theVector.end()[2] = 3; - theVector.end()[3] = 4; - theVector.set_size(4); - EXPECT_EQ(4u, theVector.size()); + this->theVector.end()[0] = 1; + this->theVector.end()[1] = 2; + this->theVector.end()[2] = 3; + this->theVector.end()[3] = 4; + this->theVector.set_size(4); + EXPECT_EQ(4u, this->theVector.size()); EXPECT_EQ(4, Constructable::getNumConstructorCalls()); - EXPECT_EQ(1, theVector[0].getValue()); - EXPECT_EQ(2, theVector[1].getValue()); - EXPECT_EQ(3, theVector[2].getValue()); - EXPECT_EQ(4, theVector[3].getValue()); + EXPECT_EQ(1, this->theVector[0].getValue()); + EXPECT_EQ(2, this->theVector[1].getValue()); + EXPECT_EQ(3, this->theVector[2].getValue()); + EXPECT_EQ(4, this->theVector[3].getValue()); } -TEST_F(SmallVectorTest, IteratorTest) { +TYPED_TEST(SmallVectorTest, IteratorTest) { std::list<int> L; - theVector.insert(theVector.end(), L.begin(), L.end()); + this->theVector.insert(this->theVector.end(), L.begin(), L.end()); } struct notassignable { @@ -451,7 +478,7 @@ struct notassignable { notassignable(int &x) : x(x) {} }; -TEST_F(SmallVectorTest, NoAssignTest) { +TEST(SmallVectorCustomTest, NoAssignTest) { int x = 0; SmallVector<notassignable, 2> vec; vec.push_back(notassignable(x)); diff --git a/unittests/ADT/TinyPtrVectorTest.cpp b/unittests/ADT/TinyPtrVectorTest.cpp new file mode 100644 index 0000000..05dd797 --- /dev/null +++ b/unittests/ADT/TinyPtrVectorTest.cpp @@ -0,0 +1,448 @@ +//===- llvm/unittest/ADT/TinyPtrVectorTest.cpp ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// TinyPtrVector unit tests. +// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Support/type_traits.h" +#include <algorithm> +#include <list> +#include <vector> + +using namespace llvm; + +namespace { + +// The world's worst RNG, but it is deterministic and makes it easy to get +// *some* shuffling of elements. 
+static ptrdiff_t test_shuffle_rng(ptrdiff_t i) { + return (i + i * 33) % i; +} +static ptrdiff_t (*test_shuffle_rng_p)(ptrdiff_t) = &test_shuffle_rng; + +template <typename VectorT> +class TinyPtrVectorTest : public testing::Test { +protected: + typedef typename VectorT::value_type PtrT; + typedef typename remove_pointer<PtrT>::type ValueT; + + VectorT V; + VectorT V2; + + ValueT TestValues[1024]; + std::vector<PtrT> TestPtrs; + + TinyPtrVectorTest() { + for (size_t i = 0, e = array_lengthof(TestValues); i != e; ++i) + TestPtrs.push_back(&TestValues[i]); + + std::random_shuffle(TestPtrs.begin(), TestPtrs.end(), test_shuffle_rng_p); + } + + ArrayRef<PtrT> testArray(size_t N) { + return makeArrayRef(&TestPtrs[0], N); + } + + void appendValues(VectorT &V, ArrayRef<PtrT> Values) { + for (size_t i = 0, e = Values.size(); i != e; ++i) + V.push_back(Values[i]); + } + + void setVectors(ArrayRef<PtrT> Values1, ArrayRef<PtrT> Values2) { + V.clear(); + appendValues(V, Values1); + V2.clear(); + appendValues(V2, Values2); + } + + void expectValues(const VectorT &V, ArrayRef<PtrT> Values) { + EXPECT_EQ(Values.empty(), V.empty()); + EXPECT_EQ(Values.size(), V.size()); + for (size_t i = 0, e = Values.size(); i != e; ++i) { + EXPECT_EQ(Values[i], V[i]); + EXPECT_EQ(Values[i], *llvm::next(V.begin(), i)); + } + EXPECT_EQ(V.end(), llvm::next(V.begin(), Values.size())); + } +}; + +typedef ::testing::Types<TinyPtrVector<int*>, + TinyPtrVector<double*> + > TinyPtrVectorTestTypes; +TYPED_TEST_CASE(TinyPtrVectorTest, TinyPtrVectorTestTypes); + +TYPED_TEST(TinyPtrVectorTest, EmptyTest) { + this->expectValues(this->V, this->testArray(0)); +} + +TYPED_TEST(TinyPtrVectorTest, PushPopBack) { + this->V.push_back(this->TestPtrs[0]); + this->expectValues(this->V, this->testArray(1)); + this->V.push_back(this->TestPtrs[1]); + this->expectValues(this->V, this->testArray(2)); + this->V.push_back(this->TestPtrs[2]); + this->expectValues(this->V, this->testArray(3)); + this->V.push_back(this->TestPtrs[3]); + this->expectValues(this->V, this->testArray(4)); + this->V.push_back(this->TestPtrs[4]); + this->expectValues(this->V, this->testArray(5)); + + // Pop and clobber a few values to keep things interesting. 
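
TinyPtrVector, which this new file covers, is the pointer-sized small vector: a single element is stored inline in the object itself, and only a second push_back spills to a heap-backed vector, which is why the fixture instantiates over plain int*/double* element types. (One aside on the harness: (i + i*33) % i is 0 for every positive i, so the "world's worst RNG" produces one fixed permutation -- deterministic, exactly as advertised.) A small usage sketch, illustrative only:

    int A = 1, B = 2;
    TinyPtrVector<int *> TPV;
    TPV.push_back(&A); // still a single inline pointer, no allocation
    TPV.push_back(&B); // spills to the vector representation
    assert(TPV.size() == 2 && TPV[0] == &A);
    TPV.erase(TPV.begin()); // works in either representation
    assert(TPV.size() == 1 && TPV[0] == &B);
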
+ this->V.pop_back(); + this->expectValues(this->V, this->testArray(4)); + this->V.pop_back(); + this->expectValues(this->V, this->testArray(3)); + this->TestPtrs[3] = &this->TestValues[42]; + this->TestPtrs[4] = &this->TestValues[43]; + this->V.push_back(this->TestPtrs[3]); + this->expectValues(this->V, this->testArray(4)); + this->V.push_back(this->TestPtrs[4]); + this->expectValues(this->V, this->testArray(5)); + + this->V.pop_back(); + this->expectValues(this->V, this->testArray(4)); + this->V.pop_back(); + this->expectValues(this->V, this->testArray(3)); + this->V.pop_back(); + this->expectValues(this->V, this->testArray(2)); + this->V.pop_back(); + this->expectValues(this->V, this->testArray(1)); + this->V.pop_back(); + this->expectValues(this->V, this->testArray(0)); + + this->appendValues(this->V, this->testArray(42)); + this->expectValues(this->V, this->testArray(42)); +} + +TYPED_TEST(TinyPtrVectorTest, ClearTest) { + this->expectValues(this->V, this->testArray(0)); + this->V.clear(); + this->expectValues(this->V, this->testArray(0)); + + this->appendValues(this->V, this->testArray(1)); + this->expectValues(this->V, this->testArray(1)); + this->V.clear(); + this->expectValues(this->V, this->testArray(0)); + + this->appendValues(this->V, this->testArray(42)); + this->expectValues(this->V, this->testArray(42)); + this->V.clear(); + this->expectValues(this->V, this->testArray(0)); +} + +TYPED_TEST(TinyPtrVectorTest, CopyAndMoveCtorTest) { + this->appendValues(this->V, this->testArray(42)); + TypeParam Copy(this->V); + this->expectValues(Copy, this->testArray(42)); + + // This is a separate copy, and so it shouldn't destroy the original. + Copy.clear(); + this->expectValues(Copy, this->testArray(0)); + this->expectValues(this->V, this->testArray(42)); + + TypeParam Copy2(this->V2); + this->appendValues(Copy2, this->testArray(42)); + this->expectValues(Copy2, this->testArray(42)); + this->expectValues(this->V2, this->testArray(0)); + +#if LLVM_USE_RVALUE_REFERENCES + TypeParam Move(std::move(Copy2)); + this->expectValues(Move, this->testArray(42)); + this->expectValues(Copy2, this->testArray(0)); +#endif +} + +TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) { + this->V = this->V2; + this->expectValues(this->V, this->testArray(0)); + this->expectValues(this->V2, this->testArray(0)); +#if LLVM_USE_RVALUE_REFERENCES + this->V = std::move(this->V2); + this->expectValues(this->V, this->testArray(0)); +#endif + + this->setVectors(this->testArray(1), this->testArray(0)); + this->V = this->V2; + this->expectValues(this->V, this->testArray(0)); + this->expectValues(this->V2, this->testArray(0)); +#if LLVM_USE_RVALUE_REFERENCES + this->setVectors(this->testArray(1), this->testArray(0)); + this->V = std::move(this->V2); + this->expectValues(this->V, this->testArray(0)); +#endif + + this->setVectors(this->testArray(2), this->testArray(0)); + this->V = this->V2; + this->expectValues(this->V, this->testArray(0)); + this->expectValues(this->V2, this->testArray(0)); +#if LLVM_USE_RVALUE_REFERENCES + this->setVectors(this->testArray(2), this->testArray(0)); + this->V = std::move(this->V2); + this->expectValues(this->V, this->testArray(0)); +#endif + + this->setVectors(this->testArray(42), this->testArray(0)); + this->V = this->V2; + this->expectValues(this->V, this->testArray(0)); + this->expectValues(this->V2, this->testArray(0)); +#if LLVM_USE_RVALUE_REFERENCES + this->setVectors(this->testArray(42), this->testArray(0)); + this->V = std::move(this->V2); + this->expectValues(this->V, 
this->testArray(0)); +#endif + + this->setVectors(this->testArray(0), this->testArray(1)); + this->V = this->V2; + this->expectValues(this->V, this->testArray(1)); + this->expectValues(this->V2, this->testArray(1)); +#if LLVM_USE_RVALUE_REFERENCES + this->setVectors(this->testArray(0), this->testArray(1)); + this->V = std::move(this->V2); + this->expectValues(this->V, this->testArray(1)); +#endif + + this->setVectors(this->testArray(0), this->testArray(2)); + this->V = this->V2; + this->expectValues(this->V, this->testArray(2)); + this->expectValues(this->V2, this->testArray(2)); +#if LLVM_USE_RVALUE_REFERENCES + this->setVectors(this->testArray(0), this->testArray(2)); + this->V = std::move(this->V2); + this->expectValues(this->V, this->testArray(2)); +#endif + + this->setVectors(this->testArray(0), this->testArray(42)); + this->V = this->V2; + this->expectValues(this->V, this->testArray(42)); + this->expectValues(this->V2, this->testArray(42)); +#if LLVM_USE_RVALUE_REFERENCES + this->setVectors(this->testArray(0), this->testArray(42)); + this->V = std::move(this->V2); + this->expectValues(this->V, this->testArray(42)); +#endif + + this->setVectors(this->testArray(1), this->testArray(1)); + this->V = this->V2; + this->expectValues(this->V, this->testArray(1)); + this->expectValues(this->V2, this->testArray(1)); +#if LLVM_USE_RVALUE_REFERENCES + this->V = std::move(this->V2); + this->expectValues(this->V, this->testArray(1)); +#endif + + this->setVectors(this->testArray(1), this->testArray(2)); + this->V = this->V2; + this->expectValues(this->V, this->testArray(2)); + this->expectValues(this->V2, this->testArray(2)); +#if LLVM_USE_RVALUE_REFERENCES + this->setVectors(this->testArray(1), this->testArray(2)); + this->V = std::move(this->V2); + this->expectValues(this->V, this->testArray(2)); +#endif + + this->setVectors(this->testArray(1), this->testArray(42)); + this->V = this->V2; + this->expectValues(this->V, this->testArray(42)); + this->expectValues(this->V2, this->testArray(42)); +#if LLVM_USE_RVALUE_REFERENCES + this->setVectors(this->testArray(1), this->testArray(42)); + this->V = std::move(this->V2); + this->expectValues(this->V, this->testArray(42)); +#endif + + this->setVectors(this->testArray(2), this->testArray(1)); + this->V = this->V2; + this->expectValues(this->V, this->testArray(1)); + this->expectValues(this->V2, this->testArray(1)); +#if LLVM_USE_RVALUE_REFERENCES + this->setVectors(this->testArray(2), this->testArray(1)); + this->V = std::move(this->V2); + this->expectValues(this->V, this->testArray(1)); +#endif + + this->setVectors(this->testArray(2), this->testArray(2)); + this->V = this->V2; + this->expectValues(this->V, this->testArray(2)); + this->expectValues(this->V2, this->testArray(2)); +#if LLVM_USE_RVALUE_REFERENCES + this->setVectors(this->testArray(2), this->testArray(2)); + this->V = std::move(this->V2); + this->expectValues(this->V, this->testArray(2)); +#endif + + this->setVectors(this->testArray(2), this->testArray(42)); + this->V = this->V2; + this->expectValues(this->V, this->testArray(42)); + this->expectValues(this->V2, this->testArray(42)); +#if LLVM_USE_RVALUE_REFERENCES + this->setVectors(this->testArray(2), this->testArray(42)); + this->V = std::move(this->V2); + this->expectValues(this->V, this->testArray(42)); +#endif + + this->setVectors(this->testArray(42), this->testArray(1)); + this->V = this->V2; + this->expectValues(this->V, this->testArray(1)); + this->expectValues(this->V2, this->testArray(1)); +#if LLVM_USE_RVALUE_REFERENCES + 
this->setVectors(this->testArray(42), this->testArray(1)); + this->V = std::move(this->V2); + this->expectValues(this->V, this->testArray(1)); +#endif + + this->setVectors(this->testArray(42), this->testArray(2)); + this->V = this->V2; + this->expectValues(this->V, this->testArray(2)); + this->expectValues(this->V2, this->testArray(2)); +#if LLVM_USE_RVALUE_REFERENCES + this->setVectors(this->testArray(42), this->testArray(2)); + this->V = std::move(this->V2); + this->expectValues(this->V, this->testArray(2)); +#endif + + this->setVectors(this->testArray(42), this->testArray(42)); + this->V = this->V2; + this->expectValues(this->V, this->testArray(42)); + this->expectValues(this->V2, this->testArray(42)); +#if LLVM_USE_RVALUE_REFERENCES + this->setVectors(this->testArray(42), this->testArray(42)); + this->V = std::move(this->V2); + this->expectValues(this->V, this->testArray(42)); +#endif +} + +TYPED_TEST(TinyPtrVectorTest, EraseTest) { + this->appendValues(this->V, this->testArray(1)); + this->expectValues(this->V, this->testArray(1)); + this->V.erase(this->V.begin()); + this->expectValues(this->V, this->testArray(0)); + + this->appendValues(this->V, this->testArray(42)); + this->expectValues(this->V, this->testArray(42)); + this->V.erase(this->V.begin()); + this->TestPtrs.erase(this->TestPtrs.begin()); + this->expectValues(this->V, this->testArray(41)); + this->V.erase(llvm::next(this->V.begin(), 1)); + this->TestPtrs.erase(llvm::next(this->TestPtrs.begin(), 1)); + this->expectValues(this->V, this->testArray(40)); + this->V.erase(llvm::next(this->V.begin(), 2)); + this->TestPtrs.erase(llvm::next(this->TestPtrs.begin(), 2)); + this->expectValues(this->V, this->testArray(39)); + this->V.erase(llvm::next(this->V.begin(), 5)); + this->TestPtrs.erase(llvm::next(this->TestPtrs.begin(), 5)); + this->expectValues(this->V, this->testArray(38)); + this->V.erase(llvm::next(this->V.begin(), 13)); + this->TestPtrs.erase(llvm::next(this->TestPtrs.begin(), 13)); + this->expectValues(this->V, this->testArray(37)); + + typename TypeParam::iterator I = this->V.begin(); + do { + I = this->V.erase(I); + } while (I != this->V.end()); + this->expectValues(this->V, this->testArray(0)); +} + +TYPED_TEST(TinyPtrVectorTest, EraseRangeTest) { + this->appendValues(this->V, this->testArray(1)); + this->expectValues(this->V, this->testArray(1)); + this->V.erase(this->V.begin(), this->V.begin()); + this->expectValues(this->V, this->testArray(1)); + this->V.erase(this->V.end(), this->V.end()); + this->expectValues(this->V, this->testArray(1)); + this->V.erase(this->V.begin(), this->V.end()); + this->expectValues(this->V, this->testArray(0)); + + this->appendValues(this->V, this->testArray(42)); + this->expectValues(this->V, this->testArray(42)); + this->V.erase(this->V.begin(), llvm::next(this->V.begin(), 1)); + this->TestPtrs.erase(this->TestPtrs.begin(), + llvm::next(this->TestPtrs.begin(), 1)); + this->expectValues(this->V, this->testArray(41)); + this->V.erase(llvm::next(this->V.begin(), 1), llvm::next(this->V.begin(), 2)); + this->TestPtrs.erase(llvm::next(this->TestPtrs.begin(), 1), + llvm::next(this->TestPtrs.begin(), 2)); + this->expectValues(this->V, this->testArray(40)); + this->V.erase(llvm::next(this->V.begin(), 2), llvm::next(this->V.begin(), 4)); + this->TestPtrs.erase(llvm::next(this->TestPtrs.begin(), 2), + llvm::next(this->TestPtrs.begin(), 4)); + this->expectValues(this->V, this->testArray(38)); + this->V.erase(llvm::next(this->V.begin(), 5), llvm::next(this->V.begin(), 10)); + 
this->TestPtrs.erase(llvm::next(this->TestPtrs.begin(), 5), + llvm::next(this->TestPtrs.begin(), 10)); + this->expectValues(this->V, this->testArray(33)); + this->V.erase(llvm::next(this->V.begin(), 13), llvm::next(this->V.begin(), 26)); + this->TestPtrs.erase(llvm::next(this->TestPtrs.begin(), 13), + llvm::next(this->TestPtrs.begin(), 26)); + this->expectValues(this->V, this->testArray(20)); + this->V.erase(llvm::next(this->V.begin(), 7), this->V.end()); + this->expectValues(this->V, this->testArray(7)); + this->V.erase(this->V.begin(), this->V.end()); + this->expectValues(this->V, this->testArray(0)); +} + +TYPED_TEST(TinyPtrVectorTest, Insert) { + this->V.insert(this->V.end(), this->TestPtrs[0]); + this->expectValues(this->V, this->testArray(1)); + this->V.clear(); + this->appendValues(this->V, this->testArray(4)); + this->expectValues(this->V, this->testArray(4)); + this->V.insert(this->V.end(), this->TestPtrs[4]); + this->expectValues(this->V, this->testArray(5)); + this->V.insert(this->V.begin(), this->TestPtrs[42]); + this->TestPtrs.insert(this->TestPtrs.begin(), this->TestPtrs[42]); + this->expectValues(this->V, this->testArray(6)); + this->V.insert(llvm::next(this->V.begin(), 3), this->TestPtrs[43]); + this->TestPtrs.insert(llvm::next(this->TestPtrs.begin(), 3), + this->TestPtrs[43]); + this->expectValues(this->V, this->testArray(7)); +} + +TYPED_TEST(TinyPtrVectorTest, InsertRange) { + this->V.insert(this->V.end(), this->TestPtrs.begin(), this->TestPtrs.begin()); + this->expectValues(this->V, this->testArray(0)); + this->V.insert(this->V.begin(), this->TestPtrs.begin(), + this->TestPtrs.begin()); + this->expectValues(this->V, this->testArray(0)); + this->V.insert(this->V.end(), this->TestPtrs.end(), this->TestPtrs.end()); + this->expectValues(this->V, this->testArray(0)); + this->V.insert(this->V.end(), this->TestPtrs.begin(), + llvm::next(this->TestPtrs.begin())); + this->expectValues(this->V, this->testArray(1)); + this->V.clear(); + this->V.insert(this->V.end(), this->TestPtrs.begin(), + llvm::next(this->TestPtrs.begin(), 2)); + this->expectValues(this->V, this->testArray(2)); + this->V.clear(); + this->V.insert(this->V.end(), this->TestPtrs.begin(), + llvm::next(this->TestPtrs.begin(), 42)); + this->expectValues(this->V, this->testArray(42)); + this->V.clear(); + this->V.insert(this->V.end(), + llvm::next(this->TestPtrs.begin(), 5), + llvm::next(this->TestPtrs.begin(), 13)); + this->V.insert(this->V.begin(), + llvm::next(this->TestPtrs.begin(), 0), + llvm::next(this->TestPtrs.begin(), 3)); + this->V.insert(llvm::next(this->V.begin(), 2), + llvm::next(this->TestPtrs.begin(), 2), + llvm::next(this->TestPtrs.begin(), 4)); + this->V.erase(llvm::next(this->V.begin(), 4)); + this->V.insert(llvm::next(this->V.begin(), 4), + llvm::next(this->TestPtrs.begin(), 4), + llvm::next(this->TestPtrs.begin(), 5)); + this->expectValues(this->V, this->testArray(13)); +} + +} diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp index 0fde6fc..89f7e8e 100644 --- a/unittests/ExecutionEngine/JIT/JITTest.cpp +++ b/unittests/ExecutionEngine/JIT/JITTest.cpp @@ -555,10 +555,10 @@ TEST_F(JITTest, FunctionPointersOutliveTheirCreator) { #endif } -// ARM and MIPS do not have an implementation +// ARM does not have an implementation // of replaceMachineCodeForFunction(), so recompileAndRelinkFunction // doesn't work. 
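
The erase/insert hunks above lean heavily on llvm::next, LLVM's C++03 stand-in for C++11's std::next (from llvm/ADT/STLExtras.h): it advances a copy of the iterator and leaves the original untouched, which keeps the TinyPtrVector and the reference std::vector stepping in lockstep. A sketch:

    std::vector<int> V(10, 0);
    std::vector<int>::iterator Mid = llvm::next(V.begin(), 5);
    // V.begin() itself is unchanged; Mid sits five positions further on.
    assert(std::distance(V.begin(), Mid) == 5);
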
-#if !defined(__arm__) && !defined(__mips__) +#if !defined(__arm__) TEST_F(JITTest, FunctionIsRecompiledAndRelinked) { Function *F = Function::Create(TypeBuilder<int(void), false>::get(Context), GlobalValue::ExternalLinkage, "test", M); diff --git a/unittests/Support/AlignOfTest.cpp b/unittests/Support/AlignOfTest.cpp index c45db2c..6f57668 100644 --- a/unittests/Support/AlignOfTest.cpp +++ b/unittests/Support/AlignOfTest.cpp @@ -178,150 +178,150 @@ TEST(AlignOfTest, BasicAlignedArray) { // types because of the bugs mentioned above where GCC and Clang both // disregard the arbitrary alignment specifier until the type is used to // declare a member of a struct. - EXPECT_LE(1u, alignOf<AlignedCharArray<SA1>::union_type>()); - EXPECT_LE(2u, alignOf<AlignedCharArray<SA2>::union_type>()); - EXPECT_LE(4u, alignOf<AlignedCharArray<SA4>::union_type>()); - EXPECT_LE(8u, alignOf<AlignedCharArray<SA8>::union_type>()); + EXPECT_LE(1u, alignOf<AlignedCharArrayUnion<SA1> >()); + EXPECT_LE(2u, alignOf<AlignedCharArrayUnion<SA2> >()); + EXPECT_LE(4u, alignOf<AlignedCharArrayUnion<SA4> >()); + EXPECT_LE(8u, alignOf<AlignedCharArrayUnion<SA8> >()); - EXPECT_LE(1u, sizeof(AlignedCharArray<SA1>::union_type)); - EXPECT_LE(2u, sizeof(AlignedCharArray<SA2>::union_type)); - EXPECT_LE(4u, sizeof(AlignedCharArray<SA4>::union_type)); - EXPECT_LE(8u, sizeof(AlignedCharArray<SA8>::union_type)); + EXPECT_LE(1u, sizeof(AlignedCharArrayUnion<SA1>)); + EXPECT_LE(2u, sizeof(AlignedCharArrayUnion<SA2>)); + EXPECT_LE(4u, sizeof(AlignedCharArrayUnion<SA4>)); + EXPECT_LE(8u, sizeof(AlignedCharArrayUnion<SA8>)); - EXPECT_EQ(1u, (alignOf<AlignedCharArray<SA1>::union_type>())); - EXPECT_EQ(2u, (alignOf<AlignedCharArray<SA1, SA2>::union_type>())); - EXPECT_EQ(4u, (alignOf<AlignedCharArray<SA1, SA2, SA4>::union_type>())); - EXPECT_EQ(8u, (alignOf<AlignedCharArray<SA1, SA2, SA4, SA8>::union_type>())); + EXPECT_EQ(1u, (alignOf<AlignedCharArrayUnion<SA1> >())); + EXPECT_EQ(2u, (alignOf<AlignedCharArrayUnion<SA1, SA2> >())); + EXPECT_EQ(4u, (alignOf<AlignedCharArrayUnion<SA1, SA2, SA4> >())); + EXPECT_EQ(8u, (alignOf<AlignedCharArrayUnion<SA1, SA2, SA4, SA8> >())); - EXPECT_EQ(1u, sizeof(AlignedCharArray<SA1>::union_type)); - EXPECT_EQ(2u, sizeof(AlignedCharArray<SA1, SA2>::union_type)); - EXPECT_EQ(4u, sizeof(AlignedCharArray<SA1, SA2, SA4>::union_type)); - EXPECT_EQ(8u, sizeof(AlignedCharArray<SA1, SA2, SA4, SA8>::union_type)); + EXPECT_EQ(1u, sizeof(AlignedCharArrayUnion<SA1>)); + EXPECT_EQ(2u, sizeof(AlignedCharArrayUnion<SA1, SA2>)); + EXPECT_EQ(4u, sizeof(AlignedCharArrayUnion<SA1, SA2, SA4>)); + EXPECT_EQ(8u, sizeof(AlignedCharArrayUnion<SA1, SA2, SA4, SA8>)); - EXPECT_EQ(1u, (alignOf<AlignedCharArray<SA1[1]>::union_type>())); - EXPECT_EQ(2u, (alignOf<AlignedCharArray<SA1[2], SA2[1]>::union_type>())); - EXPECT_EQ(4u, (alignOf<AlignedCharArray<SA1[42], SA2[55], - SA4[13]>::union_type>())); - EXPECT_EQ(8u, (alignOf<AlignedCharArray<SA1[2], SA2[1], - SA4, SA8>::union_type>())); + EXPECT_EQ(1u, (alignOf<AlignedCharArrayUnion<SA1[1]> >())); + EXPECT_EQ(2u, (alignOf<AlignedCharArrayUnion<SA1[2], SA2[1]> >())); + EXPECT_EQ(4u, (alignOf<AlignedCharArrayUnion<SA1[42], SA2[55], + SA4[13]> >())); + EXPECT_EQ(8u, (alignOf<AlignedCharArrayUnion<SA1[2], SA2[1], + SA4, SA8> >())); - EXPECT_EQ(1u, sizeof(AlignedCharArray<SA1[1]>::union_type)); - EXPECT_EQ(2u, sizeof(AlignedCharArray<SA1[2], SA2[1]>::union_type)); - EXPECT_EQ(4u, sizeof(AlignedCharArray<SA1[3], SA2[2], SA4>::union_type)); - EXPECT_EQ(16u, sizeof(AlignedCharArray<SA1, SA2[3], - 
SA4[3], SA8>::union_type)); + EXPECT_EQ(1u, sizeof(AlignedCharArrayUnion<SA1[1]>)); + EXPECT_EQ(2u, sizeof(AlignedCharArrayUnion<SA1[2], SA2[1]>)); + EXPECT_EQ(4u, sizeof(AlignedCharArrayUnion<SA1[3], SA2[2], SA4>)); + EXPECT_EQ(16u, sizeof(AlignedCharArrayUnion<SA1, SA2[3], + SA4[3], SA8>)); // For other tests we simply assert that the alignment of the union matches // that of the fundamental type and hope that we have covered any weird type // productions that would trigger bugs. - EXPECT_EQ(alignOf<char>(), alignOf<AlignedCharArray<char>::union_type>()); - EXPECT_EQ(alignOf<short>(), alignOf<AlignedCharArray<short>::union_type>()); - EXPECT_EQ(alignOf<int>(), alignOf<AlignedCharArray<int>::union_type>()); - EXPECT_EQ(alignOf<long>(), alignOf<AlignedCharArray<long>::union_type>()); + EXPECT_EQ(alignOf<char>(), alignOf<AlignedCharArrayUnion<char> >()); + EXPECT_EQ(alignOf<short>(), alignOf<AlignedCharArrayUnion<short> >()); + EXPECT_EQ(alignOf<int>(), alignOf<AlignedCharArrayUnion<int> >()); + EXPECT_EQ(alignOf<long>(), alignOf<AlignedCharArrayUnion<long> >()); EXPECT_EQ(alignOf<long long>(), - alignOf<AlignedCharArray<long long>::union_type>()); - EXPECT_EQ(alignOf<float>(), alignOf<AlignedCharArray<float>::union_type>()); - EXPECT_EQ(alignOf<double>(), alignOf<AlignedCharArray<double>::union_type>()); + alignOf<AlignedCharArrayUnion<long long> >()); + EXPECT_EQ(alignOf<float>(), alignOf<AlignedCharArrayUnion<float> >()); + EXPECT_EQ(alignOf<double>(), alignOf<AlignedCharArrayUnion<double> >()); EXPECT_EQ(alignOf<long double>(), - alignOf<AlignedCharArray<long double>::union_type>()); - EXPECT_EQ(alignOf<void *>(), alignOf<AlignedCharArray<void *>::union_type>()); - EXPECT_EQ(alignOf<int *>(), alignOf<AlignedCharArray<int *>::union_type>()); + alignOf<AlignedCharArrayUnion<long double> >()); + EXPECT_EQ(alignOf<void *>(), alignOf<AlignedCharArrayUnion<void *> >()); + EXPECT_EQ(alignOf<int *>(), alignOf<AlignedCharArrayUnion<int *> >()); EXPECT_EQ(alignOf<double (*)(double)>(), - alignOf<AlignedCharArray<double (*)(double)>::union_type>()); + alignOf<AlignedCharArrayUnion<double (*)(double)> >()); EXPECT_EQ(alignOf<double (S6::*)()>(), - alignOf<AlignedCharArray<double (S6::*)()>::union_type>()); - EXPECT_EQ(alignOf<S1>(), alignOf<AlignedCharArray<S1>::union_type>()); - EXPECT_EQ(alignOf<S2>(), alignOf<AlignedCharArray<S2>::union_type>()); - EXPECT_EQ(alignOf<S3>(), alignOf<AlignedCharArray<S3>::union_type>()); - EXPECT_EQ(alignOf<S4>(), alignOf<AlignedCharArray<S4>::union_type>()); - EXPECT_EQ(alignOf<S5>(), alignOf<AlignedCharArray<S5>::union_type>()); - EXPECT_EQ(alignOf<S6>(), alignOf<AlignedCharArray<S6>::union_type>()); - EXPECT_EQ(alignOf<D1>(), alignOf<AlignedCharArray<D1>::union_type>()); - EXPECT_EQ(alignOf<D2>(), alignOf<AlignedCharArray<D2>::union_type>()); - EXPECT_EQ(alignOf<D3>(), alignOf<AlignedCharArray<D3>::union_type>()); - EXPECT_EQ(alignOf<D4>(), alignOf<AlignedCharArray<D4>::union_type>()); - EXPECT_EQ(alignOf<D5>(), alignOf<AlignedCharArray<D5>::union_type>()); - EXPECT_EQ(alignOf<D6>(), alignOf<AlignedCharArray<D6>::union_type>()); - EXPECT_EQ(alignOf<D7>(), alignOf<AlignedCharArray<D7>::union_type>()); - EXPECT_EQ(alignOf<D8>(), alignOf<AlignedCharArray<D8>::union_type>()); - EXPECT_EQ(alignOf<D9>(), alignOf<AlignedCharArray<D9>::union_type>()); - EXPECT_EQ(alignOf<V1>(), alignOf<AlignedCharArray<V1>::union_type>()); - EXPECT_EQ(alignOf<V2>(), alignOf<AlignedCharArray<V2>::union_type>()); - EXPECT_EQ(alignOf<V3>(), alignOf<AlignedCharArray<V3>::union_type>()); -
EXPECT_EQ(alignOf<V4>(), alignOf<AlignedCharArray<V4>::union_type>()); - EXPECT_EQ(alignOf<V5>(), alignOf<AlignedCharArray<V5>::union_type>()); - EXPECT_EQ(alignOf<V6>(), alignOf<AlignedCharArray<V6>::union_type>()); - EXPECT_EQ(alignOf<V7>(), alignOf<AlignedCharArray<V7>::union_type>()); + alignOf<AlignedCharArrayUnion<double (S6::*)()> >()); + EXPECT_EQ(alignOf<S1>(), alignOf<AlignedCharArrayUnion<S1> >()); + EXPECT_EQ(alignOf<S2>(), alignOf<AlignedCharArrayUnion<S2> >()); + EXPECT_EQ(alignOf<S3>(), alignOf<AlignedCharArrayUnion<S3> >()); + EXPECT_EQ(alignOf<S4>(), alignOf<AlignedCharArrayUnion<S4> >()); + EXPECT_EQ(alignOf<S5>(), alignOf<AlignedCharArrayUnion<S5> >()); + EXPECT_EQ(alignOf<S6>(), alignOf<AlignedCharArrayUnion<S6> >()); + EXPECT_EQ(alignOf<D1>(), alignOf<AlignedCharArrayUnion<D1> >()); + EXPECT_EQ(alignOf<D2>(), alignOf<AlignedCharArrayUnion<D2> >()); + EXPECT_EQ(alignOf<D3>(), alignOf<AlignedCharArrayUnion<D3> >()); + EXPECT_EQ(alignOf<D4>(), alignOf<AlignedCharArrayUnion<D4> >()); + EXPECT_EQ(alignOf<D5>(), alignOf<AlignedCharArrayUnion<D5> >()); + EXPECT_EQ(alignOf<D6>(), alignOf<AlignedCharArrayUnion<D6> >()); + EXPECT_EQ(alignOf<D7>(), alignOf<AlignedCharArrayUnion<D7> >()); + EXPECT_EQ(alignOf<D8>(), alignOf<AlignedCharArrayUnion<D8> >()); + EXPECT_EQ(alignOf<D9>(), alignOf<AlignedCharArrayUnion<D9> >()); + EXPECT_EQ(alignOf<V1>(), alignOf<AlignedCharArrayUnion<V1> >()); + EXPECT_EQ(alignOf<V2>(), alignOf<AlignedCharArrayUnion<V2> >()); + EXPECT_EQ(alignOf<V3>(), alignOf<AlignedCharArrayUnion<V3> >()); + EXPECT_EQ(alignOf<V4>(), alignOf<AlignedCharArrayUnion<V4> >()); + EXPECT_EQ(alignOf<V5>(), alignOf<AlignedCharArrayUnion<V5> >()); + EXPECT_EQ(alignOf<V6>(), alignOf<AlignedCharArrayUnion<V6> >()); + EXPECT_EQ(alignOf<V7>(), alignOf<AlignedCharArrayUnion<V7> >()); // Some versions of MSVC get this wrong somewhat disturbingly. 
The failure // appears to be benign: alignOf<V8>() produces a preposterous value: 12 #ifndef _MSC_VER - EXPECT_EQ(alignOf<V8>(), alignOf<AlignedCharArray<V8>::union_type>()); + EXPECT_EQ(alignOf<V8>(), alignOf<AlignedCharArrayUnion<V8> >()); #endif - EXPECT_EQ(sizeof(char), sizeof(AlignedCharArray<char>::union_type)); - EXPECT_EQ(sizeof(char[1]), sizeof(AlignedCharArray<char[1]>::union_type)); - EXPECT_EQ(sizeof(char[2]), sizeof(AlignedCharArray<char[2]>::union_type)); - EXPECT_EQ(sizeof(char[3]), sizeof(AlignedCharArray<char[3]>::union_type)); - EXPECT_EQ(sizeof(char[4]), sizeof(AlignedCharArray<char[4]>::union_type)); - EXPECT_EQ(sizeof(char[5]), sizeof(AlignedCharArray<char[5]>::union_type)); - EXPECT_EQ(sizeof(char[8]), sizeof(AlignedCharArray<char[8]>::union_type)); - EXPECT_EQ(sizeof(char[13]), sizeof(AlignedCharArray<char[13]>::union_type)); - EXPECT_EQ(sizeof(char[16]), sizeof(AlignedCharArray<char[16]>::union_type)); - EXPECT_EQ(sizeof(char[21]), sizeof(AlignedCharArray<char[21]>::union_type)); - EXPECT_EQ(sizeof(char[32]), sizeof(AlignedCharArray<char[32]>::union_type)); - EXPECT_EQ(sizeof(short), sizeof(AlignedCharArray<short>::union_type)); - EXPECT_EQ(sizeof(int), sizeof(AlignedCharArray<int>::union_type)); - EXPECT_EQ(sizeof(long), sizeof(AlignedCharArray<long>::union_type)); + EXPECT_EQ(sizeof(char), sizeof(AlignedCharArrayUnion<char>)); + EXPECT_EQ(sizeof(char[1]), sizeof(AlignedCharArrayUnion<char[1]>)); + EXPECT_EQ(sizeof(char[2]), sizeof(AlignedCharArrayUnion<char[2]>)); + EXPECT_EQ(sizeof(char[3]), sizeof(AlignedCharArrayUnion<char[3]>)); + EXPECT_EQ(sizeof(char[4]), sizeof(AlignedCharArrayUnion<char[4]>)); + EXPECT_EQ(sizeof(char[5]), sizeof(AlignedCharArrayUnion<char[5]>)); + EXPECT_EQ(sizeof(char[8]), sizeof(AlignedCharArrayUnion<char[8]>)); + EXPECT_EQ(sizeof(char[13]), sizeof(AlignedCharArrayUnion<char[13]>)); + EXPECT_EQ(sizeof(char[16]), sizeof(AlignedCharArrayUnion<char[16]>)); + EXPECT_EQ(sizeof(char[21]), sizeof(AlignedCharArrayUnion<char[21]>)); + EXPECT_EQ(sizeof(char[32]), sizeof(AlignedCharArrayUnion<char[32]>)); + EXPECT_EQ(sizeof(short), sizeof(AlignedCharArrayUnion<short>)); + EXPECT_EQ(sizeof(int), sizeof(AlignedCharArrayUnion<int>)); + EXPECT_EQ(sizeof(long), sizeof(AlignedCharArrayUnion<long>)); EXPECT_EQ(sizeof(long long), - sizeof(AlignedCharArray<long long>::union_type)); - EXPECT_EQ(sizeof(float), sizeof(AlignedCharArray<float>::union_type)); - EXPECT_EQ(sizeof(double), sizeof(AlignedCharArray<double>::union_type)); + sizeof(AlignedCharArrayUnion<long long>)); + EXPECT_EQ(sizeof(float), sizeof(AlignedCharArrayUnion<float>)); + EXPECT_EQ(sizeof(double), sizeof(AlignedCharArrayUnion<double>)); EXPECT_EQ(sizeof(long double), - sizeof(AlignedCharArray<long double>::union_type)); - EXPECT_EQ(sizeof(void *), sizeof(AlignedCharArray<void *>::union_type)); - EXPECT_EQ(sizeof(int *), sizeof(AlignedCharArray<int *>::union_type)); + sizeof(AlignedCharArrayUnion<long double>)); + EXPECT_EQ(sizeof(void *), sizeof(AlignedCharArrayUnion<void *>)); + EXPECT_EQ(sizeof(int *), sizeof(AlignedCharArrayUnion<int *>)); EXPECT_EQ(sizeof(double (*)(double)), - sizeof(AlignedCharArray<double (*)(double)>::union_type)); + sizeof(AlignedCharArrayUnion<double (*)(double)>)); EXPECT_EQ(sizeof(double (S6::*)()), - sizeof(AlignedCharArray<double (S6::*)()>::union_type)); - EXPECT_EQ(sizeof(S1), sizeof(AlignedCharArray<S1>::union_type)); - EXPECT_EQ(sizeof(S2), sizeof(AlignedCharArray<S2>::union_type)); - EXPECT_EQ(sizeof(S3), sizeof(AlignedCharArray<S3>::union_type)); - 
EXPECT_EQ(sizeof(S4), sizeof(AlignedCharArray<S4>::union_type)); - EXPECT_EQ(sizeof(S5), sizeof(AlignedCharArray<S5>::union_type)); - EXPECT_EQ(sizeof(S6), sizeof(AlignedCharArray<S6>::union_type)); - EXPECT_EQ(sizeof(D1), sizeof(AlignedCharArray<D1>::union_type)); - EXPECT_EQ(sizeof(D2), sizeof(AlignedCharArray<D2>::union_type)); - EXPECT_EQ(sizeof(D3), sizeof(AlignedCharArray<D3>::union_type)); - EXPECT_EQ(sizeof(D4), sizeof(AlignedCharArray<D4>::union_type)); - EXPECT_EQ(sizeof(D5), sizeof(AlignedCharArray<D5>::union_type)); - EXPECT_EQ(sizeof(D6), sizeof(AlignedCharArray<D6>::union_type)); - EXPECT_EQ(sizeof(D7), sizeof(AlignedCharArray<D7>::union_type)); - EXPECT_EQ(sizeof(D8), sizeof(AlignedCharArray<D8>::union_type)); - EXPECT_EQ(sizeof(D9), sizeof(AlignedCharArray<D9>::union_type)); - EXPECT_EQ(sizeof(D9[1]), sizeof(AlignedCharArray<D9[1]>::union_type)); - EXPECT_EQ(sizeof(D9[2]), sizeof(AlignedCharArray<D9[2]>::union_type)); - EXPECT_EQ(sizeof(D9[3]), sizeof(AlignedCharArray<D9[3]>::union_type)); - EXPECT_EQ(sizeof(D9[4]), sizeof(AlignedCharArray<D9[4]>::union_type)); - EXPECT_EQ(sizeof(D9[5]), sizeof(AlignedCharArray<D9[5]>::union_type)); - EXPECT_EQ(sizeof(D9[8]), sizeof(AlignedCharArray<D9[8]>::union_type)); - EXPECT_EQ(sizeof(D9[13]), sizeof(AlignedCharArray<D9[13]>::union_type)); - EXPECT_EQ(sizeof(D9[16]), sizeof(AlignedCharArray<D9[16]>::union_type)); - EXPECT_EQ(sizeof(D9[21]), sizeof(AlignedCharArray<D9[21]>::union_type)); - EXPECT_EQ(sizeof(D9[32]), sizeof(AlignedCharArray<D9[32]>::union_type)); - EXPECT_EQ(sizeof(V1), sizeof(AlignedCharArray<V1>::union_type)); - EXPECT_EQ(sizeof(V2), sizeof(AlignedCharArray<V2>::union_type)); - EXPECT_EQ(sizeof(V3), sizeof(AlignedCharArray<V3>::union_type)); - EXPECT_EQ(sizeof(V4), sizeof(AlignedCharArray<V4>::union_type)); - EXPECT_EQ(sizeof(V5), sizeof(AlignedCharArray<V5>::union_type)); - EXPECT_EQ(sizeof(V6), sizeof(AlignedCharArray<V6>::union_type)); - EXPECT_EQ(sizeof(V7), sizeof(AlignedCharArray<V7>::union_type)); + sizeof(AlignedCharArrayUnion<double (S6::*)()>)); + EXPECT_EQ(sizeof(S1), sizeof(AlignedCharArrayUnion<S1>)); + EXPECT_EQ(sizeof(S2), sizeof(AlignedCharArrayUnion<S2>)); + EXPECT_EQ(sizeof(S3), sizeof(AlignedCharArrayUnion<S3>)); + EXPECT_EQ(sizeof(S4), sizeof(AlignedCharArrayUnion<S4>)); + EXPECT_EQ(sizeof(S5), sizeof(AlignedCharArrayUnion<S5>)); + EXPECT_EQ(sizeof(S6), sizeof(AlignedCharArrayUnion<S6>)); + EXPECT_EQ(sizeof(D1), sizeof(AlignedCharArrayUnion<D1>)); + EXPECT_EQ(sizeof(D2), sizeof(AlignedCharArrayUnion<D2>)); + EXPECT_EQ(sizeof(D3), sizeof(AlignedCharArrayUnion<D3>)); + EXPECT_EQ(sizeof(D4), sizeof(AlignedCharArrayUnion<D4>)); + EXPECT_EQ(sizeof(D5), sizeof(AlignedCharArrayUnion<D5>)); + EXPECT_EQ(sizeof(D6), sizeof(AlignedCharArrayUnion<D6>)); + EXPECT_EQ(sizeof(D7), sizeof(AlignedCharArrayUnion<D7>)); + EXPECT_EQ(sizeof(D8), sizeof(AlignedCharArrayUnion<D8>)); + EXPECT_EQ(sizeof(D9), sizeof(AlignedCharArrayUnion<D9>)); + EXPECT_EQ(sizeof(D9[1]), sizeof(AlignedCharArrayUnion<D9[1]>)); + EXPECT_EQ(sizeof(D9[2]), sizeof(AlignedCharArrayUnion<D9[2]>)); + EXPECT_EQ(sizeof(D9[3]), sizeof(AlignedCharArrayUnion<D9[3]>)); + EXPECT_EQ(sizeof(D9[4]), sizeof(AlignedCharArrayUnion<D9[4]>)); + EXPECT_EQ(sizeof(D9[5]), sizeof(AlignedCharArrayUnion<D9[5]>)); + EXPECT_EQ(sizeof(D9[8]), sizeof(AlignedCharArrayUnion<D9[8]>)); + EXPECT_EQ(sizeof(D9[13]), sizeof(AlignedCharArrayUnion<D9[13]>)); + EXPECT_EQ(sizeof(D9[16]), sizeof(AlignedCharArrayUnion<D9[16]>)); + EXPECT_EQ(sizeof(D9[21]), 
sizeof(AlignedCharArrayUnion<D9[21]>)); + EXPECT_EQ(sizeof(D9[32]), sizeof(AlignedCharArrayUnion<D9[32]>)); + EXPECT_EQ(sizeof(V1), sizeof(AlignedCharArrayUnion<V1>)); + EXPECT_EQ(sizeof(V2), sizeof(AlignedCharArrayUnion<V2>)); + EXPECT_EQ(sizeof(V3), sizeof(AlignedCharArrayUnion<V3>)); + EXPECT_EQ(sizeof(V4), sizeof(AlignedCharArrayUnion<V4>)); + EXPECT_EQ(sizeof(V5), sizeof(AlignedCharArrayUnion<V5>)); + EXPECT_EQ(sizeof(V6), sizeof(AlignedCharArrayUnion<V6>)); + EXPECT_EQ(sizeof(V7), sizeof(AlignedCharArrayUnion<V7>)); // Some versions of MSVC also get this wrong. The failure again appears to be // benign: sizeof(V8) is only 52 bytes, but our array reserves 56. #ifndef _MSC_VER - EXPECT_EQ(sizeof(V8), sizeof(AlignedCharArray<V8>::union_type)); + EXPECT_EQ(sizeof(V8), sizeof(AlignedCharArrayUnion<V8>)); #endif } diff --git a/unittests/Support/CMakeLists.txt b/unittests/Support/CMakeLists.txt index 8859678..3b9bf84 100644 --- a/unittests/Support/CMakeLists.txt +++ b/unittests/Support/CMakeLists.txt @@ -12,6 +12,7 @@ add_llvm_unittest(SupportTests ConstantRangeTest.cpp DataExtractorTest.cpp EndianTest.cpp + FileOutputBufferTest.cpp IntegersSubsetTest.cpp LeakDetectorTest.cpp ManagedStatic.cpp diff --git a/unittests/Support/Casting.cpp b/unittests/Support/Casting.cpp index ca0b40b..dc0205f 100644 --- a/unittests/Support/Casting.cpp +++ b/unittests/Support/Casting.cpp @@ -95,8 +95,9 @@ TEST(CastingTest, cast) { EXPECT_NE(&F5, null_foo); const foo *F6 = cast<foo>(B4); EXPECT_NE(F6, null_foo); - foo *F7 = cast<foo>(fub()); - EXPECT_EQ(F7, null_foo); + // Can't pass null pointer to cast<>. + // foo *F7 = cast<foo>(fub()); + // EXPECT_EQ(F7, null_foo); foo *F8 = B1.baz(); EXPECT_NE(F8, null_foo); } @@ -121,7 +122,8 @@ TEST(CastingTest, dyn_cast) { EXPECT_NE(F2, null_foo); const foo *F3 = dyn_cast<foo>(B4); EXPECT_NE(F3, null_foo); - // foo *F4 = dyn_cast<foo>(fub()); // not permittible + // Can't pass null pointer to dyn_cast<>. + // foo *F4 = dyn_cast<foo>(fub()); // EXPECT_EQ(F4, null_foo); foo *F5 = B1.daz(); EXPECT_NE(F5, null_foo); diff --git a/unittests/Support/CommandLineTest.cpp b/unittests/Support/CommandLineTest.cpp index 72fa24a..13e9038 100644 --- a/unittests/Support/CommandLineTest.cpp +++ b/unittests/Support/CommandLineTest.cpp @@ -55,6 +55,17 @@ TEST(CommandLineTest, ParseEnvironment) { EXPECT_EQ("hello", EnvironmentTestOption); } +// This test used to make valgrind complain +// ("Conditional jump or move depends on uninitialised value(s)") +TEST(CommandLineTest, ParseEnvironmentToLocalVar) { + // Put cl::opt on stack to check for proper initialization of fields. 
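
The Casting.cpp hunks above comment the null-pointer calls out because cast<> and dyn_cast<> assert on null input. When a pointer may legitimately be null, the usual escape hatch is the _or_null variants from llvm/Support/Casting.h; a sketch against the test's own foo/bar hierarchy, assuming its fub() helper still returns a null bar*:

    bar *B = fub();                    // may be null
    foo *F = dyn_cast_or_null<foo>(B); // yields null instead of asserting
    assert(F == null_foo);
    foo *G = cast_or_null<foo>(B);     // same guard for checked casts
    assert(G == null_foo);
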
+ cl::opt<std::string> EnvironmentTestOptionLocal("env-test-opt-local"); + TempEnvVar TEV(test_env_var, "-env-test-opt-local=hello-local"); + EXPECT_EQ("", EnvironmentTestOptionLocal); + cl::ParseEnvironmentOptions("CommandLineTest", test_env_var); + EXPECT_EQ("hello-local", EnvironmentTestOptionLocal); +} + #endif // SKIP_ENVIRONMENT_TESTS } // anonymous namespace diff --git a/unittests/Support/DataExtractorTest.cpp b/unittests/Support/DataExtractorTest.cpp index 9813e46..ec8bd3d 100644 --- a/unittests/Support/DataExtractorTest.cpp +++ b/unittests/Support/DataExtractorTest.cpp @@ -16,6 +16,7 @@ namespace { const char numberData[] = "\x80\x90\xFF\xFF\x80\x00\x00\x00"; const char stringData[] = "hellohello\0hello"; const char leb128data[] = "\xA6\x49"; +const char bigleb128data[] = "\xAA\xA9\xFF\xAA\xFF\xAA\xFF\x4A"; TEST(DataExtractorTest, OffsetOverflow) { DataExtractor DE(StringRef(numberData, sizeof(numberData)-1), false, 8); @@ -106,6 +107,14 @@ TEST(DataExtractorTest, LEB128) { offset = 0; EXPECT_EQ(-7002LL, DE.getSLEB128(&offset)); EXPECT_EQ(2U, offset); + + DataExtractor BDE(StringRef(bigleb128data, sizeof(bigleb128data)-1), false,8); + offset = 0; + EXPECT_EQ(42218325750568106ULL, BDE.getULEB128(&offset)); + EXPECT_EQ(8U, offset); + offset = 0; + EXPECT_EQ(-29839268287359830LL, BDE.getSLEB128(&offset)); + EXPECT_EQ(8U, offset); } } diff --git a/unittests/Support/FileOutputBufferTest.cpp b/unittests/Support/FileOutputBufferTest.cpp new file mode 100644 index 0000000..edd350a --- /dev/null +++ b/unittests/Support/FileOutputBufferTest.cpp @@ -0,0 +1,137 @@ +//===- llvm/unittest/Support/FileOutputBuffer.cpp - unit tests ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/PathV2.h" +#include "llvm/Support/raw_ostream.h" + +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::sys; + +#define ASSERT_NO_ERROR(x) \ + if (error_code ASSERT_NO_ERROR_ec = x) { \ + errs() << #x ": did not return errc::success.\n" \ + << "error number: " << ASSERT_NO_ERROR_ec.value() << "\n" \ + << "error message: " << ASSERT_NO_ERROR_ec.message() << "\n"; \ + } else {} + +namespace { + + +// NOTE: Temporarily run this test on unix only. Once the file mapping +// routines are ported to Windows, this conditional can be removed. +#if LLVM_ON_UNIX + + +TEST(FileOutputBuffer, Test) { + // Create unique temporary directory for these tests + SmallString<128> TestDirectory; + { + int fd; + ASSERT_NO_ERROR( + fs::unique_file("FileOutputBuffer-test-%%-%%-%%-%%/dir", fd, + TestDirectory)); + ::close(fd); + TestDirectory = path::parent_path(TestDirectory); + } + + // TEST 1: Verify commit case. + SmallString<128> File1(TestDirectory); + File1.append("/file1"); + { + OwningPtr<FileOutputBuffer> Buffer; + ASSERT_NO_ERROR(FileOutputBuffer::create(File1, 8192, Buffer)); + // Start buffer with special header. + memcpy(Buffer->getBufferStart(), "AABBCCDDEEFFGGHHIIJJ", 20); + // Write to end of buffer to verify it is writable. + memcpy(Buffer->getBufferEnd() - 20, "AABBCCDDEEFFGGHHIIJJ", 20); + // Commit buffer. + ASSERT_NO_ERROR(Buffer->commit()); + } + // Verify file exists and starts with special header. 
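
For reference, the original two-byte LEB128 vector in the DataExtractor hunk above decodes as follows: 0xA6 carries payload 0x26 (38) with its continuation bit set, and 0x49 carries payload 0x49 (73) shifted left by 7, so getULEB128 yields 38 + 73*128 = 9382; for getSLEB128 the final byte has bit 6 set, so the 14 payload bits are sign-extended, giving 9382 - 2^14 = -7002, exactly the value the test expects. The new eight-byte vector simply drives the same loop through seven continuation bytes.
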
+ bool MagicMatches = false; + ASSERT_NO_ERROR(fs::has_magic(Twine(File1), Twine("AABBCCDDEEFFGGHHIIJJ"), + MagicMatches)); + EXPECT_TRUE(MagicMatches); + // Verify file is correct size. + uint64_t File1Size; + ASSERT_NO_ERROR(fs::file_size(Twine(File1), File1Size)); + ASSERT_EQ(File1Size, 8192ULL); + + // TEST 2: Verify abort case. + SmallString<128> File2(TestDirectory); + File2.append("/file2"); + { + OwningPtr<FileOutputBuffer> Buffer2; + ASSERT_NO_ERROR(FileOutputBuffer::create(File2, 8192, Buffer2)); + // Fill buffer with special header. + memcpy(Buffer2->getBufferStart(), "AABBCCDDEEFFGGHHIIJJ", 20); + // Do *not* commit buffer. + } + // Verify file does not exist (because buffer not committed). + bool Exists = false; + ASSERT_NO_ERROR(fs::exists(Twine(File2), Exists)); + EXPECT_FALSE(Exists); + + + // TEST 3: Verify sizing down case. + SmallString<128> File3(TestDirectory); + File3.append("/file3"); + { + OwningPtr<FileOutputBuffer> Buffer; + ASSERT_NO_ERROR(FileOutputBuffer::create(File3, 8192000, Buffer)); + // Start buffer with special header. + memcpy(Buffer->getBufferStart(), "AABBCCDDEEFFGGHHIIJJ", 20); + // Write to end of buffer to verify it is writable. + memcpy(Buffer->getBufferEnd() - 20, "AABBCCDDEEFFGGHHIIJJ", 20); + // Commit buffer, but size it down to a smaller size. + ASSERT_NO_ERROR(Buffer->commit(5000)); + } + // Verify file exists and starts with special header. + bool MagicMatches3 = false; + ASSERT_NO_ERROR(fs::has_magic(Twine(File3), Twine("AABBCCDDEEFFGGHHIIJJ"), + MagicMatches3)); + EXPECT_TRUE(MagicMatches3); + // Verify file is correct size. + uint64_t File3Size; + ASSERT_NO_ERROR(fs::file_size(Twine(File3), File3Size)); + ASSERT_EQ(File3Size, 5000ULL); + + + // TEST 4: Verify file can be made executable. + SmallString<128> File4(TestDirectory); + File4.append("/file4"); + { + OwningPtr<FileOutputBuffer> Buffer; + ASSERT_NO_ERROR(FileOutputBuffer::create(File4, 8192, Buffer, + FileOutputBuffer::F_executable)); + // Start buffer with special header. + memcpy(Buffer->getBufferStart(), "AABBCCDDEEFFGGHHIIJJ", 20); + // Commit buffer. + ASSERT_NO_ERROR(Buffer->commit()); + } + // Verify file exists and is executable. + fs::file_status Status; + ASSERT_NO_ERROR(fs::status(Twine(File4), Status)); + bool IsExecutable = (Status.permissions() & fs::owner_exe); + EXPECT_TRUE(IsExecutable); + + // Clean up. + uint32_t RemovedCount; + ASSERT_NO_ERROR(fs::remove_all(TestDirectory.str(), RemovedCount)); +} + +#endif // LLVM_ON_UNIX + +} // anonymous namespace diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp index a071a5a..63c9ae0 100644 --- a/unittests/Support/Path.cpp +++ b/unittests/Support/Path.cpp @@ -340,44 +340,51 @@ TEST_F(FileSystemTest, Permissions) { } #endif -#if !defined(_WIN32) // FIXME: temporary suppressed. TEST_F(FileSystemTest, FileMapping) { // Create a temp file. 
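
Condensing the four FileOutputBuffer tests above into the core pattern of the new API before the file-mapping changes continue below: the destination file only comes into existence on commit(), an uncommitted buffer simply vanishes with its scope, and commit(N) truncates to a smaller final size. An illustrative sketch built only from calls the tests use (the path is made up):

    OwningPtr<FileOutputBuffer> Buf;
    if (error_code EC = FileOutputBuffer::create("out.bin", 8192, Buf))
      report_fatal_error(EC.message());
    memcpy(Buf->getBufferStart(), "AABBCCDDEEFFGGHHIIJJ", 20);
    if (error_code EC = Buf->commit()) // or commit(5000) to size down
      report_fatal_error(EC.message());
    // Returning before commit() would have left no "out.bin" at all.
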
int FileDescriptor; SmallString<64> TempPath; ASSERT_NO_ERROR( fs::unique_file("%%-%%-%%-%%.temp", FileDescriptor, TempPath)); - - // Grow temp file to be 4096 bytes - ASSERT_NO_ERROR(sys::fs::resize_file(Twine(TempPath), 4096)); - // Map in temp file and add some content - void* MappedMemory; - ASSERT_NO_ERROR(fs::map_file_pages(Twine(TempPath), 0, 4096, - true /*writable*/, MappedMemory)); - char* Memory = reinterpret_cast<char*>(MappedMemory); - strcpy(Memory, "hello there"); - - // Unmap temp file - ASSERT_NO_ERROR(fs::unmap_file_pages(MappedMemory, 4096)); - MappedMemory = NULL; - Memory = NULL; + error_code EC; + StringRef Val("hello there"); + { + fs::mapped_file_region mfr(FileDescriptor, + fs::mapped_file_region::readwrite, + 4096, + 0, + EC); + ASSERT_NO_ERROR(EC); + std::copy(Val.begin(), Val.end(), mfr.data()); + // Explicitly add a 0. + mfr.data()[Val.size()] = 0; + // Unmap temp file + } // Map it back in read-only - ASSERT_NO_ERROR(fs::map_file_pages(Twine(TempPath), 0, 4096, - false /*read-only*/, MappedMemory)); + fs::mapped_file_region mfr(Twine(TempPath), + fs::mapped_file_region::readonly, + 0, + 0, + EC); + ASSERT_NO_ERROR(EC); // Verify content - Memory = reinterpret_cast<char*>(MappedMemory); - bool SAME = (strcmp(Memory, "hello there") == 0); - EXPECT_TRUE(SAME); + EXPECT_EQ(StringRef(mfr.const_data()), Val); // Unmap temp file - ASSERT_NO_ERROR(fs::unmap_file_pages(MappedMemory, 4096)); - MappedMemory = NULL; - Memory = NULL; -} -#endif - +#if LLVM_USE_RVALUE_REFERENCES + fs::mapped_file_region m(Twine(TempPath), + fs::mapped_file_region::readonly, + 0, + 0, + EC); + ASSERT_NO_ERROR(EC); + const char *Data = m.const_data(); + fs::mapped_file_region mfrrv(llvm_move(m)); + EXPECT_EQ(mfrrv.const_data(), Data); +#endif +} } // anonymous namespace diff --git a/unittests/Support/YAMLParserTest.cpp b/unittests/Support/YAMLParserTest.cpp index e88427a..480a573 100644 --- a/unittests/Support/YAMLParserTest.cpp +++ b/unittests/Support/YAMLParserTest.cpp @@ -16,11 +16,17 @@ namespace llvm { +static void SuppressDiagnosticsOutput(const SMDiagnostic &, void *) { + // Prevent SourceMgr from writing errors to stderr + // to reduce noise in unit test runs. +} + // Checks that the given input gives a parse error. Makes sure that an error // text is available and the parse fails. static void ExpectParseError(StringRef Message, StringRef Input) { SourceMgr SM; yaml::Stream Stream(Input, SM); + SM.setDiagHandler(SuppressDiagnosticsOutput); EXPECT_FALSE(Stream.validate()) << Message << ": " << Input; EXPECT_TRUE(Stream.failed()) << Message << ": " << Input; } diff --git a/unittests/VMCore/CMakeLists.txt b/unittests/VMCore/CMakeLists.txt index 632eab5..4025c7a 100644 --- a/unittests/VMCore/CMakeLists.txt +++ b/unittests/VMCore/CMakeLists.txt @@ -13,6 +13,7 @@ set(VMCoreSources MetadataTest.cpp PassManagerTest.cpp TypeBuilderTest.cpp + TypesTest.cpp ValueMapTest.cpp VerifierTest.cpp ) diff --git a/unittests/VMCore/TypesTest.cpp b/unittests/VMCore/TypesTest.cpp new file mode 100644 index 0000000..0416643 --- /dev/null +++ b/unittests/VMCore/TypesTest.cpp @@ -0,0 +1,30 @@ +//===- llvm/unittest/VMCore/TypesTest.cpp - Type unit tests ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" +#include "gtest/gtest.h" +using namespace llvm; + +namespace { + +TEST(TypesTest, StructType) { + LLVMContext C; + + // PR13522 + StructType *Struct = StructType::create(C, "FooBar"); + EXPECT_EQ("FooBar", Struct->getName()); + Struct->setName(Struct->getName().substr(0, 3)); + EXPECT_EQ("Foo", Struct->getName()); + Struct->setName(""); + EXPECT_TRUE(Struct->getName().empty()); + EXPECT_FALSE(Struct->hasName()); +} + +} // end anonymous namespace diff --git a/utils/Makefile b/utils/Makefile index ecb30be..7a3c17d 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -9,7 +9,7 @@ LEVEL = .. PARALLEL_DIRS := FileCheck FileUpdate TableGen PerfectShuffle \ - count fpcmp llvm-lit not unittest + count fpcmp llvm-lit not unittest yaml2obj EXTRA_DIST := check-each-file codegen-diff countloc.sh \ DSAclean.py DSAextract.py emacs findsym.pl GenLibDeps.pl \ diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp index 0a8ae46..abcec8f 100644 --- a/utils/TableGen/AsmMatcherEmitter.cpp +++ b/utils/TableGen/AsmMatcherEmitter.cpp @@ -2447,7 +2447,9 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { emitSubtargetFeatureFlagEnumeration(Info, OS); // Emit the function to match a register name to number. - emitMatchRegisterName(Target, AsmParser, OS); + // This should be omitted for Mips target + if (AsmParser->getValueAsBit("ShouldEmitMatchRegisterName")) + emitMatchRegisterName(Target, AsmParser, OS); OS << "#endif // GET_REGISTER_MATCHER\n\n"; @@ -2649,7 +2651,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " for (unsigned i = 0; i != " << MaxNumOperands << "; ++i) {\n"; OS << " if (i + 1 >= Operands.size()) {\n"; OS << " OperandsValid = (it->Classes[i] == " <<"InvalidMatchClass);\n"; - OS << " if (!OperandsValid) ErrorInfo = i + 1;\n;"; + OS << " if (!OperandsValid) ErrorInfo = i + 1;\n"; OS << " break;\n"; OS << " }\n"; OS << " unsigned Diag = validateOperandClass(Operands[i+1],\n"; @@ -2716,8 +2718,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << " }\n\n"; OS << " // Okay, we had no match. 
Try to return a useful error code.\n"; - OS << " if (HadMatchOtherThanPredicate || !HadMatchOtherThanFeatures)"; - OS << " return RetCode;\n"; + OS << " if (HadMatchOtherThanPredicate || !HadMatchOtherThanFeatures)\n"; + OS << " return RetCode;\n\n"; OS << " // Missing feature matches return which features were missing\n"; OS << " ErrorInfo = MissingFeatures;\n"; OS << " return Match_MissingFeature;\n"; diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp index bd153a8..57979b3 100644 --- a/utils/TableGen/AsmWriterEmitter.cpp +++ b/utils/TableGen/AsmWriterEmitter.cpp @@ -14,8 +14,8 @@ #include "AsmWriterInst.h" #include "CodeGenTarget.h" -#include "StringToOffsetTable.h" #include "SequenceToOffsetTable.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp index 33381e9..12e153a 100644 --- a/utils/TableGen/CodeGenInstruction.cpp +++ b/utils/TableGen/CodeGenInstruction.cpp @@ -297,6 +297,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R) : TheDef(R), Operands(R) { isCompare = R->getValueAsBit("isCompare"); isMoveImm = R->getValueAsBit("isMoveImm"); isBitcast = R->getValueAsBit("isBitcast"); + isSelect = R->getValueAsBit("isSelect"); isBarrier = R->getValueAsBit("isBarrier"); isCall = R->getValueAsBit("isCall"); canFoldAsLoad = R->getValueAsBit("canFoldAsLoad"); diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h index 3ba9f24..95b572d 100644 --- a/utils/TableGen/CodeGenInstruction.h +++ b/utils/TableGen/CodeGenInstruction.h @@ -222,6 +222,7 @@ namespace llvm { bool isCompare; bool isMoveImm; bool isBitcast; + bool isSelect; bool isBarrier; bool isCall; bool canFoldAsLoad; diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp index 81bf9ed..011f4b7 100644 --- a/utils/TableGen/CodeGenRegisters.cpp +++ b/utils/TableGen/CodeGenRegisters.cpp @@ -28,19 +28,15 @@ using namespace llvm; //===----------------------------------------------------------------------===// CodeGenSubRegIndex::CodeGenSubRegIndex(Record *R, unsigned Enum) - : TheDef(R), - EnumValue(Enum) -{} - -std::string CodeGenSubRegIndex::getNamespace() const { - if (TheDef->getValue("Namespace")) - return TheDef->getValueAsString("Namespace"); - else - return ""; + : TheDef(R), EnumValue(Enum) { + Name = R->getName(); + if (R->getValue("Namespace")) + Namespace = R->getValueAsString("Namespace"); } -const std::string &CodeGenSubRegIndex::getName() const { - return TheDef->getName(); +CodeGenSubRegIndex::CodeGenSubRegIndex(StringRef N, StringRef Nspace, + unsigned Enum) + : TheDef(0), Name(N), Namespace(Nspace), EnumValue(Enum) { } std::string CodeGenSubRegIndex::getQualifiedName() const { @@ -52,16 +48,31 @@ std::string CodeGenSubRegIndex::getQualifiedName() const { } void CodeGenSubRegIndex::updateComponents(CodeGenRegBank &RegBank) { - std::vector<Record*> Comps = TheDef->getValueAsListOfDefs("ComposedOf"); - if (Comps.empty()) + if (!TheDef) return; - if (Comps.size() != 2) - throw TGError(TheDef->getLoc(), "ComposedOf must have exactly two entries"); - CodeGenSubRegIndex *A = RegBank.getSubRegIdx(Comps[0]); - CodeGenSubRegIndex *B = RegBank.getSubRegIdx(Comps[1]); - CodeGenSubRegIndex *X = A->addComposite(B, this); - if (X) - throw TGError(TheDef->getLoc(), "Ambiguous ComposedOf entries"); + + std::vector<Record*> Comps = 
TheDef->getValueAsListOfDefs("ComposedOf"); + if (!Comps.empty()) { + if (Comps.size() != 2) + throw TGError(TheDef->getLoc(), "ComposedOf must have exactly two entries"); + CodeGenSubRegIndex *A = RegBank.getSubRegIdx(Comps[0]); + CodeGenSubRegIndex *B = RegBank.getSubRegIdx(Comps[1]); + CodeGenSubRegIndex *X = A->addComposite(B, this); + if (X) + throw TGError(TheDef->getLoc(), "Ambiguous ComposedOf entries"); + } + + std::vector<Record*> Parts = + TheDef->getValueAsListOfDefs("CoveringSubRegIndices"); + if (!Parts.empty()) { + if (Parts.size() < 2) + throw TGError(TheDef->getLoc(), + "CoveredBySubRegs must have two or more entries"); + SmallVector<CodeGenSubRegIndex*, 8> IdxParts; + for (unsigned i = 0, e = Parts.size(); i != e; ++i) + IdxParts.push_back(RegBank.getSubRegIdx(Parts[i])); + RegBank.addConcatSubRegIndex(IdxParts, this); + } } void CodeGenSubRegIndex::cleanComposites() { @@ -187,10 +198,7 @@ bool CodeGenRegister::inheritRegUnits(CodeGenRegBank &RegBank) { unsigned OldNumUnits = RegUnits.size(); for (SubRegMap::const_iterator I = SubRegs.begin(), E = SubRegs.end(); I != E; ++I) { - // Strangely a register may have itself as a subreg (self-cycle) e.g. XMM. CodeGenRegister *SR = I->second; - if (SR == this) - continue; // Merge the subregister's units into this register's RegUnits. mergeRegUnits(RegUnits, SR->RegUnits); } @@ -260,44 +268,6 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { } } - // Process the composites. - ListInit *Comps = TheDef->getValueAsListInit("CompositeIndices"); - for (unsigned i = 0, e = Comps->size(); i != e; ++i) { - DagInit *Pat = dynamic_cast<DagInit*>(Comps->getElement(i)); - if (!Pat) - throw TGError(TheDef->getLoc(), "Invalid dag '" + - Comps->getElement(i)->getAsString() + - "' in CompositeIndices"); - DefInit *BaseIdxInit = dynamic_cast<DefInit*>(Pat->getOperator()); - if (!BaseIdxInit || !BaseIdxInit->getDef()->isSubClassOf("SubRegIndex")) - throw TGError(TheDef->getLoc(), "Invalid SubClassIndex in " + - Pat->getAsString()); - CodeGenSubRegIndex *BaseIdx = RegBank.getSubRegIdx(BaseIdxInit->getDef()); - - // Resolve list of subreg indices into R2. - CodeGenRegister *R2 = this; - for (DagInit::const_arg_iterator di = Pat->arg_begin(), - de = Pat->arg_end(); di != de; ++di) { - DefInit *IdxInit = dynamic_cast<DefInit*>(*di); - if (!IdxInit || !IdxInit->getDef()->isSubClassOf("SubRegIndex")) - throw TGError(TheDef->getLoc(), "Invalid SubClassIndex in " + - Pat->getAsString()); - CodeGenSubRegIndex *Idx = RegBank.getSubRegIdx(IdxInit->getDef()); - const SubRegMap &R2Subs = R2->computeSubRegs(RegBank); - SubRegMap::const_iterator ni = R2Subs.find(Idx); - if (ni == R2Subs.end()) - throw TGError(TheDef->getLoc(), "Composite " + Pat->getAsString() + - " refers to bad index in " + R2->getName()); - R2 = ni->second; - } - - // Insert composite index. Allow overriding inherited indices etc. - SubRegs[BaseIdx] = R2; - - // R2 is no longer an orphan. - Orphans.erase(R2); - } - // Now Orphans contains the inherited subregisters without a direct index. // Create inferred indexes for all missing entries. // Work backwards in the Indices vector in order to compose subregs bottom-up. @@ -327,14 +297,25 @@ CodeGenRegister::computeSubRegs(CodeGenRegBank &RegBank) { // Compute the inverse SubReg -> Idx map. for (SubRegMap::const_iterator SI = SubRegs.begin(), SE = SubRegs.end(); SI != SE; ++SI) { - // Ignore idempotent sub-register indices. 
- if (SI->second == this) + if (SI->second == this) { + SMLoc Loc; + if (TheDef) + Loc = TheDef->getLoc(); + throw TGError(Loc, "Register " + getName() + + " has itself as a sub-register"); + } + // Ensure that every sub-register has a unique name. + DenseMap<const CodeGenRegister*, CodeGenSubRegIndex*>::iterator Ins = + SubReg2Idx.insert(std::make_pair(SI->second, SI->first)).first; + if (Ins->second == SI->first) continue; - // Is is possible to have multiple names for the same sub-register. - // For example, XMM0 appears as sub_xmm, sub_sd, and sub_ss in YMM0. - // Eventually, this degeneration should go away, but for now we simply give - // precedence to the explicit sub-register index over the inherited ones. - SubReg2Idx.insert(std::make_pair(SI->second, SI->first)); + // Trouble: Two different names for SI->second. + SMLoc Loc; + if (TheDef) + Loc = TheDef->getLoc(); + throw TGError(Loc, "Sub-register can't have two names: " + + SI->second->getName() + " available as " + + SI->first->getName() + " and " + Ins->second->getName()); } // Derive possible names for sub-register concatenations from any explicit @@ -508,8 +489,6 @@ void CodeGenRegister::computeSuperRegs(CodeGenRegBank &RegBank) { Id.push_back(I->first->EnumValue); Id.push_back(I->second->TopoSig); - if (I->second == this) - continue; // Don't add duplicate entries. if (!I->second->SuperRegs.empty() && I->second->SuperRegs.back() == this) continue; @@ -530,8 +509,7 @@ CodeGenRegister::addSubRegsPreOrder(SetVector<const CodeGenRegister*> &OSet, // Add any secondary sub-registers that weren't part of the explicit tree. for (SubRegMap::const_iterator I = SubRegs.begin(), E = SubRegs.end(); I != E; ++I) - if (I->second != this) - OSet.insert(I->second); + OSet.insert(I->second); } // Compute overlapping registers. @@ -970,7 +948,7 @@ void CodeGenRegisterClass::buildRegUnitSet( // CodeGenRegBank //===----------------------------------------------------------------------===// -CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) { +CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) { // Configure register Sets to understand register classes and tuples. Sets.addFieldExpander("RegisterClass", "MemberList"); Sets.addFieldExpander("CalleeSavedRegs", "SaveList"); @@ -980,7 +958,6 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) { // More indices will be synthesized later. std::vector<Record*> SRIs = Records.getAllDerivedDefinitions("SubRegIndex"); std::sort(SRIs.begin(), SRIs.end(), LessRecord()); - NumNamedIndices = SRIs.size(); for (unsigned i = 0, e = SRIs.size(); i != e; ++i) getSubRegIdx(SRIs[i]); // Build composite maps from ComposedOf fields. @@ -1048,6 +1025,15 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) { CodeGenRegisterClass::computeSubClasses(*this); } +// Create a synthetic CodeGenSubRegIndex without a corresponding Record. +CodeGenSubRegIndex* +CodeGenRegBank::createSubRegIndex(StringRef Name, StringRef Namespace) { + CodeGenSubRegIndex *Idx = new CodeGenSubRegIndex(Name, Namespace, + SubRegIndices.size() + 1); + SubRegIndices.push_back(Idx); + return Idx; +} + CodeGenSubRegIndex *CodeGenRegBank::getSubRegIdx(Record *Def) { CodeGenSubRegIndex *&Idx = Def2SubRegIdx[Def]; if (Idx) @@ -1112,7 +1098,7 @@ CodeGenRegBank::getCompositeSubRegIndex(CodeGenSubRegIndex *A, // None exists, synthesize one. 
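The duplicate-name check in the hunk above leans on a DenseMap property that is easy to miss: insert() leaves an existing mapping untouched and hands back its iterator, so a single probe both inserts and detects a clash. A standalone sketch of the idiom, with hypothetical key/value types:

  #include "llvm/ADT/DenseMap.h"
  #include <utility>

  // Returns false when Key was already mapped to a *different* Value,
  // mirroring the "two names for one sub-register" detection above.
  static bool recordUnique(llvm::DenseMap<unsigned, unsigned> &Map,
                           unsigned Key, unsigned Value) {
    llvm::DenseMap<unsigned, unsigned>::iterator I =
        Map.insert(std::make_pair(Key, Value)).first;
    return I->second == Value;
  }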
  std::string Name = A->getName() + "_then_" + B->getName();
-  Comp = getSubRegIdx(new Record(Name, SMLoc(), Records));
+  Comp = createSubRegIndex(Name, A->getNamespace());
  A->addComposite(B, Comp);
  return Comp;
}
@@ -1132,7 +1118,7 @@ getConcatSubRegIndex(const SmallVector<CodeGenSubRegIndex*, 8> &Parts) {
     Name += '_';
     Name += Parts[i]->getName();
   }
-  return Idx = getSubRegIdx(new Record(Name, SMLoc(), Records));
+  return Idx = createSubRegIndex(Name, Parts.front()->getNamespace());
 }
 
 void CodeGenRegBank::computeComposites() {
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index eb6724e..827063e 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -35,14 +35,17 @@ namespace llvm {
   /// CodeGenSubRegIndex - Represents a sub-register index.
   class CodeGenSubRegIndex {
     Record *const TheDef;
+    std::string Name;
+    std::string Namespace;
 
   public:
     const unsigned EnumValue;
 
     CodeGenSubRegIndex(Record *R, unsigned Enum);
+    CodeGenSubRegIndex(StringRef N, StringRef Nspace, unsigned Enum);
 
-    const std::string &getName() const;
-    std::string getNamespace() const;
+    const std::string &getName() const { return Name; }
+    const std::string &getNamespace() const { return Namespace; }
     std::string getQualifiedName() const;
 
     // Order CodeGenSubRegIndex pointers by EnumValue.
@@ -422,13 +425,13 @@ namespace llvm {
   // CodeGenRegBank - Represent a target's registers and the relations between
   // them.
   class CodeGenRegBank {
-    RecordKeeper &Records;
     SetTheory Sets;
 
     // SubRegIndices.
     std::vector<CodeGenSubRegIndex*> SubRegIndices;
     DenseMap<Record*, CodeGenSubRegIndex*> Def2SubRegIdx;
-    unsigned NumNamedIndices;
+
+    CodeGenSubRegIndex *createSubRegIndex(StringRef Name, StringRef NameSpace);
 
     typedef std::map<SmallVector<CodeGenSubRegIndex*, 8>,
                      CodeGenSubRegIndex*> ConcatIdxMap;
@@ -495,7 +498,6 @@ namespace llvm {
     // in the .td files. The rest are synthesized such that all sub-registers
     // have a unique name.
     ArrayRef<CodeGenSubRegIndex*> getSubRegIndices() { return SubRegIndices; }
-    unsigned getNumNamedIndices() { return NumNamedIndices; }
 
     // Find a SubRegIndex from its Record def.
CodeGenSubRegIndex *getSubRegIdx(Record*); diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp index 2cdde55..e89c393 100644 --- a/utils/TableGen/FixedLenDecoderEmitter.cpp +++ b/utils/TableGen/FixedLenDecoderEmitter.cpp @@ -17,9 +17,15 @@ #include "CodeGenTarget.h" #include "llvm/TableGen/Record.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/TableGenBackend.h" @@ -35,9 +41,7 @@ struct EncodingField { EncodingField(unsigned B, unsigned W, unsigned O) : Base(B), Width(W), Offset(O) { } }; -} // End anonymous namespace -namespace { struct OperandInfo { std::vector<EncodingField> Fields; std::string Decoder; @@ -56,10 +60,25 @@ struct OperandInfo { const_iterator begin() const { return Fields.begin(); } const_iterator end() const { return Fields.end(); } }; + +typedef std::vector<uint8_t> DecoderTable; +typedef uint32_t DecoderFixup; +typedef std::vector<DecoderFixup> FixupList; +typedef std::vector<FixupList> FixupScopeList; +typedef SetVector<std::string> PredicateSet; +typedef SetVector<std::string> DecoderSet; +struct DecoderTableInfo { + DecoderTable Table; + FixupScopeList FixupStack; + PredicateSet Predicates; + DecoderSet Decoders; +}; + } // End anonymous namespace namespace { class FixedLenDecoderEmitter { + const std::vector<const CodeGenInstruction*> *NumberedInstructions; public: // Defaults preserved here for documentation, even though they aren't @@ -77,6 +96,17 @@ public: GuardPrefix(GPrefix), GuardPostfix(GPostfix), ReturnOK(ROK), ReturnFail(RFail), Locals(L) {} + // Emit the decoder state machine table. + void emitTable(formatted_raw_ostream &o, DecoderTable &Table, + unsigned Indentation, unsigned BitWidth, + StringRef Namespace) const; + void emitPredicateFunction(formatted_raw_ostream &OS, + PredicateSet &Predicates, + unsigned Indentation) const; + void emitDecoderFunction(formatted_raw_ostream &OS, + DecoderSet &Decoders, + unsigned Indentation) const; + // run - Output the code emitter void run(raw_ostream &o); @@ -120,9 +150,7 @@ static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) { } // Prints the bit value for each position. static void dumpBits(raw_ostream &o, const BitsInit &bits) { - unsigned index; - - for (index = bits.getNumBits(); index > 0; index--) { + for (unsigned index = bits.getNumBits(); index > 0; --index) { switch (bitFromBits(bits, index - 1)) { case BIT_TRUE: o << "1"; @@ -238,8 +266,9 @@ public: // match the remaining undecoded encoding bits against the singleton. void recurse(); - // Emit code to decode instructions given a segment or segments of bits. - void emit(raw_ostream &o, unsigned &Indentation) const; + // Emit table entries to decode instructions given a segment or segments of + // bits. + void emitTableEntry(DecoderTableInfo &TableInfo) const; // Returns the number of fanout produced by the filter. More fanout implies // the filter distinguishes more categories of instructions. @@ -338,12 +367,7 @@ public: doFilter(); } - // The top level filter chooser has NULL as its parent. - bool isTopLevel() const { return Parent == NULL; } - - // Emit the top level typedef and decodeInstruction() function. 
- void emitTop(raw_ostream &o, unsigned Indentation, - const std::string &Namespace) const; + unsigned getBitWidth() const { return BitWidth; } protected: // Populates the insn given the uid. @@ -414,21 +438,28 @@ protected: bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation, unsigned Opc) const; - void emitSoftFailCheck(raw_ostream &o, unsigned Indentation, - unsigned Opc) const; + bool doesOpcodeNeedPredicate(unsigned Opc) const; + unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const; + void emitPredicateTableEntry(DecoderTableInfo &TableInfo, + unsigned Opc) const; + + void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, + unsigned Opc) const; - // Emits code to decode the singleton. Return true if we have matched all the - // well-known bits. - bool emitSingletonDecoder(raw_ostream &o, unsigned &Indentation, - unsigned Opc) const; + // Emits table entries to decode the singleton. + void emitSingletonTableEntry(DecoderTableInfo &TableInfo, + unsigned Opc) const; // Emits code to decode the singleton, and then to decode the rest. - void emitSingletonDecoder(raw_ostream &o, unsigned &Indentation, - const Filter &Best) const; + void emitSingletonTableEntry(DecoderTableInfo &TableInfo, + const Filter &Best) const; - void emitBinaryParser(raw_ostream &o , unsigned &Indentation, + void emitBinaryParser(raw_ostream &o, unsigned &Indentation, const OperandInfo &OpInfo) const; + void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc) const; + unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc) const; + // Assign a single filter and run with it. void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed); @@ -447,10 +478,10 @@ protected: // dump the conflict set to the standard error. void doFilter(); - // Emits code to decode our share of instructions. Returns true if the - // emitted code causes a return, which occurs if we know how to decode - // the instruction at this level or the instruction is not decodeable. - bool emit(raw_ostream &o, unsigned &Indentation) const; +public: + // emitTableEntries - Emit state machine entries to decode our share of + // instructions. + void emitTableEntries(DecoderTableInfo &TableInfo) const; }; } // End anonymous namespace @@ -524,11 +555,9 @@ void Filter::recurse() { // Starts by inheriting our parent filter chooser's filter bit values. std::vector<bit_value_t> BitValueArray(Owner->FilterBitValues); - unsigned bitIndex; - if (VariableInstructions.size()) { // Conservatively marks each segment position as BIT_UNSET. - for (bitIndex = 0; bitIndex < NumBits; bitIndex++) + for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) BitValueArray[StartBit + bitIndex] = BIT_UNSET; // Delegates to an inferior filter chooser for further processing on this @@ -544,7 +573,7 @@ void Filter::recurse() { } // No need to recurse for a singleton filtered instruction. - // See also Filter::emit(). + // See also Filter::emit*(). if (getNumFiltered() == 1) { //Owner->SingletonExists(LastOpcFiltered); assert(FilterChooserMap.size() == 1); @@ -557,7 +586,7 @@ void Filter::recurse() { mapIterator++) { // Marks all the segment positions with either BIT_TRUE or BIT_FALSE. - for (bitIndex = 0; bitIndex < NumBits; bitIndex++) { + for (unsigned bitIndex = 0; bitIndex < NumBits; ++bitIndex) { if (mapIterator->first & (1ULL << bitIndex)) BitValueArray[StartBit + bitIndex] = BIT_TRUE; else @@ -577,64 +606,100 @@ void Filter::recurse() { } } -// Emit code to decode instructions given a segment or segments of bits. 
-void Filter::emit(raw_ostream &o, unsigned &Indentation) const { - o.indent(Indentation) << "// Check Inst{"; - - if (NumBits > 1) - o << (StartBit + NumBits - 1) << '-'; +static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups, + uint32_t DestIdx) { + // Any NumToSkip fixups in the current scope can resolve to the + // current location. + for (FixupList::const_reverse_iterator I = Fixups.rbegin(), + E = Fixups.rend(); + I != E; ++I) { + // Calculate the distance from the byte following the fixup entry byte + // to the destination. The Target is calculated from after the 16-bit + // NumToSkip entry itself, so subtract two from the displacement here + // to account for that. + uint32_t FixupIdx = *I; + uint32_t Delta = DestIdx - FixupIdx - 2; + // Our NumToSkip entries are 16-bits. Make sure our table isn't too + // big. + assert(Delta < 65536U && "disassembler decoding table too large!"); + Table[FixupIdx] = (uint8_t)Delta; + Table[FixupIdx + 1] = (uint8_t)(Delta >> 8); + } +} - o << StartBit << "} ...\n"; +// Emit table entries to decode instructions given a segment or segments +// of bits. +void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const { + TableInfo.Table.push_back(MCD::OPC_ExtractField); + TableInfo.Table.push_back(StartBit); + TableInfo.Table.push_back(NumBits); - o.indent(Indentation) << "switch (fieldFromInstruction" << Owner->BitWidth - << "(insn, " << StartBit << ", " - << NumBits << ")) {\n"; + // A new filter entry begins a new scope for fixup resolution. + TableInfo.FixupStack.push_back(FixupList()); std::map<unsigned, const FilterChooser*>::const_iterator filterIterator; - bool DefaultCase = false; + DecoderTable &Table = TableInfo.Table; + + size_t PrevFilter = 0; + bool HasFallthrough = false; for (filterIterator = FilterChooserMap.begin(); filterIterator != FilterChooserMap.end(); filterIterator++) { - // Field value -1 implies a non-empty set of variable instructions. // See also recurse(). if (filterIterator->first == (unsigned)-1) { - DefaultCase = true; - - o.indent(Indentation) << "default:\n"; - o.indent(Indentation) << " break; // fallthrough\n"; - - // Closing curly brace for the switch statement. - // This is unconventional because we want the default processing to be - // performed for the fallthrough cases as well, i.e., when the "cases" - // did not prove a decoded instruction. - o.indent(Indentation) << "}\n"; - - } else - o.indent(Indentation) << "case " << filterIterator->first << ":\n"; + HasFallthrough = true; + + // Each scope should always have at least one filter value to check + // for. + assert(PrevFilter != 0 && "empty filter set!"); + FixupList &CurScope = TableInfo.FixupStack.back(); + // Resolve any NumToSkip fixups in the current scope. + resolveTableFixups(Table, CurScope, Table.size()); + CurScope.clear(); + PrevFilter = 0; // Don't re-process the filter's fallthrough. + } else { + Table.push_back(MCD::OPC_FilterValue); + // Encode and emit the value to filter against. + uint8_t Buffer[8]; + unsigned Len = encodeULEB128(filterIterator->first, Buffer); + Table.insert(Table.end(), Buffer, Buffer + Len); + // Reserve space for the NumToSkip entry. We'll backpatch the value + // later. + PrevFilter = Table.size(); + Table.push_back(0); + Table.push_back(0); + } // We arrive at a category of instructions with the same segment value. // Now delegate to the sub filter chooser for further decodings. // The case may fallthrough, which happens if the remaining well-known // encoding bits do not match exactly. 
- if (!DefaultCase) { ++Indentation; ++Indentation; } - - filterIterator->second->emit(o, Indentation); - // For top level default case, there's no need for a break statement. - if (Owner->isTopLevel() && DefaultCase) - break; - - o.indent(Indentation) << "break;\n"; - - if (!DefaultCase) { --Indentation; --Indentation; } + filterIterator->second->emitTableEntries(TableInfo); + + // Now that we've emitted the body of the handler, update the NumToSkip + // of the filter itself to be able to skip forward when false. Subtract + // two as to account for the width of the NumToSkip field itself. + if (PrevFilter) { + uint32_t NumToSkip = Table.size() - PrevFilter - 2; + assert(NumToSkip < 65536U && "disassembler decoding table too large!"); + Table[PrevFilter] = (uint8_t)NumToSkip; + Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8); + } } - // If there is no default case, we still need to supply a closing brace. - if (!DefaultCase) { - // Closing curly brace for the switch statement. - o.indent(Indentation) << "}\n"; - } + // Any remaining unresolved fixups bubble up to the parent fixup scope. + assert(TableInfo.FixupStack.size() > 1 && "fixup stack underflow!"); + FixupScopeList::iterator Source = TableInfo.FixupStack.end() - 1; + FixupScopeList::iterator Dest = Source - 1; + Dest->insert(Dest->end(), Source->begin(), Source->end()); + TableInfo.FixupStack.pop_back(); + + // If there is no fallthrough, then the final filter should get fixed + // up according to the enclosing scope rather than the current position. + if (!HasFallthrough) + TableInfo.FixupStack.back().push_back(PrevFilter); } // Returns the number of fanout produced by the filter. More fanout implies @@ -652,31 +717,205 @@ unsigned Filter::usefulness() const { // // ////////////////////////////////// -// Emit the top level typedef and decodeInstruction() function. -void FilterChooser::emitTop(raw_ostream &o, unsigned Indentation, - const std::string &Namespace) const { - o.indent(Indentation) << - "static MCDisassembler::DecodeStatus decode" << Namespace << "Instruction" - << BitWidth << "(MCInst &MI, uint" << BitWidth - << "_t insn, uint64_t Address, " - << "const void *Decoder, const MCSubtargetInfo &STI) {\n"; - o.indent(Indentation) << " unsigned tmp = 0;\n"; - o.indent(Indentation) << " (void)tmp;\n"; - o.indent(Indentation) << Emitter->Locals << "\n"; - o.indent(Indentation) << " uint64_t Bits = STI.getFeatureBits();\n"; - o.indent(Indentation) << " (void)Bits;\n"; - - ++Indentation; ++Indentation; - // Emits code to decode the instructions. - emit(o, Indentation); - - o << '\n'; - o.indent(Indentation) << "return " << Emitter->ReturnFail << ";\n"; - --Indentation; --Indentation; - - o.indent(Indentation) << "}\n"; - - o << '\n'; +// Emit the decoder state machine table. +void FixedLenDecoderEmitter::emitTable(formatted_raw_ostream &OS, + DecoderTable &Table, + unsigned Indentation, + unsigned BitWidth, + StringRef Namespace) const { + OS.indent(Indentation) << "static const uint8_t DecoderTable" << Namespace + << BitWidth << "[] = {\n"; + + Indentation += 2; + + // FIXME: We may be able to use the NumToSkip values to recover + // appropriate indentation levels. 
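To make the byte stream emitTable() renders concrete, here is a hand-written table of the shape it produces. The field positions, filter value, opcode number (34), and decoder index (0) are all invented for illustration:

  static const uint8_t DecoderTableExample32[] = {
  /* 0 */  MCD::OPC_ExtractField, 25, 3,   // Inst{27-25} ...
  /* 3 */  MCD::OPC_FilterValue, 1, 3, 0,  // if field != 1, skip to 10
  /* 7 */  MCD::OPC_Decode, 34, 0,         // set opcode 34, run decoder 0
  /* 10 */ MCD::OPC_Fail,
           0
  };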
+ DecoderTable::const_iterator I = Table.begin(); + DecoderTable::const_iterator E = Table.end(); + while (I != E) { + assert (I < E && "incomplete decode table entry!"); + + uint64_t Pos = I - Table.begin(); + OS << "/* " << Pos << " */"; + OS.PadToColumn(12); + + switch (*I) { + default: + throw "invalid decode table opcode"; + case MCD::OPC_ExtractField: { + ++I; + unsigned Start = *I++; + unsigned Len = *I++; + OS.indent(Indentation) << "MCD::OPC_ExtractField, " << Start << ", " + << Len << ", // Inst{"; + if (Len > 1) + OS << (Start + Len - 1) << "-"; + OS << Start << "} ...\n"; + break; + } + case MCD::OPC_FilterValue: { + ++I; + OS.indent(Indentation) << "MCD::OPC_FilterValue, "; + // The filter value is ULEB128 encoded. + while (*I >= 128) + OS << utostr(*I++) << ", "; + OS << utostr(*I++) << ", "; + + // 16-bit numtoskip value. + uint8_t Byte = *I++; + uint32_t NumToSkip = Byte; + OS << utostr(Byte) << ", "; + Byte = *I++; + OS << utostr(Byte) << ", "; + NumToSkip |= Byte << 8; + OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; + break; + } + case MCD::OPC_CheckField: { + ++I; + unsigned Start = *I++; + unsigned Len = *I++; + OS.indent(Indentation) << "MCD::OPC_CheckField, " << Start << ", " + << Len << ", ";// << Val << ", " << NumToSkip << ",\n"; + // ULEB128 encoded field value. + for (; *I >= 128; ++I) + OS << utostr(*I) << ", "; + OS << utostr(*I++) << ", "; + // 16-bit numtoskip value. + uint8_t Byte = *I++; + uint32_t NumToSkip = Byte; + OS << utostr(Byte) << ", "; + Byte = *I++; + OS << utostr(Byte) << ", "; + NumToSkip |= Byte << 8; + OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; + break; + } + case MCD::OPC_CheckPredicate: { + ++I; + OS.indent(Indentation) << "MCD::OPC_CheckPredicate, "; + for (; *I >= 128; ++I) + OS << utostr(*I) << ", "; + OS << utostr(*I++) << ", "; + + // 16-bit numtoskip value. + uint8_t Byte = *I++; + uint32_t NumToSkip = Byte; + OS << utostr(Byte) << ", "; + Byte = *I++; + OS << utostr(Byte) << ", "; + NumToSkip |= Byte << 8; + OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n"; + break; + } + case MCD::OPC_Decode: { + ++I; + // Extract the ULEB128 encoded Opcode to a buffer. + uint8_t Buffer[8], *p = Buffer; + while ((*p++ = *I++) >= 128) + assert((p - Buffer) <= (ptrdiff_t)sizeof(Buffer) + && "ULEB128 value too large!"); + // Decode the Opcode value. + unsigned Opc = decodeULEB128(Buffer); + OS.indent(Indentation) << "MCD::OPC_Decode, "; + for (p = Buffer; *p >= 128; ++p) + OS << utostr(*p) << ", "; + OS << utostr(*p) << ", "; + + // Decoder index. 
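Both the emitter and the table printer above lean on LLVM's LEB128 helpers (this patch adds the llvm/Support/LEB128.h include). A quick round-trip sketch; the value 300 is arbitrary:

  #include "llvm/Support/LEB128.h"
  #include <stdint.h>

  static void ulebRoundTrip() {
    uint8_t Buf[8];
    // 300 = 0x12C encodes as 0xAC 0x02: seven payload bits per byte, with
    // the high bit set on every byte except the last.
    unsigned Len = llvm::encodeULEB128(300, Buf);  // Len == 2
    unsigned N;
    uint64_t Val = llvm::decodeULEB128(Buf, &N);   // Val == 300, N == 2
    (void)Len; (void)Val; (void)N;
  }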
+ for (; *I >= 128; ++I) + OS << utostr(*I) << ", "; + OS << utostr(*I++) << ", "; + + OS << "// Opcode: " + << NumberedInstructions->at(Opc)->TheDef->getName() << "\n"; + break; + } + case MCD::OPC_SoftFail: { + ++I; + OS.indent(Indentation) << "MCD::OPC_SoftFail"; + // Positive mask + uint64_t Value = 0; + unsigned Shift = 0; + do { + OS << ", " << utostr(*I); + Value += (*I & 0x7f) << Shift; + Shift += 7; + } while (*I++ >= 128); + if (Value > 127) + OS << " /* 0x" << utohexstr(Value) << " */"; + // Negative mask + Value = 0; + Shift = 0; + do { + OS << ", " << utostr(*I); + Value += (*I & 0x7f) << Shift; + Shift += 7; + } while (*I++ >= 128); + if (Value > 127) + OS << " /* 0x" << utohexstr(Value) << " */"; + OS << ",\n"; + break; + } + case MCD::OPC_Fail: { + ++I; + OS.indent(Indentation) << "MCD::OPC_Fail,\n"; + break; + } + } + } + OS.indent(Indentation) << "0\n"; + + Indentation -= 2; + + OS.indent(Indentation) << "};\n\n"; +} + +void FixedLenDecoderEmitter:: +emitPredicateFunction(formatted_raw_ostream &OS, PredicateSet &Predicates, + unsigned Indentation) const { + // The predicate function is just a big switch statement based on the + // input predicate index. + OS.indent(Indentation) << "static bool checkDecoderPredicate(unsigned Idx, " + << "uint64_t Bits) {\n"; + Indentation += 2; + OS.indent(Indentation) << "switch (Idx) {\n"; + OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; + unsigned Index = 0; + for (PredicateSet::const_iterator I = Predicates.begin(), E = Predicates.end(); + I != E; ++I, ++Index) { + OS.indent(Indentation) << "case " << Index << ":\n"; + OS.indent(Indentation+2) << "return (" << *I << ");\n"; + } + OS.indent(Indentation) << "}\n"; + Indentation -= 2; + OS.indent(Indentation) << "}\n\n"; +} + +void FixedLenDecoderEmitter:: +emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders, + unsigned Indentation) const { + // The decoder function is just a big switch statement based on the + // input decoder index. + OS.indent(Indentation) << "template<typename InsnType>\n"; + OS.indent(Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S," + << " unsigned Idx, InsnType insn, MCInst &MI,\n"; + OS.indent(Indentation) << " uint64_t " + << "Address, const void *Decoder) {\n"; + Indentation += 2; + OS.indent(Indentation) << "InsnType tmp;\n"; + OS.indent(Indentation) << "switch (Idx) {\n"; + OS.indent(Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n"; + unsigned Index = 0; + for (DecoderSet::const_iterator I = Decoders.begin(), E = Decoders.end(); + I != E; ++I, ++Index) { + OS.indent(Indentation) << "case " << Index << ":\n"; + OS << *I; + OS.indent(Indentation+2) << "return S;\n"; + } + OS.indent(Indentation) << "}\n"; + Indentation -= 2; + OS.indent(Indentation) << "}\n\n"; } // Populates the field of the insn given the start position and the number of @@ -703,9 +942,7 @@ bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn, /// filter array as a series of chars. 
 void FilterChooser::dumpFilterArray(raw_ostream &o,
                                     const std::vector<bit_value_t> &filter) const {
-  unsigned bitIndex;
-
-  for (bitIndex = BitWidth; bitIndex > 0; bitIndex--) {
+  for (unsigned bitIndex = BitWidth; bitIndex > 0; bitIndex--) {
     switch (filter[bitIndex - 1]) {
     case BIT_UNFILTERED:
       o << ".";
@@ -827,26 +1064,71 @@ void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation,
   if (OpInfo.numFields() == 1) {
     OperandInfo::const_iterator OI = OpInfo.begin();
-    o.indent(Indentation) << "  tmp = fieldFromInstruction" << BitWidth
-                            << "(insn, " << OI->Base << ", " << OI->Width
-                            << ");\n";
+    o.indent(Indentation) << "tmp = fieldFromInstruction"
+                          << "(insn, " << OI->Base << ", " << OI->Width
+                          << ");\n";
   } else {
-    o.indent(Indentation) << "  tmp = 0;\n";
+    o.indent(Indentation) << "tmp = 0;\n";
     for (OperandInfo::const_iterator OI = OpInfo.begin(), OE = OpInfo.end();
          OI != OE; ++OI) {
-      o.indent(Indentation) << "  tmp |= (fieldFromInstruction" << BitWidth
+      o.indent(Indentation) << "tmp |= (fieldFromInstruction"
                             << "(insn, " << OI->Base << ", " << OI->Width
                             << ") << " << OI->Offset << ");\n";
     }
   }
 
   if (Decoder != "")
-    o.indent(Indentation) << "  " << Emitter->GuardPrefix << Decoder
+    o.indent(Indentation) << Emitter->GuardPrefix << Decoder
                           << "(MI, tmp, Address, Decoder)"
                           << Emitter->GuardPostfix << "\n";
   else
-    o.indent(Indentation) << "  MI.addOperand(MCOperand::CreateImm(tmp));\n";
+    o.indent(Indentation) << "MI.addOperand(MCOperand::CreateImm(tmp));\n";
+
+}
+
+void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation,
+                                unsigned Opc) const {
+  std::map<unsigned, std::vector<OperandInfo> >::const_iterator OpIter =
+    Operands.find(Opc);
+  const std::vector<OperandInfo>& InsnOperands = OpIter->second;
+  for (std::vector<OperandInfo>::const_iterator
+       I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
+    // If a custom instruction decoder was specified, use that.
+    if (I->numFields() == 0 && I->Decoder.size()) {
+      OS.indent(Indentation) << Emitter->GuardPrefix << I->Decoder
+                             << "(MI, insn, Address, Decoder)"
+                             << Emitter->GuardPostfix << "\n";
+      break;
+    }
+    emitBinaryParser(OS, Indentation, *I);
+  }
+}
+
+unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders,
+                                        unsigned Opc) const {
+  // Build up the decoder string.
+  SmallString<256> Decoder;
+  // FIXME: emitDecoder() could take a buffer directly rather than
+  // a stream.
+  raw_svector_ostream S(Decoder);
+  unsigned I = 4;
+  emitDecoder(S, I, Opc);
+  S.flush();
+
+  // Using the full decoder string as the key value here is a bit
+  // heavyweight, but is effective. If the string comparisons become a
+  // performance concern, we can implement a mangling of the decoder
+  // data easily enough with a map back to the actual string. That's
+  // overkill for now, though.
+
+  // Make sure the decoder is in the table.
+  Decoders.insert(Decoder.str());
+  // Now figure out the index for when we write out the table.
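The index lookup that follows (and its twin in getPredicateIndex() below) works because SetVector stores its unique elements in insertion order over a vector, so std::find measured from begin() yields a stable index. A standalone sketch with hypothetical names:

  #include "llvm/ADT/SetVector.h"
  #include <algorithm>
  #include <string>

  // Intern S into Pool and return its stable, insertion-ordered index.
  static unsigned internIndex(llvm::SetVector<std::string> &Pool,
                              const std::string &S) {
    Pool.insert(S);  // no-op if S is already present
    return (unsigned)(std::find(Pool.begin(), Pool.end(), S) - Pool.begin());
  }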
+  DecoderSet::const_iterator P = std::find(Decoders.begin(),
+                                           Decoders.end(),
+                                           Decoder.str());
+  return (unsigned)(P - Decoders.begin());
 }
 
 static void emitSinglePredicateMatch(raw_ostream &o, StringRef str,
@@ -887,8 +1169,74 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
   return Predicates->getSize() > 0;
 }
 
-void FilterChooser::emitSoftFailCheck(raw_ostream &o, unsigned Indentation,
-                                      unsigned Opc) const {
+bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const {
+  ListInit *Predicates =
+    AllInstructions[Opc]->TheDef->getValueAsListInit("Predicates");
+  for (unsigned i = 0; i < Predicates->getSize(); ++i) {
+    Record *Pred = Predicates->getElementAsRecord(i);
+    if (!Pred->getValue("AssemblerMatcherPredicate"))
+      continue;
+
+    std::string P = Pred->getValueAsString("AssemblerCondString");
+
+    if (!P.length())
+      continue;
+
+    return true;
+  }
+  return false;
+}
+
+unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo,
+                                          StringRef Predicate) const {
+  // Using the full predicate string as the key value here is a bit
+  // heavyweight, but is effective. If the string comparisons become a
+  // performance concern, we can implement a mangling of the predicate
+  // data easily enough with a map back to the actual string. That's
+  // overkill for now, though.
+
+  // Make sure the predicate is in the table.
+  TableInfo.Predicates.insert(Predicate.str());
+  // Now figure out the index for when we write out the table.
+  PredicateSet::const_iterator P = std::find(TableInfo.Predicates.begin(),
+                                             TableInfo.Predicates.end(),
+                                             Predicate.str());
+  return (unsigned)(P - TableInfo.Predicates.begin());
+}
+
+void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
+                                            unsigned Opc) const {
+  if (!doesOpcodeNeedPredicate(Opc))
+    return;
+
+  // Build up the predicate string.
+  SmallString<256> Predicate;
+  // FIXME: the emitPredicateMatch() functions could take a buffer directly
+  // rather than a stream.
+  raw_svector_ostream PS(Predicate);
+  unsigned I = 0;
+  emitPredicateMatch(PS, I, Opc);
+
+  // Figure out the index into the predicate table for the predicate just
+  // computed.
+  unsigned PIdx = getPredicateIndex(TableInfo, PS.str());
+  SmallString<16> PBytes;
+  raw_svector_ostream S(PBytes);
+  encodeULEB128(PIdx, S);
+  S.flush();
+
+  TableInfo.Table.push_back(MCD::OPC_CheckPredicate);
+  // Predicate index
+  for (unsigned i = 0, e = PBytes.size(); i != e; ++i)
+    TableInfo.Table.push_back(PBytes[i]);
+  // Push location for NumToSkip backpatching.
+  TableInfo.FixupStack.back().push_back(TableInfo.Table.size());
+  TableInfo.Table.push_back(0);
+  TableInfo.Table.push_back(0);
+}
+
+void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
+                                           unsigned Opc) const {
   BitsInit *SFBits =
     AllInstructions[Opc]->TheDef->getValueAsBitsInit("SoftFail");
   if (!SFBits) return;
@@ -914,13 +1262,11 @@ void FilterChooser::emitSoftFailCheck(raw_ostream &o, unsigned Indentation,
     default:
       // The bit is not set; this must be an error!
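For context on what emitSoftFailTableEntry() encodes: the generated decodeInstruction() later in this patch reads the two ULEB128 masks as "bits that must be 0" (positive mask) and "bits that must be 1" (negative mask), and downgrades the result to SoftFail on any violation. The equivalent standalone check, with hypothetical names:

  #include <stdint.h>

  // Mirrors the OPC_SoftFail case in the generated decodeInstruction().
  static bool isSoftFail(uint32_t Insn, uint32_t PositiveMask,
                         uint32_t NegativeMask) {
    return (Insn & PositiveMask) || (~Insn & NegativeMask);
  }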
StringRef Name = AllInstructions[Opc]->TheDef->getName(); - errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " - << Name - << " is set but Inst{" << i <<"} is unset!\n" + errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in " << Name + << " is set but Inst{" << i << "} is unset!\n" << " - You can only mark a bit as SoftFail if it is fully defined" << " (1/0 - not '?') in Inst\n"; - o << "#error SoftFail Conflict, " << Name << "::SoftFail{" << i - << "} set but Inst{" << i << "} undefined!\n"; + return; } } @@ -930,27 +1276,31 @@ void FilterChooser::emitSoftFailCheck(raw_ostream &o, unsigned Indentation, if (!NeedPositiveMask && !NeedNegativeMask) return; - std::string PositiveMaskStr = PositiveMask.toString(16, /*signed=*/false); - std::string NegativeMaskStr = NegativeMask.toString(16, /*signed=*/false); - StringRef BitExt = ""; - if (BitWidth > 32) - BitExt = "ULL"; - - o.indent(Indentation) << "if ("; - if (NeedPositiveMask) - o << "insn & 0x" << PositiveMaskStr << BitExt; - if (NeedPositiveMask && NeedNegativeMask) - o << " || "; - if (NeedNegativeMask) - o << "~insn & 0x" << NegativeMaskStr << BitExt; - o << ")\n"; - o.indent(Indentation+2) << "S = MCDisassembler::SoftFail;\n"; + TableInfo.Table.push_back(MCD::OPC_SoftFail); + + SmallString<16> MaskBytes; + raw_svector_ostream S(MaskBytes); + if (NeedPositiveMask) { + encodeULEB128(PositiveMask.getZExtValue(), S); + S.flush(); + for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) + TableInfo.Table.push_back(MaskBytes[i]); + } else + TableInfo.Table.push_back(0); + if (NeedNegativeMask) { + MaskBytes.clear(); + S.resync(); + encodeULEB128(NegativeMask.getZExtValue(), S); + S.flush(); + for (unsigned i = 0, e = MaskBytes.size(); i != e; ++i) + TableInfo.Table.push_back(MaskBytes[i]); + } else + TableInfo.Table.push_back(0); } -// Emits code to decode the singleton. Return true if we have matched all the -// well-known bits. -bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation, - unsigned Opc) const { +// Emits table entries to decode the singleton. +void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, + unsigned Opc) const { std::vector<unsigned> StartBits; std::vector<unsigned> EndBits; std::vector<uint64_t> FieldVals; @@ -961,107 +1311,70 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation, getIslands(StartBits, EndBits, FieldVals, Insn); unsigned Size = StartBits.size(); - unsigned I, NumBits; - // If we have matched all the well-known bits, just issue a return. - if (Size == 0) { - o.indent(Indentation) << "if ("; - if (!emitPredicateMatch(o, Indentation, Opc)) - o << "1"; - o << ") {\n"; - emitSoftFailCheck(o, Indentation+2, Opc); - o.indent(Indentation) << " MI.setOpcode(" << Opc << ");\n"; - std::map<unsigned, std::vector<OperandInfo> >::const_iterator OpIter = - Operands.find(Opc); - const std::vector<OperandInfo>& InsnOperands = OpIter->second; - for (std::vector<OperandInfo>::const_iterator - I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) { - // If a custom instruction decoder was specified, use that. - if (I->numFields() == 0 && I->Decoder.size()) { - o.indent(Indentation) << " " << Emitter->GuardPrefix << I->Decoder - << "(MI, insn, Address, Decoder)" - << Emitter->GuardPostfix << "\n"; - break; - } - - emitBinaryParser(o, Indentation, *I); - } - - o.indent(Indentation) << " return " << Emitter->ReturnOK << "; // " - << nameWithID(Opc) << '\n'; - o.indent(Indentation) << "}\n"; // Closing predicate block. 
- return true; - } - - // Otherwise, there are more decodings to be done! - - // Emit code to match the island(s) for the singleton. - o.indent(Indentation) << "// Check "; - - for (I = Size; I != 0; --I) { - o << "Inst{" << EndBits[I-1] << '-' << StartBits[I-1] << "} "; - if (I > 1) - o << " && "; - else - o << "for singleton decoding...\n"; - } - - o.indent(Indentation) << "if ("; - if (emitPredicateMatch(o, Indentation, Opc)) { - o << " &&\n"; - o.indent(Indentation+4); + // Emit the predicate table entry if one is needed. + emitPredicateTableEntry(TableInfo, Opc); + + // Check any additional encoding fields needed. + for (unsigned I = Size; I != 0; --I) { + unsigned NumBits = EndBits[I-1] - StartBits[I-1] + 1; + TableInfo.Table.push_back(MCD::OPC_CheckField); + TableInfo.Table.push_back(StartBits[I-1]); + TableInfo.Table.push_back(NumBits); + uint8_t Buffer[8], *p; + encodeULEB128(FieldVals[I-1], Buffer); + for (p = Buffer; *p >= 128 ; ++p) + TableInfo.Table.push_back(*p); + TableInfo.Table.push_back(*p); + // Push location for NumToSkip backpatching. + TableInfo.FixupStack.back().push_back(TableInfo.Table.size()); + // The fixup is always 16-bits, so go ahead and allocate the space + // in the table so all our relative position calculations work OK even + // before we fully resolve the real value here. + TableInfo.Table.push_back(0); + TableInfo.Table.push_back(0); } - for (I = Size; I != 0; --I) { - NumBits = EndBits[I-1] - StartBits[I-1] + 1; - o << "fieldFromInstruction" << BitWidth << "(insn, " - << StartBits[I-1] << ", " << NumBits - << ") == " << FieldVals[I-1]; - if (I > 1) - o << " && "; - else - o << ") {\n"; - } - emitSoftFailCheck(o, Indentation+2, Opc); - o.indent(Indentation) << " MI.setOpcode(" << Opc << ");\n"; - std::map<unsigned, std::vector<OperandInfo> >::const_iterator OpIter = - Operands.find(Opc); - const std::vector<OperandInfo>& InsnOperands = OpIter->second; - for (std::vector<OperandInfo>::const_iterator - I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) { - // If a custom instruction decoder was specified, use that. - if (I->numFields() == 0 && I->Decoder.size()) { - o.indent(Indentation) << " " << Emitter->GuardPrefix << I->Decoder - << "(MI, insn, Address, Decoder)" - << Emitter->GuardPostfix << "\n"; - break; - } - - emitBinaryParser(o, Indentation, *I); - } - o.indent(Indentation) << " return " << Emitter->ReturnOK << "; // " - << nameWithID(Opc) << '\n'; - o.indent(Indentation) << "}\n"; - - return false; + // Check for soft failure of the match. + emitSoftFailTableEntry(TableInfo, Opc); + + TableInfo.Table.push_back(MCD::OPC_Decode); + uint8_t Buffer[8], *p; + encodeULEB128(Opc, Buffer); + for (p = Buffer; *p >= 128 ; ++p) + TableInfo.Table.push_back(*p); + TableInfo.Table.push_back(*p); + + unsigned DIdx = getDecoderIndex(TableInfo.Decoders, Opc); + SmallString<16> Bytes; + raw_svector_ostream S(Bytes); + encodeULEB128(DIdx, S); + S.flush(); + + // Decoder index + for (unsigned i = 0, e = Bytes.size(); i != e; ++i) + TableInfo.Table.push_back(Bytes[i]); } -// Emits code to decode the singleton, and then to decode the rest. -void FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation, - const Filter &Best) const { - +// Emits table entries to decode the singleton, and then to decode the rest. 
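The singleton path below pushes a fresh fixup scope before emitting and resolves it afterwards; the write-back itself is the same mechanism resolveTableFixups() implements earlier in this patch. As a standalone sketch:

  #include <cassert>
  #include <stdint.h>
  #include <vector>

  // Patch one 16-bit little-endian NumToSkip placeholder at FixupIdx so the
  // decoder jumps to DestIdx. The displacement is measured from just past
  // the two placeholder bytes, hence the "- 2".
  static void patchNumToSkip(std::vector<uint8_t> &Table,
                             uint32_t FixupIdx, uint32_t DestIdx) {
    uint32_t Delta = DestIdx - FixupIdx - 2;
    assert(Delta < 65536U && "decoding table too large!");
    Table[FixupIdx] = (uint8_t)Delta;
    Table[FixupIdx + 1] = (uint8_t)(Delta >> 8);
  }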
+void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo, + const Filter &Best) const { unsigned Opc = Best.getSingletonOpc(); - emitSingletonDecoder(o, Indentation, Opc); + // complex singletons need predicate checks from the first singleton + // to refer forward to the variable filterchooser that follows. + TableInfo.FixupStack.push_back(FixupList()); - // Emit code for the rest. - o.indent(Indentation) << "else\n"; + emitSingletonTableEntry(TableInfo, Opc); - Indentation += 2; - Best.getVariableFC().emit(o, Indentation); - Indentation -= 2; + resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), + TableInfo.Table.size()); + TableInfo.FixupStack.pop_back(); + + Best.getVariableFC().emitTableEntries(TableInfo); } + // Assign a single filter and run with it. Top level API client can initialize // with a single filter to start the filtering process. void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit, @@ -1119,7 +1432,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) { } } - unsigned BitIndex, InsnIndex; + unsigned BitIndex; // We maintain BIT_WIDTH copies of the bitAttrs automaton. // The automaton consumes the corresponding bit from each @@ -1149,7 +1462,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) { else bitAttrs.push_back(ATTR_NONE); - for (InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) { + for (unsigned InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) { insn_t insn; insnWithID(insn, Opcodes[InsnIndex]); @@ -1200,7 +1513,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) { bitAttr_t RA = ATTR_NONE; unsigned StartBit = 0; - for (BitIndex = 0; BitIndex < BitWidth; BitIndex++) { + for (BitIndex = 0; BitIndex < BitWidth; ++BitIndex) { bitAttr_t bitAttr = bitAttrs[BitIndex]; assert(bitAttr != ATTR_NONE && "Bit without attributes"); @@ -1341,36 +1654,29 @@ void FilterChooser::doFilter() { BestIndex = -1; } -// Emits code to decode our share of instructions. Returns true if the -// emitted code causes a return, which occurs if we know how to decode -// the instruction at this level or the instruction is not decodeable. -bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) const { - if (Opcodes.size() == 1) +// emitTableEntries - Emit state machine entries to decode our share of +// instructions. +void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const { + if (Opcodes.size() == 1) { // There is only one instruction in the set, which is great! // Call emitSingletonDecoder() to see whether there are any remaining // encodings bits. - return emitSingletonDecoder(o, Indentation, Opcodes[0]); + emitSingletonTableEntry(TableInfo, Opcodes[0]); + return; + } // Choose the best filter to do the decodings! if (BestIndex != -1) { const Filter &Best = Filters[BestIndex]; if (Best.getNumFiltered() == 1) - emitSingletonDecoder(o, Indentation, Best); + emitSingletonTableEntry(TableInfo, Best); else - Best.emit(o, Indentation); - return false; + Best.emitTableEntry(TableInfo); + return; } - // We don't know how to decode these instructions! Return 0 and dump the - // conflict set! - o.indent(Indentation) << "return 0;" << " // Conflict set: "; - for (int i = 0, N = Opcodes.size(); i < N; ++i) { - o << nameWithID(Opcodes[i]); - if (i < (N - 1)) - o << ", "; - else - o << '\n'; - } + // We don't know how to decode these instructions! Dump the + // conflict set and bail. // Print out useful conflict information for postmortem analysis. 
errs() << "Decoding Conflict:\n"; @@ -1385,8 +1691,6 @@ bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) const { getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst")); errs() << '\n'; } - - return true; } static bool populateInstruction(const CodeGenInstruction &CGI, unsigned Opc, @@ -1549,62 +1853,168 @@ static bool populateInstruction(const CodeGenInstruction &CGI, unsigned Opc, return true; } -static void emitHelper(llvm::raw_ostream &o, unsigned BitWidth) { - unsigned Indentation = 0; - std::string WidthStr = "uint" + utostr(BitWidth) + "_t"; - - o << '\n'; - - o.indent(Indentation) << "static " << WidthStr << - " fieldFromInstruction" << BitWidth << - "(" << WidthStr <<" insn, unsigned startBit, unsigned numBits)\n"; - - o.indent(Indentation) << "{\n"; - - ++Indentation; ++Indentation; - o.indent(Indentation) << "assert(startBit + numBits <= " << BitWidth - << " && \"Instruction field out of bounds!\");\n"; - o << '\n'; - o.indent(Indentation) << WidthStr << " fieldMask;\n"; - o << '\n'; - o.indent(Indentation) << "if (numBits == " << BitWidth << ")\n"; - - ++Indentation; ++Indentation; - o.indent(Indentation) << "fieldMask = (" << WidthStr << ")-1;\n"; - --Indentation; --Indentation; - - o.indent(Indentation) << "else\n"; - - ++Indentation; ++Indentation; - o.indent(Indentation) << "fieldMask = ((1 << numBits) - 1) << startBit;\n"; - --Indentation; --Indentation; - - o << '\n'; - o.indent(Indentation) << "return (insn & fieldMask) >> startBit;\n"; - --Indentation; --Indentation; - - o.indent(Indentation) << "}\n"; +// emitFieldFromInstruction - Emit the templated helper function +// fieldFromInstruction(). +static void emitFieldFromInstruction(formatted_raw_ostream &OS) { + OS << "// Helper function for extracting fields from encoded instructions.\n" + << "template<typename InsnType>\n" + << "static InsnType fieldFromInstruction(InsnType insn, unsigned startBit,\n" + << " unsigned numBits) {\n" + << " assert(startBit + numBits <= (sizeof(InsnType)*8) &&\n" + << " \"Instruction field out of bounds!\");\n" + << " InsnType fieldMask;\n" + << " if (numBits == sizeof(InsnType)*8)\n" + << " fieldMask = (InsnType)(-1LL);\n" + << " else\n" + << " fieldMask = ((1 << numBits) - 1) << startBit;\n" + << " return (insn & fieldMask) >> startBit;\n" + << "}\n\n"; +} - o << '\n'; +// emitDecodeInstruction - Emit the templated helper function +// decodeInstruction(). 
+static void emitDecodeInstruction(formatted_raw_ostream &OS) { + OS << "template<typename InsnType>\n" + << "static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,\n" + << " InsnType insn, uint64_t Address,\n" + << " const void *DisAsm,\n" + << " const MCSubtargetInfo &STI) {\n" + << " uint64_t Bits = STI.getFeatureBits();\n" + << "\n" + << " const uint8_t *Ptr = DecodeTable;\n" + << " uint32_t CurFieldValue;\n" + << " DecodeStatus S = MCDisassembler::Success;\n" + << " for (;;) {\n" + << " ptrdiff_t Loc = Ptr - DecodeTable;\n" + << " switch (*Ptr) {\n" + << " default:\n" + << " errs() << Loc << \": Unexpected decode table opcode!\\n\";\n" + << " return MCDisassembler::Fail;\n" + << " case MCD::OPC_ExtractField: {\n" + << " unsigned Start = *++Ptr;\n" + << " unsigned Len = *++Ptr;\n" + << " ++Ptr;\n" + << " CurFieldValue = fieldFromInstruction(insn, Start, Len);\n" + << " DEBUG(dbgs() << Loc << \": OPC_ExtractField(\" << Start << \", \"\n" + << " << Len << \"): \" << CurFieldValue << \"\\n\");\n" + << " break;\n" + << " }\n" + << " case MCD::OPC_FilterValue: {\n" + << " // Decode the field value.\n" + << " unsigned Len;\n" + << " InsnType Val = decodeULEB128(++Ptr, &Len);\n" + << " Ptr += Len;\n" + << " // NumToSkip is a plain 16-bit integer.\n" + << " unsigned NumToSkip = *Ptr++;\n" + << " NumToSkip |= (*Ptr++) << 8;\n" + << "\n" + << " // Perform the filter operation.\n" + << " if (Val != CurFieldValue)\n" + << " Ptr += NumToSkip;\n" + << " DEBUG(dbgs() << Loc << \": OPC_FilterValue(\" << Val << \", \" << NumToSkip\n" + << " << \"): \" << ((Val != CurFieldValue) ? \"FAIL:\" : \"PASS:\")\n" + << " << \" continuing at \" << (Ptr - DecodeTable) << \"\\n\");\n" + << "\n" + << " break;\n" + << " }\n" + << " case MCD::OPC_CheckField: {\n" + << " unsigned Start = *++Ptr;\n" + << " unsigned Len = *++Ptr;\n" + << " InsnType FieldValue = fieldFromInstruction(insn, Start, Len);\n" + << " // Decode the field value.\n" + << " uint32_t ExpectedValue = decodeULEB128(++Ptr, &Len);\n" + << " Ptr += Len;\n" + << " // NumToSkip is a plain 16-bit integer.\n" + << " unsigned NumToSkip = *Ptr++;\n" + << " NumToSkip |= (*Ptr++) << 8;\n" + << "\n" + << " // If the actual and expected values don't match, skip.\n" + << " if (ExpectedValue != FieldValue)\n" + << " Ptr += NumToSkip;\n" + << " DEBUG(dbgs() << Loc << \": OPC_CheckField(\" << Start << \", \"\n" + << " << Len << \", \" << ExpectedValue << \", \" << NumToSkip\n" + << " << \"): FieldValue = \" << FieldValue << \", ExpectedValue = \"\n" + << " << ExpectedValue << \": \"\n" + << " << ((ExpectedValue == FieldValue) ? \"PASS\\n\" : \"FAIL\\n\"));\n" + << " break;\n" + << " }\n" + << " case MCD::OPC_CheckPredicate: {\n" + << " unsigned Len;\n" + << " // Decode the Predicate Index value.\n" + << " unsigned PIdx = decodeULEB128(++Ptr, &Len);\n" + << " Ptr += Len;\n" + << " // NumToSkip is a plain 16-bit integer.\n" + << " unsigned NumToSkip = *Ptr++;\n" + << " NumToSkip |= (*Ptr++) << 8;\n" + << " // Check the predicate.\n" + << " bool Pred;\n" + << " if (!(Pred = checkDecoderPredicate(PIdx, Bits)))\n" + << " Ptr += NumToSkip;\n" + << " (void)Pred;\n" + << " DEBUG(dbgs() << Loc << \": OPC_CheckPredicate(\" << PIdx << \"): \"\n" + << " << (Pred ? 
\"PASS\\n\" : \"FAIL\\n\"));\n" + << "\n" + << " break;\n" + << " }\n" + << " case MCD::OPC_Decode: {\n" + << " unsigned Len;\n" + << " // Decode the Opcode value.\n" + << " unsigned Opc = decodeULEB128(++Ptr, &Len);\n" + << " Ptr += Len;\n" + << " unsigned DecodeIdx = decodeULEB128(Ptr, &Len);\n" + << " Ptr += Len;\n" + << " DEBUG(dbgs() << Loc << \": OPC_Decode: opcode \" << Opc\n" + << " << \", using decoder \" << DecodeIdx << \"\\n\" );\n" + << " DEBUG(dbgs() << \"----- DECODE SUCCESSFUL -----\\n\");\n" + << "\n" + << " MI.setOpcode(Opc);\n" + << " return decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm);\n" + << " }\n" + << " case MCD::OPC_SoftFail: {\n" + << " // Decode the mask values.\n" + << " unsigned Len;\n" + << " InsnType PositiveMask = decodeULEB128(++Ptr, &Len);\n" + << " Ptr += Len;\n" + << " InsnType NegativeMask = decodeULEB128(Ptr, &Len);\n" + << " Ptr += Len;\n" + << " bool Fail = (insn & PositiveMask) || (~insn & NegativeMask);\n" + << " if (Fail)\n" + << " S = MCDisassembler::SoftFail;\n" + << " DEBUG(dbgs() << Loc << \": OPC_SoftFail: \" << (Fail ? \"FAIL\\n\":\"PASS\\n\"));\n" + << " break;\n" + << " }\n" + << " case MCD::OPC_Fail: {\n" + << " DEBUG(dbgs() << Loc << \": OPC_Fail\\n\");\n" + << " return MCDisassembler::Fail;\n" + << " }\n" + << " }\n" + << " }\n" + << " llvm_unreachable(\"bogosity detected in disassembler state machine!\");\n" + << "}\n\n"; } // Emits disassembler code for instruction decoding. void FixedLenDecoderEmitter::run(raw_ostream &o) { - o << "#include \"llvm/MC/MCInst.h\"\n"; - o << "#include \"llvm/Support/DataTypes.h\"\n"; - o << "#include <assert.h>\n"; - o << '\n'; - o << "namespace llvm {\n\n"; + formatted_raw_ostream OS(o); + OS << "#include \"llvm/MC/MCInst.h\"\n"; + OS << "#include \"llvm/Support/Debug.h\"\n"; + OS << "#include \"llvm/Support/DataTypes.h\"\n"; + OS << "#include \"llvm/Support/LEB128.h\"\n"; + OS << "#include \"llvm/Support/raw_ostream.h\"\n"; + OS << "#include <assert.h>\n"; + OS << '\n'; + OS << "namespace llvm {\n\n"; + + emitFieldFromInstruction(OS); // Parameterize the decoders based on namespace and instruction width. - const std::vector<const CodeGenInstruction*> &NumberedInstructions = - Target.getInstructionsByEnumValue(); + NumberedInstructions = &Target.getInstructionsByEnumValue(); std::map<std::pair<std::string, unsigned>, std::vector<unsigned> > OpcMap; std::map<unsigned, std::vector<OperandInfo> > Operands; - for (unsigned i = 0; i < NumberedInstructions.size(); ++i) { - const CodeGenInstruction *Inst = NumberedInstructions[i]; + for (unsigned i = 0; i < NumberedInstructions->size(); ++i) { + const CodeGenInstruction *Inst = NumberedInstructions->at(i); const Record *Def = Inst->TheDef; unsigned Size = Def->getValueAsInt("Size"); if (Def->getValueAsString("Namespace") == "TargetOpcode" || @@ -1622,24 +2032,48 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) { } } + DecoderTableInfo TableInfo; std::set<unsigned> Sizes; for (std::map<std::pair<std::string, unsigned>, std::vector<unsigned> >::const_iterator I = OpcMap.begin(), E = OpcMap.end(); I != E; ++I) { - // If we haven't visited this instruction width before, emit the - // helper method to extract fields. - if (!Sizes.count(I->first.second)) { - emitHelper(o, 8*I->first.second); - Sizes.insert(I->first.second); - } - // Emit the decoder for this namespace+width combination. 
- FilterChooser FC(NumberedInstructions, I->second, Operands, + FilterChooser FC(*NumberedInstructions, I->second, Operands, 8*I->first.second, this); - FC.emitTop(o, 0, I->first.first); + + // The decode table is cleared for each top level decoder function. The + // predicates and decoders themselves, however, are shared across all + // decoders to give more opportunities for uniqueing. + TableInfo.Table.clear(); + TableInfo.FixupStack.clear(); + TableInfo.Table.reserve(16384); + TableInfo.FixupStack.push_back(FixupList()); + FC.emitTableEntries(TableInfo); + // Any NumToSkip fixups in the top level scope can resolve to the + // OPC_Fail at the end of the table. + assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!"); + // Resolve any NumToSkip fixups in the current scope. + resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(), + TableInfo.Table.size()); + TableInfo.FixupStack.clear(); + + TableInfo.Table.push_back(MCD::OPC_Fail); + + // Print the table to the output stream. + emitTable(OS, TableInfo.Table, 0, FC.getBitWidth(), I->first.first); + OS.flush(); } - o << "\n} // End llvm namespace \n"; + // Emit the predicate function. + emitPredicateFunction(OS, TableInfo.Predicates, 0); + + // Emit the decoder function. + emitDecoderFunction(OS, TableInfo.Decoders, 0); + + // Emit the main entry point for the decoder, decodeInstruction(). + emitDecodeInstruction(OS); + + OS << "\n} // End llvm namespace\n"; } namespace llvm { diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp index 3adb869..b41ad94 100644 --- a/utils/TableGen/InstrInfoEmitter.cpp +++ b/utils/TableGen/InstrInfoEmitter.cpp @@ -319,6 +319,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num, if (Inst.isCompare) OS << "|(1<<MCID::Compare)"; if (Inst.isMoveImm) OS << "|(1<<MCID::MoveImm)"; if (Inst.isBitcast) OS << "|(1<<MCID::Bitcast)"; + if (Inst.isSelect) OS << "|(1<<MCID::Select)"; if (Inst.isBarrier) OS << "|(1<<MCID::Barrier)"; if (Inst.hasDelaySlot) OS << "|(1<<MCID::DelaySlot)"; if (Inst.isCall) OS << "|(1<<MCID::Call)"; diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp index 3d8d515..02546df 100644 --- a/utils/TableGen/RegisterInfoEmitter.cpp +++ b/utils/TableGen/RegisterInfoEmitter.cpp @@ -145,9 +145,9 @@ void RegisterInfoEmitter::runEnums(raw_ostream &OS, if (!Namespace.empty()) OS << "namespace " << Namespace << " {\n"; OS << "enum {\n NoSubRegister,\n"; - for (unsigned i = 0, e = Bank.getNumNamedIndices(); i != e; ++i) + for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i) OS << " " << SubRegIndices[i]->getName() << ",\t// " << i+1 << "\n"; - OS << " NUM_TARGET_NAMED_SUBREGS\n};\n"; + OS << " NUM_TARGET_SUBREGS\n};\n"; if (!Namespace.empty()) OS << "}\n"; } @@ -479,16 +479,12 @@ public: } }; -static void printRegister(raw_ostream &OS, const CodeGenRegister *Reg) { - OS << getQualifiedName(Reg->TheDef); -} - static void printSimpleValueType(raw_ostream &OS, MVT::SimpleValueType VT) { OS << getEnumName(VT); } static void printSubRegIndex(raw_ostream &OS, const CodeGenSubRegIndex *Idx) { - OS << Idx->getQualifiedName(); + OS << Idx->EnumValue; } // Differentially encoded register and regunit lists allow for better @@ -517,6 +513,19 @@ DiffVec &diffEncode(DiffVec &V, unsigned InitVal, ArrayRef<unsigned> List) { return V; } +template<typename Iter> +static +DiffVec &diffEncode(DiffVec &V, unsigned InitVal, Iter Begin, Iter End) { + assert(V.empty() && "Clear DiffVec before 
diffEncode."); + uint16_t Val = uint16_t(InitVal); + for (Iter I = Begin; I != End; ++I) { + uint16_t Cur = (*I)->EnumValue; + V.push_back(Cur - Val); + Val = Cur; + } + return V; +} + static void printDiff16(raw_ostream &OS, uint16_t Val) { OS << Val; } @@ -537,15 +546,21 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, // The lists of sub-registers, super-registers, and overlaps all go in the // same array. That allows us to share suffixes. typedef std::vector<const CodeGenRegister*> RegVec; - SmallVector<RegVec, 4> SubRegLists(Regs.size()); - SmallVector<RegVec, 4> OverlapLists(Regs.size()); - SequenceToOffsetTable<RegVec, CodeGenRegister::Less> RegSeqs; // Differentially encoded lists. SequenceToOffsetTable<DiffVec> DiffSeqs; + SmallVector<DiffVec, 4> SubRegLists(Regs.size()); + SmallVector<DiffVec, 4> SuperRegLists(Regs.size()); + SmallVector<DiffVec, 4> OverlapLists(Regs.size()); SmallVector<DiffVec, 4> RegUnitLists(Regs.size()); SmallVector<unsigned, 4> RegUnitInitScale(Regs.size()); + // Keep track of sub-register names as well. These are not differentially + // encoded. + typedef SmallVector<const CodeGenSubRegIndex*, 4> SubRegIdxVec; + SequenceToOffsetTable<SubRegIdxVec> SubRegIdxSeqs; + SmallVector<SubRegIdxVec, 4> SubRegIdxLists(Regs.size()); + SequenceToOffsetTable<std::string> RegStrings; // Precompute register lists for the SequenceToOffsetTable. @@ -557,37 +572,29 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, // Compute the ordered sub-register list. SetVector<const CodeGenRegister*> SR; Reg->addSubRegsPreOrder(SR, RegBank); - RegVec &SubRegList = SubRegLists[i]; - SubRegList.assign(SR.begin(), SR.end()); - RegSeqs.add(SubRegList); + diffEncode(SubRegLists[i], Reg->EnumValue, SR.begin(), SR.end()); + DiffSeqs.add(SubRegLists[i]); + + // Compute the corresponding sub-register indexes. + SubRegIdxVec &SRIs = SubRegIdxLists[i]; + for (unsigned j = 0, je = SR.size(); j != je; ++j) + SRIs.push_back(Reg->getSubRegIndex(SR[j])); + SubRegIdxSeqs.add(SRIs); // Super-registers are already computed. const RegVec &SuperRegList = Reg->getSuperRegs(); - RegSeqs.add(SuperRegList); - - // The list of overlaps doesn't need to have any particular order, except - // Reg itself must be the first element. Pick an ordering that has one of - // the other lists as a suffix. - RegVec &OverlapList = OverlapLists[i]; - const RegVec &Suffix = SubRegList.size() > SuperRegList.size() ? - SubRegList : SuperRegList; - CodeGenRegister::Set Omit(Suffix.begin(), Suffix.end()); + diffEncode(SuperRegLists[i], Reg->EnumValue, + SuperRegList.begin(), SuperRegList.end()); + DiffSeqs.add(SuperRegLists[i]); - // First element is Reg itself. - OverlapList.push_back(Reg); - Omit.insert(Reg); - - // Any elements not in Suffix. + // The list of overlaps doesn't need to have any particular order, and Reg + // itself must be omitted. + DiffVec &OverlapList = OverlapLists[i]; CodeGenRegister::Set OSet; Reg->computeOverlaps(OSet, RegBank); - std::set_difference(OSet.begin(), OSet.end(), - Omit.begin(), Omit.end(), - std::back_inserter(OverlapList), - CodeGenRegister::Less()); - - // Finally, Suffix itself. - OverlapList.insert(OverlapList.end(), Suffix.begin(), Suffix.end()); - RegSeqs.add(OverlapList); + OSet.erase(Reg); + diffEncode(OverlapList, Reg->EnumValue, OSet.begin(), OSet.end()); + DiffSeqs.add(OverlapList); // Differentially encode the register unit list, seeded by register number. 
// First compute a scale factor that allows more diff-lists to be reused: @@ -616,23 +623,23 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, } // Compute the final layout of the sequence table. - RegSeqs.layout(); DiffSeqs.layout(); + SubRegIdxSeqs.layout(); OS << "namespace llvm {\n\n"; const std::string &TargetName = Target.getName(); - // Emit the shared table of register lists. - OS << "extern const uint16_t " << TargetName << "RegLists[] = {\n"; - RegSeqs.emit(OS, printRegister); - OS << "};\n\n"; - // Emit the shared table of differential lists. OS << "extern const uint16_t " << TargetName << "RegDiffLists[] = {\n"; DiffSeqs.emit(OS, printDiff16); OS << "};\n\n"; + // Emit the table of sub-register indexes. + OS << "extern const uint16_t " << TargetName << "SubRegIdxLists[] = {\n"; + SubRegIdxSeqs.emit(OS, printSubRegIndex); + OS << "};\n\n"; + // Emit the string table. RegStrings.layout(); OS << "extern const char " << TargetName << "RegStrings[] = {\n"; @@ -641,15 +648,16 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, OS << "extern const MCRegisterDesc " << TargetName << "RegDesc[] = { // Descriptors\n"; - OS << " { " << RegStrings.get("") << ", 0, 0, 0, 0 },\n"; + OS << " { " << RegStrings.get("") << ", 0, 0, 0, 0, 0 },\n"; // Emit the register descriptors now. for (unsigned i = 0, e = Regs.size(); i != e; ++i) { const CodeGenRegister *Reg = Regs[i]; OS << " { " << RegStrings.get(Reg->getName()) << ", " - << RegSeqs.get(OverlapLists[i]) << ", " - << RegSeqs.get(SubRegLists[i]) << ", " - << RegSeqs.get(Reg->getSuperRegs()) << ", " + << DiffSeqs.get(OverlapLists[i]) << ", " + << DiffSeqs.get(SubRegLists[i]) << ", " + << DiffSeqs.get(SuperRegLists[i]) << ", " + << SubRegIdxSeqs.get(SubRegIdxLists[i]) << ", " << (DiffSeqs.get(RegUnitLists[i])*16 + RegUnitInitScale[i]) << " },\n"; } OS << "};\n\n"; // End of register descriptors... @@ -729,37 +737,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, OS << "};\n\n"; - // Emit the data table for getSubReg(). ArrayRef<CodeGenSubRegIndex*> SubRegIndices = RegBank.getSubRegIndices(); - if (SubRegIndices.size()) { - OS << "const uint16_t " << TargetName << "SubRegTable[][" - << SubRegIndices.size() << "] = {\n"; - for (unsigned i = 0, e = Regs.size(); i != e; ++i) { - const CodeGenRegister::SubRegMap &SRM = Regs[i]->getSubRegs(); - OS << " /* " << Regs[i]->TheDef->getName() << " */\n"; - if (SRM.empty()) { - OS << " {0},\n"; - continue; - } - OS << " {"; - for (unsigned j = 0, je = SubRegIndices.size(); j != je; ++j) { - // FIXME: We really should keep this to 80 columns... - CodeGenRegister::SubRegMap::const_iterator SubReg = - SRM.find(SubRegIndices[j]); - if (SubReg != SRM.end()) - OS << getQualifiedName(SubReg->second->TheDef); - else - OS << "0"; - if (j != je - 1) - OS << ", "; - } - OS << "}" << (i != e ? "," : "") << "\n"; - } - OS << "};\n\n"; - OS << "const uint16_t *get" << TargetName - << "SubRegTable() {\n return (const uint16_t *)" << TargetName - << "SubRegTable;\n}\n\n"; - } EmitRegMappingTables(OS, Regs, false); @@ -783,22 +761,17 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target, // MCRegisterInfo initialization routine. 
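// The RegDiffLists emitted above store every register list as 16-bit deltas
// from an initial value (the register's own enum value for the sub-, super-,
// and overlap lists), which lets sequences share suffixes. A consumer
// rebuilds a list roughly as in this sketch; expandDiffList is hypothetical
// and assumes the zero-terminated layout that SequenceToOffsetTable emits:
//
//   static void expandDiffList(const uint16_t *DiffList, unsigned InitVal,
//                              std::vector<unsigned> &Out) {
//     uint16_t Val = uint16_t(InitVal);
//     for (const uint16_t *D = DiffList; *D; ++D) {
//       Val = uint16_t(Val + *D); // deltas wrap modulo 2^16 by construction
//       Out.push_back(Val);
//     }
//   }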
OS << "static inline void Init" << TargetName << "MCRegisterInfo(MCRegisterInfo *RI, unsigned RA, " - << "unsigned DwarfFlavour = 0, unsigned EHFlavour = 0) {\n"; - OS << " RI->InitMCRegisterInfo(" << TargetName << "RegDesc, " + << "unsigned DwarfFlavour = 0, unsigned EHFlavour = 0) {\n" + << " RI->InitMCRegisterInfo(" << TargetName << "RegDesc, " << Regs.size()+1 << ", RA, " << TargetName << "MCRegisterClasses, " << RegisterClasses.size() << ", " << TargetName << "RegUnitRoots, " << RegBank.getNumNativeRegUnits() << ", " - << TargetName << "RegLists, " << TargetName << "RegDiffLists, " - << TargetName << "RegStrings, "; - if (SubRegIndices.size() != 0) - OS << "(uint16_t*)" << TargetName << "SubRegTable, " - << SubRegIndices.size() << ",\n"; - else - OS << "NULL, 0,\n"; - - OS << " " << TargetName << "RegEncodingTable);\n\n"; + << TargetName << "RegStrings, " + << TargetName << "SubRegIdxLists, " + << SubRegIndices.size() << ",\n" + << " " << TargetName << "RegEncodingTable);\n\n"; EmitRegMapping(OS, Regs, false); @@ -912,17 +885,6 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, } OS << "\" };\n\n"; - // Emit names of the anonymous subreg indices. - unsigned NamedIndices = RegBank.getNumNamedIndices(); - if (SubRegIndices.size() > NamedIndices) { - OS << " enum {"; - for (unsigned i = NamedIndices, e = SubRegIndices.size(); i != e; ++i) { - OS << "\n " << SubRegIndices[i]->getName() << " = " << i+1; - if (i+1 != e) - OS << ','; - } - OS << "\n };\n\n"; - } OS << "\n"; // Now that all of the structs have been emitted, emit the instances. @@ -1148,13 +1110,10 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, // Emit the constructor of the class... OS << "extern const MCRegisterDesc " << TargetName << "RegDesc[];\n"; - OS << "extern const uint16_t " << TargetName << "RegLists[];\n"; OS << "extern const uint16_t " << TargetName << "RegDiffLists[];\n"; OS << "extern const char " << TargetName << "RegStrings[];\n"; OS << "extern const uint16_t " << TargetName << "RegUnitRoots[][2];\n"; - if (SubRegIndices.size() != 0) - OS << "extern const uint16_t *get" << TargetName - << "SubRegTable();\n"; + OS << "extern const uint16_t " << TargetName << "SubRegIdxLists[];\n"; OS << "extern const uint16_t " << TargetName << "RegEncodingTable[];\n"; EmitRegMappingTables(OS, Regs, true); @@ -1169,17 +1128,11 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, << "MCRegisterClasses, " << RegisterClasses.size() << ",\n" << " " << TargetName << "RegUnitRoots,\n" << " " << RegBank.getNumNativeRegUnits() << ",\n" - << " " << TargetName << "RegLists,\n" << " " << TargetName << "RegDiffLists,\n" << " " << TargetName << "RegStrings,\n" - << " "; - if (SubRegIndices.size() != 0) - OS << "get" << TargetName << "SubRegTable(), " - << SubRegIndices.size() << ",\n"; - else - OS << "NULL, 0,\n"; - - OS << " " << TargetName << "RegEncodingTable);\n\n"; + << " " << TargetName << "SubRegIdxLists,\n" + << " " << SubRegIndices.size() << ",\n" + << " " << TargetName << "RegEncodingTable);\n\n"; EmitRegMapping(OS, Regs, true); diff --git a/utils/TableGen/StringToOffsetTable.h b/utils/TableGen/StringToOffsetTable.h index 803f5bd..a098d7d 100644 --- a/utils/TableGen/StringToOffsetTable.h +++ b/utils/TableGen/StringToOffsetTable.h @@ -14,6 +14,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" +#include <cctype> namespace llvm { diff --git a/utils/TableGen/SubtargetEmitter.cpp 
b/utils/TableGen/SubtargetEmitter.cpp index b3bf4aa..3472343 100644 --- a/utils/TableGen/SubtargetEmitter.cpp +++ b/utils/TableGen/SubtargetEmitter.cpp @@ -590,6 +590,7 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) { EmitProcessorProp(OS, PI->ModelDef, "MinLatency", ','); EmitProcessorProp(OS, PI->ModelDef, "LoadLatency", ','); EmitProcessorProp(OS, PI->ModelDef, "HighLatency", ','); + EmitProcessorProp(OS, PI->ModelDef, "MispredictPenalty", ','); if (SchedModels.hasItineraryClasses()) OS << " " << PI->ItinsDef->getName(); else diff --git a/utils/TableGen/X86DisassemblerShared.h b/utils/TableGen/X86DisassemblerShared.h index 0417e9d..c13a0cc 100644 --- a/utils/TableGen/X86DisassemblerShared.h +++ b/utils/TableGen/X86DisassemblerShared.h @@ -14,6 +14,7 @@ #include <string.h> #define INSTRUCTION_SPECIFIER_FIELDS \ + struct OperandSpecifier operands[X86_MAX_OPERANDS]; \ bool filtered; \ InstructionContext insnContext; \ std::string name; \ diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp index 2875168..f3bd373 100644 --- a/utils/TableGen/X86DisassemblerTables.cpp +++ b/utils/TableGen/X86DisassemblerTables.cpp @@ -21,10 +21,11 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" +#include <map> using namespace llvm; using namespace X86Disassembler; - + /// inheritsFrom - Indicates whether all instructions in one class also belong /// to another class. /// @@ -36,7 +37,7 @@ static inline bool inheritsFrom(InstructionContext child, bool VEX_LIG = false) { if (child == parent) return true; - + switch (parent) { case IC: return(inheritsFrom(child, IC_64BIT) || @@ -117,17 +118,17 @@ static inline bool inheritsFrom(InstructionContext child, /// @param upper - The class that may be preferable /// @param lower - The class that may be less preferable /// @return - True if upper is to be preferred, false otherwise. -static inline bool outranks(InstructionContext upper, +static inline bool outranks(InstructionContext upper, InstructionContext lower) { assert(upper < IC_max); assert(lower < IC_max); - + #define ENUM_ENTRY(n, r, d) r, static int ranks[IC_max] = { INSTRUCTION_CONTEXTS }; #undef ENUM_ENTRY - + return (ranks[upper] > ranks[lower]); } @@ -170,24 +171,22 @@ static inline const char* stringForOperandEncoding(OperandEncoding encoding) { } } -void DisassemblerTables::emitOneID(raw_ostream &o, - uint32_t &i, - InstrUID id, +void DisassemblerTables::emitOneID(raw_ostream &o, unsigned &i, InstrUID id, bool addComma) const { if (id) o.indent(i * 2) << format("0x%hx", id); else o.indent(i * 2) << 0; - + if (addComma) o << ", "; else o << " "; - + o << "/* "; o << InstructionSpecifiers[id].name; o << "*/"; - + o << "\n"; } @@ -197,8 +196,7 @@ void DisassemblerTables::emitOneID(raw_ostream &o, /// /// @param o - The output stream on which to emit the table. /// @param i - The indentation level for that output stream. -static void emitEmptyTable(raw_ostream &o, uint32_t &i) -{ +static void emitEmptyTable(raw_ostream &o, unsigned &i) { o.indent(i * 2) << "0x0, /* EmptyTable */\n"; } @@ -207,15 +205,12 @@ static void emitEmptyTable(raw_ostream &o, uint32_t &i) /// /// @param decision - The decision to be compacted. /// @return - The compactest available representation for the decision. 
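// Roughly, the compaction chosen below prefers, in order: MODRM_ONEENTRY
// when all 256 ModR/M values decode to the same instruction, MODRM_SPLITRM
// when entries only differ between the mod!=0b11 half (0x00-0xbf) and the
// mod==0b11 half (0xc0-0xff), MODRM_SPLITREG when entries agree within each
// group of eight that shares a reg field value, and MODRM_FULL (a table
// with all 256 entries) otherwise.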
-static ModRMDecisionType getDecisionType(ModRMDecision &decision) -{ +static ModRMDecisionType getDecisionType(ModRMDecision &decision) { bool satisfiesOneEntry = true; bool satisfiesSplitRM = true; bool satisfiesSplitReg = true; - uint16_t index; - - for (index = 0; index < 256; ++index) { + for (unsigned index = 0; index < 256; ++index) { if (decision.instructionIDs[index] != decision.instructionIDs[0]) satisfiesOneEntry = false; @@ -252,27 +247,25 @@ static ModRMDecisionType getDecisionType(ModRMDecision &decision) /// to a particular decision type. /// /// @param dt - The decision type. -/// @return - A pointer to the statically-allocated string (e.g., +/// @return - A pointer to the statically-allocated string (e.g., /// "MODRM_ONEENTRY" for MODRM_ONEENTRY). -static const char* stringForDecisionType(ModRMDecisionType dt) -{ +static const char* stringForDecisionType(ModRMDecisionType dt) { #define ENUM_ENTRY(n) case n: return #n; switch (dt) { default: - llvm_unreachable("Unknown decision type"); + llvm_unreachable("Unknown decision type"); MODRMTYPES - }; + }; #undef ENUM_ENTRY } - + /// stringForModifierType - Returns a statically-allocated string corresponding /// to an opcode modifier type. /// /// @param mt - The modifier type. /// @return - A pointer to the statically-allocated string (e.g., /// "MODIFIER_NONE" for MODIFIER_NONE). -static const char* stringForModifierType(ModifierType mt) -{ +static const char* stringForModifierType(ModifierType mt) { #define ENUM_ENTRY(n) case n: return #n; switch(mt) { default: @@ -281,35 +274,31 @@ static const char* stringForModifierType(ModifierType mt) }; #undef ENUM_ENTRY } - + DisassemblerTables::DisassemblerTables() { unsigned i; - + for (i = 0; i < array_lengthof(Tables); i++) { Tables[i] = new ContextDecision; memset(Tables[i], 0, sizeof(ContextDecision)); } - + HasConflicts = false; } - + DisassemblerTables::~DisassemblerTables() { unsigned i; - + for (i = 0; i < array_lengthof(Tables); i++) delete Tables[i]; } - -void DisassemblerTables::emitModRMDecision(raw_ostream &o1, - raw_ostream &o2, - uint32_t &i1, - uint32_t &i2, - ModRMDecision &decision) - const { - static uint64_t sTableNumber = 0; - static uint64_t sEntryNumber = 1; + +void DisassemblerTables::emitModRMDecision(raw_ostream &o1, raw_ostream &o2, + unsigned &i1, unsigned &i2, + ModRMDecision &decision) const { + static uint32_t sTableNumber = 0; + static uint32_t sEntryNumber = 1; ModRMDecisionType dt = getDecisionType(decision); - uint16_t index; if (dt == MODRM_ONEENTRY && decision.instructionIDs[0] == 0) { @@ -338,13 +327,13 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, emitOneID(o1, i1, decision.instructionIDs[0xc0], true); // mod = 0b11 break; case MODRM_SPLITREG: - for (index = 0; index < 64; index += 8) + for (unsigned index = 0; index < 64; index += 8) emitOneID(o1, i1, decision.instructionIDs[index], true); - for (index = 0xc0; index < 256; index += 8) + for (unsigned index = 0xc0; index < 256; index += 8) emitOneID(o1, i1, decision.instructionIDs[index], true); break; case MODRM_FULL: - for (index = 0; index < 256; ++index) + for (unsigned index = 0; index < 256; ++index) emitOneID(o1, i1, decision.instructionIDs[index], true); break; } @@ -380,20 +369,15 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1, ++sTableNumber; } -void DisassemblerTables::emitOpcodeDecision( - raw_ostream &o1, - raw_ostream &o2, - uint32_t &i1, - uint32_t &i2, - OpcodeDecision &decision) const { - uint16_t index; - +void 
DisassemblerTables::emitOpcodeDecision(raw_ostream &o1, raw_ostream &o2, + unsigned &i1, unsigned &i2, + OpcodeDecision &decision) const { o2.indent(i2) << "{ /* struct OpcodeDecision */" << "\n"; i2++; o2.indent(i2) << "{" << "\n"; i2++; - for (index = 0; index < 256; ++index) { + for (unsigned index = 0; index < 256; ++index) { o2.indent(i2); o2 << "/* 0x" << format("%02hhx", index) << " */" << "\n"; @@ -412,21 +396,16 @@ void DisassemblerTables::emitOpcodeDecision( o2.indent(i2) << "}" << "\n"; } -void DisassemblerTables::emitContextDecision( - raw_ostream &o1, - raw_ostream &o2, - uint32_t &i1, - uint32_t &i2, - ContextDecision &decision, - const char* name) const { +void DisassemblerTables::emitContextDecision(raw_ostream &o1, raw_ostream &o2, + unsigned &i1, unsigned &i2, + ContextDecision &decision, + const char* name) const { o2.indent(i2) << "static const struct ContextDecision " << name << " = {\n"; i2++; o2.indent(i2) << "{ /* opcodeDecisions */" << "\n"; i2++; - unsigned index; - - for (index = 0; index < IC_max; ++index) { + for (unsigned index = 0; index < IC_max; ++index) { o2.indent(i2) << "/* "; o2 << stringForContext((InstructionContext)index); o2 << " */"; @@ -444,58 +423,81 @@ void DisassemblerTables::emitContextDecision( o2.indent(i2) << "};" << "\n"; } -void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i) - const { +void DisassemblerTables::emitInstructionInfo(raw_ostream &o, + unsigned &i) const { + unsigned NumInstructions = InstructionSpecifiers.size(); + + o << "static const struct OperandSpecifier x86OperandSets[][" + << X86_MAX_OPERANDS << "] = {\n"; + + typedef std::vector<std::pair<const char *, const char *> > OperandListTy; + std::map<OperandListTy, unsigned> OperandSets; + + unsigned OperandSetNum = 0; + for (unsigned Index = 0; Index < NumInstructions; ++Index) { + OperandListTy OperandList; + + for (unsigned OperandIndex = 0; OperandIndex < X86_MAX_OPERANDS; + ++OperandIndex) { + const char *Encoding = + stringForOperandEncoding((OperandEncoding)InstructionSpecifiers[Index] + .operands[OperandIndex].encoding); + const char *Type = + stringForOperandType((OperandType)InstructionSpecifiers[Index] + .operands[OperandIndex].type); + OperandList.push_back(std::make_pair(Encoding, Type)); + } + unsigned &N = OperandSets[OperandList]; + if (N != 0) continue; + + N = ++OperandSetNum; + + o << " { /* " << (OperandSetNum - 1) << " */\n"; + for (unsigned i = 0, e = OperandList.size(); i != e; ++i) { + o << " { " << OperandList[i].first << ", " + << OperandList[i].second << " },\n"; + } + o << " },\n"; + } + o << "};" << "\n\n"; + o.indent(i * 2) << "static const struct InstructionSpecifier "; o << INSTRUCTIONS_STR "[" << InstructionSpecifiers.size() << "] = {\n"; - - i++; - uint16_t numInstructions = InstructionSpecifiers.size(); - uint16_t index, operandIndex; + i++; - for (index = 0; index < numInstructions; ++index) { + for (unsigned index = 0; index < NumInstructions; ++index) { o.indent(i * 2) << "{ /* " << index << " */" << "\n"; i++; o.indent(i * 2) << stringForModifierType( (ModifierType)InstructionSpecifiers[index].modifierType); - o << "," << "\n"; + o << ",\n"; o.indent(i * 2) << "0x"; o << format("%02hhx", (uint16_t)InstructionSpecifiers[index].modifierBase); - o << "," << "\n"; - - o.indent(i * 2) << "{" << "\n"; - i++; - - for (operandIndex = 0; operandIndex < X86_MAX_OPERANDS; ++operandIndex) { - o.indent(i * 2) << "{ "; - o <<stringForOperandEncoding((OperandEncoding)InstructionSpecifiers[index] - .operands[operandIndex] - 
.encoding); - o << ", "; - o << stringForOperandType((OperandType)InstructionSpecifiers[index] - .operands[operandIndex] - .type); - o << " }"; - - if (operandIndex < X86_MAX_OPERANDS - 1) - o << ","; - - o << "\n"; + o << ",\n"; + + OperandListTy OperandList; + for (unsigned OperandIndex = 0; OperandIndex < X86_MAX_OPERANDS; + ++OperandIndex) { + const char *Encoding = + stringForOperandEncoding((OperandEncoding)InstructionSpecifiers[index] + .operands[OperandIndex].encoding); + const char *Type = + stringForOperandType((OperandType)InstructionSpecifiers[index] + .operands[OperandIndex].type); + OperandList.push_back(std::make_pair(Encoding, Type)); } + o.indent(i * 2) << (OperandSets[OperandList] - 1) << ",\n"; - i--; - o.indent(i * 2) << "}," << "\n"; - o.indent(i * 2) << "/* " << InstructionSpecifiers[index].name << " */"; o << "\n"; i--; o.indent(i * 2) << "}"; - if (index + 1 < numInstructions) + if (index + 1 < NumInstructions) o << ","; o << "\n"; @@ -505,14 +507,12 @@ void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i) o.indent(i * 2) << "};" << "\n"; } -void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const { - uint16_t index; - - o.indent(i * 2) << "static const InstructionContext " CONTEXTS_STR +void DisassemblerTables::emitContextTable(raw_ostream &o, unsigned &i) const { + o.indent(i * 2) << "static const uint8_t " CONTEXTS_STR "[256] = {\n"; i++; - for (index = 0; index < 256; ++index) { + for (unsigned index = 0; index < 256; ++index) { o.indent(i * 2); if ((index & ATTR_VEXL) && (index & ATTR_REXW) && (index & ATTR_OPSIZE)) @@ -545,7 +545,7 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const { o << "IC_64BIT_REXW_XS"; else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_XD)) o << "IC_64BIT_REXW_XD"; - else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && + else if ((index & ATTR_64BIT) && (index & ATTR_REXW) && (index & ATTR_OPSIZE)) o << "IC_64BIT_REXW_OPSIZE"; else if ((index & ATTR_64BIT) && (index & ATTR_XD) && (index & ATTR_OPSIZE)) @@ -593,11 +593,8 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const { o.indent(i * 2) << "};" << "\n"; } -void DisassemblerTables::emitContextDecisions(raw_ostream &o1, - raw_ostream &o2, - uint32_t &i1, - uint32_t &i2) - const { +void DisassemblerTables::emitContextDecisions(raw_ostream &o1, raw_ostream &o2, + unsigned &i1, unsigned &i2) const { emitContextDecision(o1, o2, i1, i2, *Tables[0], ONEBYTE_STR); emitContextDecision(o1, o2, i1, i2, *Tables[1], TWOBYTE_STR); emitContextDecision(o1, o2, i1, i2, *Tables[2], THREEBYTE38_STR); @@ -607,15 +604,15 @@ void DisassemblerTables::emitContextDecisions(raw_ostream &o1, } void DisassemblerTables::emit(raw_ostream &o) const { - uint32_t i1 = 0; - uint32_t i2 = 0; - + unsigned i1 = 0; + unsigned i2 = 0; + std::string s1; std::string s2; - + raw_string_ostream o1(s1); raw_string_ostream o2(s2); - + emitInstructionInfo(o, i2); o << "\n"; @@ -641,9 +638,7 @@ void DisassemblerTables::setTableFields(ModRMDecision &decision, const ModRMFilter &filter, InstrUID uid, uint8_t opcode) { - unsigned index; - - for (index = 0; index < 256; ++index) { + for (unsigned index = 0; index < 256; ++index) { if (filter.accepts(index)) { if (decision.instructionIDs[index] == uid) continue; @@ -653,10 +648,10 @@ void DisassemblerTables::setTableFields(ModRMDecision &decision, InstructionSpecifiers[uid]; InstructionSpecifier &previousInfo = InstructionSpecifiers[decision.instructionIDs[index]]; - + 
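// Priority order applied below when two instructions claim the same table
// entry: filtered instructions always lose; the NOOP vs. XCHG*ar overlap is
// special-cased in NOOP's favor; otherwise the instruction whose context
// outranks the other keeps the slot, and two unfiltered instructions in the
// same context are reported as a primary decode conflict.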
if(newInfo.filtered) continue; // filtered instructions get lowest priority - + if(previousInfo.name == "NOOP" && (newInfo.name == "XCHG16ar" || newInfo.name == "XCHG32ar" || newInfo.name == "XCHG32ar64" || @@ -665,7 +660,7 @@ void DisassemblerTables::setTableFields(ModRMDecision &decision, if (outranks(previousInfo.insnContext, newInfo.insnContext)) continue; - + if (previousInfo.insnContext == newInfo.insnContext && !previousInfo.filtered) { errs() << "Error: Primary decode conflict: "; @@ -690,17 +685,15 @@ void DisassemblerTables::setTableFields(OpcodeType type, InstrUID uid, bool is32bit, bool ignoresVEX_L) { - unsigned index; - ContextDecision &decision = *Tables[type]; - for (index = 0; index < IC_max; ++index) { + for (unsigned index = 0; index < IC_max; ++index) { if (is32bit && inheritsFrom((InstructionContext)index, IC_64BIT)) continue; - if (inheritsFrom((InstructionContext)index, + if (inheritsFrom((InstructionContext)index, InstructionSpecifiers[uid].insnContext, ignoresVEX_L)) - setTableFields(decision.opcodeDecisions[index].modRMDecisions[opcode], + setTableFields(decision.opcodeDecisions[index].modRMDecisions[opcode], filter, uid, opcode); diff --git a/utils/TableGen/X86DisassemblerTables.h b/utils/TableGen/X86DisassemblerTables.h index e148cd2..ea006c0 100644 --- a/utils/TableGen/X86DisassemblerTables.h +++ b/utils/TableGen/X86DisassemblerTables.h @@ -42,13 +42,13 @@ private: /// [4] three-byte opcodes of the form 0f a6 __ /// [5] three-byte opcodes of the form 0f a7 __ ContextDecision* Tables[6]; - + /// The instruction information table std::vector<InstructionSpecifier> InstructionSpecifiers; - + /// True if there are primary decode conflicts in the instruction set bool HasConflicts; - + /// emitOneID - Emits a table entry for a single instruction entry, at the /// innermost level of the structure hierarchy. The entry is printed out /// in the format "nnnn, /* MNEMONIC */" where nnnn is the ID in decimal, @@ -64,7 +64,7 @@ private: uint32_t &i, InstrUID id, bool addComma) const; - + /// emitModRMDecision - Emits a table of entries corresponding to a single /// ModR/M decision. Compacts the ModR/M decision if possible. ModR/M /// decisions are printed as: @@ -77,12 +77,12 @@ private: /// where nnnn is a unique ID for the corresponding table of IDs. /// TYPE indicates whether the table has one entry that is the same /// regardless of ModR/M byte, two entries - one for bytes 0x00-0xbf and one - /// for bytes 0xc0-0xff -, or 256 entries, one for each possible byte. + /// for bytes 0xc0-0xff -, or 256 entries, one for each possible byte. /// nnnn is the number of a table for looking up these values. The tables /// are written separately so that tables consisting entirely of zeros will /// not be duplicated. (These all have the name modRMEmptyTable.) A table /// is printed as: - /// + /// /// InstrUID modRMTablennnn[k] = { /// nnnn, /* MNEMONIC */ /// ... @@ -100,7 +100,7 @@ private: uint32_t &i1, uint32_t &i2, ModRMDecision &decision) const; - + /// emitOpcodeDecision - Emits an OpcodeDecision and all its subsidiary ModR/M /// decisions. An OpcodeDecision is printed as: /// @@ -129,8 +129,8 @@ private: uint32_t &i1, uint32_t &i2, OpcodeDecision &decision) const; - - /// emitContextDecision - Emits a ContextDecision and all its subsidiary + + /// emitContextDecision - Emits a ContextDecision and all its subsidiary /// Opcode and ModRMDecisions. 
A ContextDecision is printed as: /// /// struct ContextDecision NAME = { @@ -163,10 +163,10 @@ private: void emitContextDecision(raw_ostream &o1, raw_ostream &o2, uint32_t &i1, - uint32_t &i2, + uint32_t &i2, ContextDecision &decision, const char* name) const; - + /// emitInstructionInfo - Prints the instruction specifier table, which has /// one entry for each instruction, and contains name and operand /// information. This table is printed as: @@ -187,17 +187,17 @@ private: /// }; /// /// k is the total number of instructions. - /// nnnn is the ID of the current instruction (0-based). This table + /// nnnn is the ID of the current instruction (0-based). This table /// includes entries for non-instructions like PHINODE. /// 0xnn is the lowest possible opcode for the current instruction, used for /// AddRegFrm instructions to compute the operand's value. /// ENCODING and TYPE describe the encoding and type for a single operand. /// - /// @param o - The output stream to which the instruction table should be + /// @param o - The output stream to which the instruction table should be /// written. /// @param i - The indent level for use with the stream. void emitInstructionInfo(raw_ostream &o, uint32_t &i) const; - + /// emitContextTable - Prints the table that is used to translate from an /// instruction attribute mask to an instruction context. This table is /// printed as: @@ -213,7 +213,7 @@ private: /// @param o - The output stream to which the context table should be written. /// @param i - The indent level for use with the stream. void emitContextTable(raw_ostream &o, uint32_t &i) const; - + /// emitContextDecisions - Prints all four ContextDecision structures using /// emitContextDecision(). /// @@ -225,7 +225,7 @@ private: void emitContextDecisions(raw_ostream &o1, raw_ostream &o2, uint32_t &i1, - uint32_t &i2) const; + uint32_t &i2) const; /// setTableFields - Uses a ModRMFilter to set the appropriate entries in a /// ModRMDecision to refer to a particular instruction ID. @@ -241,14 +241,14 @@ private: public: /// Constructor - Allocates space for the class decisions and clears them. DisassemblerTables(); - + ~DisassemblerTables(); - + /// emit - Emits the instruction table, context table, and class decisions. /// /// @param o - The output stream to print the tables to. void emit(raw_ostream &o) const; - + /// setTableFields - Uses the opcode type, instruction context, opcode, and a /// ModRMFilter as criteria to set a particular set of entries in the /// decode tables to point to a specific uid. @@ -268,24 +268,24 @@ public: const ModRMFilter &filter, InstrUID uid, bool is32bit, - bool ignoresVEX_L); - + bool ignoresVEX_L); + /// specForUID - Returns the instruction specifier for a given unique /// instruction ID. Used when resolving collisions. /// /// @param uid - The unique ID of the instruction. - /// @return - A reference to the instruction specifier. + /// @return - A reference to the instruction specifier. InstructionSpecifier& specForUID(InstrUID uid) { if (uid >= InstructionSpecifiers.size()) InstructionSpecifiers.resize(uid + 1); - + return InstructionSpecifiers[uid]; } - + // hasConflicts - Reports whether there were primary decode conflicts // from any instructions added to the tables. // @return - true if there were; false otherwise. 
- + bool hasConflicts() { return HasConflicts; } diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index 6a685ff..7ac2336 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -57,7 +57,7 @@ namespace X86Local { MRMDestMem = 4, MRMSrcReg = 5, MRMSrcMem = 6, - MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, + MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, @@ -69,7 +69,7 @@ namespace X86Local { #undef MAP lastMRM }; - + enum { TB = 1, REP = 2, @@ -82,17 +82,17 @@ namespace X86Local { } // If rows are added to the opcode extension tables, then corresponding entries -// must be added here. +// must be added here. // // If the row corresponds to a single byte (i.e., 8f), then add an entry for // that byte to ONE_BYTE_EXTENSION_TABLES. // -// If the row corresponds to two bytes where the first is 0f, add an entry for +// If the row corresponds to two bytes where the first is 0f, add an entry for // the second byte to TWO_BYTE_EXTENSION_TABLES. // // If the row corresponds to some other set of bytes, you will need to modify // the code in RecognizableInstr::emitDecodePath() as well, and add new prefixes -// to the X86 TD files, except in two cases: if the first two bytes of such a +// to the X86 TD files, except in two cases: if the first two bytes of such a // new combination are 0f 38 or 0f 3a, you just have to add maps called // THREE_BYTE_38_EXTENSION_TABLES and THREE_BYTE_3A_EXTENSION_TABLES and add a // switch(Opcode) just below the case X86Local::T8: or case X86Local::TA: line @@ -116,7 +116,7 @@ namespace X86Local { EXTENSION_TABLE(f7) \ EXTENSION_TABLE(fe) \ EXTENSION_TABLE(ff) - + #define TWO_BYTE_EXTENSION_TABLES \ EXTENSION_TABLE(00) \ EXTENSION_TABLE(01) \ @@ -134,7 +134,7 @@ namespace X86Local { using namespace X86Disassembler; /// needsModRMForDecode - Indicates whether a particular instruction requires a -/// ModR/M byte for the instruction to be properly decoded. For example, a +/// ModR/M byte for the instruction to be properly decoded. For example, a /// MRMDestReg instruction needs the Mod field in the ModR/M byte to be set to /// 0b11. 
/// @@ -213,17 +213,17 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, Rec = insn.TheDef; Name = Rec->getName(); Spec = &tables.specForUID(UID); - + if (!Rec->isSubClassOf("X86Inst")) { ShouldBeEmitted = false; return; } - + Prefix = byteFromRec(Rec, "Prefix"); Opcode = byteFromRec(Rec, "Opcode"); Form = byteFromRec(Rec, "FormBits"); SegOvr = byteFromRec(Rec, "SegOvrBits"); - + HasOpSizePrefix = Rec->getValueAsBit("hasOpSizePrefix"); HasAdSizePrefix = Rec->getValueAsBit("hasAdSizePrefix"); HasREX_WPrefix = Rec->getValueAsBit("hasREX_WPrefix"); @@ -235,12 +235,12 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, IgnoresVEX_L = Rec->getValueAsBit("ignoresVEX_L"); HasLockPrefix = Rec->getValueAsBit("hasLockPrefix"); IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly"); - + Name = Rec->getName(); AsmString = Rec->getValueAsString("AsmString"); - + Operands = &insn.Operands.OperandList; - + IsSSE = (HasOpSizePrefix && (Name.find("16") == Name.npos)) || (Name.find("CRC32") != Name.npos); HasFROperands = hasFROperands(); @@ -262,20 +262,20 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables, } } // FIXME: These instructions aren't marked as 64-bit in any way - Is64Bit |= Rec->getName() == "JMP64pcrel32" || - Rec->getName() == "MASKMOVDQU64" || - Rec->getName() == "POPFS64" || - Rec->getName() == "POPGS64" || - Rec->getName() == "PUSHFS64" || + Is64Bit |= Rec->getName() == "JMP64pcrel32" || + Rec->getName() == "MASKMOVDQU64" || + Rec->getName() == "POPFS64" || + Rec->getName() == "POPGS64" || + Rec->getName() == "PUSHFS64" || Rec->getName() == "PUSHGS64" || Rec->getName() == "REX64_PREFIX" || - Rec->getName().find("MOV64") != Name.npos || + Rec->getName().find("MOV64") != Name.npos || Rec->getName().find("PUSH64") != Name.npos || Rec->getName().find("POP64") != Name.npos; ShouldBeEmitted = true; } - + void RecognizableInstr::processInstr(DisassemblerTables &tables, const CodeGenInstruction &insn, InstrUID uid) @@ -283,11 +283,11 @@ void RecognizableInstr::processInstr(DisassemblerTables &tables, // Ignore "asm parser only" instructions. if (insn.TheDef->getValueAsBit("isAsmParserOnly")) return; - + RecognizableInstr recogInstr(tables, insn, uid); - + recogInstr.emitInstructionSpecifier(tables); - + if (recogInstr.shouldBeEmitted()) recogInstr.emitDecodePath(tables); } @@ -386,55 +386,40 @@ InstructionContext RecognizableInstr::insnContext() const { return insnContext; } - + RecognizableInstr::filter_ret RecognizableInstr::filter() const { /////////////////// // FILTER_STRONG // - + // Filter out intrinsics - - if (!Rec->isSubClassOf("X86Inst")) - return FILTER_STRONG; - + + assert(Rec->isSubClassOf("X86Inst") && "Can only filter X86 instructions"); + if (Form == X86Local::Pseudo || (IsCodeGenOnly && Name.find("_REV") == Name.npos)) return FILTER_STRONG; - - if (Form == X86Local::MRMInitReg) - return FILTER_STRONG; - - + + // Filter out artificial instructions but leave in the LOCK_PREFIX so it is // printed as a separate "instruction". - + if (Name.find("_Int") != Name.npos || - Name.find("Int_") != Name.npos || - Name.find("_NOREX") != Name.npos || - Name.find("2SDL") != Name.npos) + Name.find("Int_") != Name.npos) return FILTER_STRONG; // Filter out instructions with segment override prefixes. // They're too messy to handle now and we'll special case them if needed. - + if (SegOvr) return FILTER_STRONG; - - // Filter out instructions that can't be printed. 
- - if (AsmString.size() == 0) - return FILTER_STRONG; - - // Filter out instructions with subreg operands. - - if (AsmString.find("subreg") != AsmString.npos) - return FILTER_STRONG; + ///////////////// // FILTER_WEAK // - + // Filter out instructions with a LOCK prefix; // prefer forms that do not have the prefix if (HasLockPrefix) @@ -474,9 +459,9 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const { return FILTER_WEAK; if (HasFROperands && Name.find("MOV") != Name.npos && - ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) || + ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) || (Name.find("to") != Name.npos))) - return FILTER_WEAK; + return FILTER_STRONG; return FILTER_NORMAL; } @@ -487,7 +472,7 @@ bool RecognizableInstr::hasFROperands() const { for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) { const std::string &recName = OperandList[operandIndex].Rec->getName(); - + if (recName.find("FR") != recName.npos) return true; } @@ -497,17 +482,17 @@ bool RecognizableInstr::hasFROperands() const { bool RecognizableInstr::has256BitOperands() const { const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands; unsigned numOperands = OperandList.size(); - + for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) { const std::string &recName = OperandList[operandIndex].Rec->getName(); - - if (!recName.compare("VR256") || !recName.compare("f256mem")) { + + if (!recName.compare("VR256")) { return true; } } return false; } - + void RecognizableInstr::handleOperand(bool optional, unsigned &operandIndex, unsigned &physicalOperandIndex, unsigned &numPhysicalOperands, @@ -521,33 +506,33 @@ void RecognizableInstr::handleOperand(bool optional, unsigned &operandIndex, } else { assert(physicalOperandIndex < numPhysicalOperands); } - + while (operandMapping[operandIndex] != operandIndex) { Spec->operands[operandIndex].encoding = ENCODING_DUP; Spec->operands[operandIndex].type = (OperandType)(TYPE_DUP0 + operandMapping[operandIndex]); ++operandIndex; } - + const std::string &typeName = (*Operands)[operandIndex].Rec->getName(); Spec->operands[operandIndex].encoding = encodingFromString(typeName, HasOpSizePrefix); - Spec->operands[operandIndex].type = typeFromString(typeName, + Spec->operands[operandIndex].type = typeFromString(typeName, IsSSE, HasREX_WPrefix, HasOpSizePrefix); - + ++operandIndex; ++physicalOperandIndex; } void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { Spec->name = Name; - - if (!Rec->isSubClassOf("X86Inst")) + + if (!ShouldBeEmitted) return; - + switch (filter()) { case FILTER_WEAK: Spec->filtered = true; @@ -558,22 +543,19 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { case FILTER_NORMAL: break; } - + Spec->insnContext = insnContext(); - + const std::vector<CGIOperandList::OperandInfo> &OperandList = *Operands; - + unsigned numOperands = OperandList.size(); unsigned numPhysicalOperands = 0; - + // operandMapping maps from operands in OperandList to their originals. // If operandMapping[i] != i, then the entry is a duplicate. 
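// For example, a two-address instruction whose third operand is tied back
// to operand 0 would roughly yield operandMapping = {0, 1, 0, ...}; the
// tied entry is later emitted as ENCODING_DUP with type TYPE_DUP0 + 0 so
// the disassembler copies operand 0 instead of decoding a second value.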
unsigned operandMapping[X86_MAX_OPERANDS]; - - bool hasFROperands = false; - assert(numOperands <= X86_MAX_OPERANDS && "X86_MAX_OPERANDS is not large enough"); - + for (unsigned operandIndex = 0; operandIndex < numOperands; ++operandIndex) { if (OperandList[operandIndex].Constraints.size()) { const CGIOperandList::ConstraintInfo &Constraint = @@ -589,20 +571,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { ++numPhysicalOperands; operandMapping[operandIndex] = operandIndex; } - - const std::string &recName = OperandList[operandIndex].Rec->getName(); - - if (recName.find("FR") != recName.npos) - hasFROperands = true; } - - if (hasFROperands && Name.find("MOV") != Name.npos && - ((Name.find("2") != Name.npos && Name.find("32") == Name.npos) || - (Name.find("to") != Name.npos))) - ShouldBeEmitted = false; - - if (!ShouldBeEmitted) - return; #define HANDLE_OPERAND(class) \ handleOperand(false, \ @@ -611,7 +580,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { numPhysicalOperands, \ operandMapping, \ class##EncodingFromString); - + #define HANDLE_OPTIONAL(class) \ handleOperand(true, \ operandIndex, \ @@ -619,17 +588,17 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { numPhysicalOperands, \ operandMapping, \ class##EncodingFromString); - + // operandIndex should always be < numOperands unsigned operandIndex = 0; // physicalOperandIndex should always be < numPhysicalOperands unsigned physicalOperandIndex = 0; - + switch (Form) { case X86Local::RawFrm: // Operand 1 (optional) is an address or immediate. // Operand 2 (optional) is an immediate. - assert(numPhysicalOperands <= 2 && + assert(numPhysicalOperands <= 2 && "Unexpected number of operands for RawFrm"); HANDLE_OPTIONAL(relocation) HANDLE_OPTIONAL(immediate) @@ -653,14 +622,14 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { else assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 && "Unexpected number of operands for MRMDestRegFrm"); - + HANDLE_OPERAND(rmRegister) if (HasVEX_4VPrefix) // FIXME: In AVX, the register below becomes the one encoded // in ModRMVEX and the one above the one in the VEX.VVVV field HANDLE_OPERAND(vvvvRegister) - + HANDLE_OPERAND(roRegister) HANDLE_OPTIONAL(immediate) break; @@ -681,7 +650,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { // FIXME: In AVX, the register below becomes the one encoded // in ModRMVEX and the one above the one in the VEX.VVVV field HANDLE_OPERAND(vvvvRegister) - + HANDLE_OPERAND(roRegister) HANDLE_OPTIONAL(immediate) break; @@ -694,11 +663,11 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { if (HasVEX_4VPrefix || HasVEX_4VOp3Prefix) assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 5 && - "Unexpected number of operands for MRMSrcRegFrm with VEX_4V"); + "Unexpected number of operands for MRMSrcRegFrm with VEX_4V"); else assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 4 && "Unexpected number of operands for MRMSrcRegFrm"); - + HANDLE_OPERAND(roRegister) if (HasVEX_4VPrefix) @@ -727,11 +696,11 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { if (HasVEX_4VPrefix || HasVEX_4VOp3Prefix) assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 5 && - "Unexpected number of operands for MRMSrcMemFrm with VEX_4V"); + "Unexpected number of operands for MRMSrcMemFrm with VEX_4V"); else assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 
3 && "Unexpected number of operands for MRMSrcMemFrm"); - + HANDLE_OPERAND(roRegister) if (HasVEX_4VPrefix) @@ -813,7 +782,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) { // Ignored. break; } - + #undef HANDLE_OPERAND #undef HANDLE_OPTIONAL } @@ -827,8 +796,8 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { break; OpcodeType opcodeType = (OpcodeType)-1; - - ModRMFilter* filter = NULL; + + ModRMFilter* filter = NULL; uint8_t opcodeToSet = 0; switch (Prefix) { @@ -1026,26 +995,26 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { if(Spec->modifierType != MODIFIER_MODRM) { assert(opcodeToSet < 0xf9 && "Not enough room for all ADDREG_FRM operands"); - + uint8_t currentOpcode; for (currentOpcode = opcodeToSet; currentOpcode < opcodeToSet + 8; ++currentOpcode) - tables.setTableFields(opcodeType, - insnContext(), - currentOpcode, - *filter, + tables.setTableFields(opcodeType, + insnContext(), + currentOpcode, + *filter, UID, Is32Bit, IgnoresVEX_L); - + Spec->modifierType = MODIFIER_OPCODE; Spec->modifierBase = opcodeToSet; } else { // modifierBase was set where MODIFIER_MODRM was set - tables.setTableFields(opcodeType, - insnContext(), - opcodeToSet, - *filter, + tables.setTableFields(opcodeType, + insnContext(), + opcodeToSet, + *filter, UID, Is32Bit, IgnoresVEX_L); } } else { @@ -1054,13 +1023,13 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const { opcodeToSet, *filter, UID, Is32Bit, IgnoresVEX_L); - + Spec->modifierType = MODIFIER_NONE; Spec->modifierBase = opcodeToSet; } - + delete filter; - + #undef MAP } @@ -1070,7 +1039,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s, bool hasREX_WPrefix, bool hasOpSizePrefix) { if (isSSE) { - // For SSE instructions, we ignore the OpSize prefix and force operand + // For SSE instructions, we ignore the OpSize prefix and force operand // sizes. TYPE("GR16", TYPE_R16) TYPE("GR32", TYPE_R32) diff --git a/utils/UpdateCMakeLists.pl b/utils/UpdateCMakeLists.pl index 8f53514..d92a767 100755 --- a/utils/UpdateCMakeLists.pl +++ b/utils/UpdateCMakeLists.pl @@ -68,8 +68,7 @@ sub UpdateCMake { while(<IN>) { if (!$foundLibrary) { print OUT $_; - if (/^add_clang_library\(/ || /^add_llvm_library\(/ || /^add_llvm_target\(/ - || /^add_executable\(/) { + if (/^add_[^_]+_library\(/ || /^add_llvm_target\(/ || /^add_executable\(/) { $foundLibrary = 1; EmitCMakeList($dir); } diff --git a/utils/buildit/build_llvm b/utils/buildit/build_llvm index 994fb06..6aee831 100755 --- a/utils/buildit/build_llvm +++ b/utils/buildit/build_llvm @@ -133,7 +133,7 @@ if [ \! -f Makefile.config ]; then || exit 1 fi -SUBVERSION=`echo $RC_ProjectSourceVersion | sed -e 's/[^.]*\.\([0-9]*\).*/\1/'` +SUBVERSION=`echo $RC_ProjectSourceVersion | sed -e 's/.*\.\([0-9]*\).*/\1/'` if [ "x$SUBVERSION" != "x$RC_ProjectSourceVersion" ]; then LLVM_SUBMIT_SUBVERSION=`printf "%02d" $SUBVERSION` diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py index 039868d..25bbcbd 100755 --- a/utils/lit/lit/main.py +++ b/utils/lit/lit/main.py @@ -566,6 +566,9 @@ def main(builtinParameters = {}): # Bump the GIL check interval, its more imp if opts.maxTests is not None: tests = tests[:opts.maxTests] + # Don't create more threads than tests. 
+ opts.numThreads = min(len(tests), opts.numThreads) + extra = '' if len(tests) != numTotalTests: extra = ' of %d' % numTotalTests @@ -589,9 +592,6 @@ def main(builtinParameters = {}): # Bump the GIL check interval, its more imp else: print header - # Don't create more threads than tests. - opts.numThreads = min(len(tests), opts.numThreads) - startTime = time.time() display = TestingProgressDisplay(opts, len(tests), progressBar) provider = TestProvider(tests, opts.maxTime) diff --git a/utils/llvm.grm b/utils/llvm.grm index 322036b..ad2799f 100644 --- a/utils/llvm.grm +++ b/utils/llvm.grm @@ -175,6 +175,7 @@ FuncAttr ::= noreturn | returns_twice | nonlazybind | address_safety + | ia_nsdialect ; OptFuncAttrs ::= + _ | OptFuncAttrs FuncAttr ; diff --git a/utils/obj2yaml/coff2yaml.cpp b/utils/obj2yaml/coff2yaml.cpp index 2dbd531..c9a7159 100644 --- a/utils/obj2yaml/coff2yaml.cpp +++ b/utils/obj2yaml/coff2yaml.cpp @@ -276,7 +276,8 @@ static llvm::raw_ostream &yamlCOFFSections(llvm::object::COFFObjectFile &Obj, Obj.getSectionContents(sect, sectionData); Out << " SectionData: "; yaml::writeHexStream(Out, sectionData) << endl; - + if (iter->begin_relocations() != iter->end_relocations()) + Out << " Relocations:\n"; for (llvm::object::relocation_iterator rIter = iter->begin_relocations(); rIter != iter->end_relocations(); rIter.increment(ec)) { const llvm::object::coff_relocation *reloc = Obj.getCOFFRelocation(rIter); diff --git a/utils/yaml2obj/CMakeLists.txt b/utils/yaml2obj/CMakeLists.txt new file mode 100644 index 0000000..f8b1197 --- /dev/null +++ b/utils/yaml2obj/CMakeLists.txt @@ -0,0 +1,5 @@ +add_llvm_utility(yaml2obj + yaml2obj.cpp + ) + +target_link_libraries(yaml2obj LLVMSupport) diff --git a/utils/yaml2obj/Makefile b/utils/yaml2obj/Makefile new file mode 100644 index 0000000..e746d85 --- /dev/null +++ b/utils/yaml2obj/Makefile @@ -0,0 +1,20 @@ +##===- utils/yaml2obj/Makefile ----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +TOOLNAME = yaml2obj +USEDLIBS = LLVMSupport.a + +# This tool has no plugins, optimize startup time. +TOOL_NO_EXPORTS = 1 + +# Don't install this utility +NO_INSTALL = 1 + +include $(LEVEL)/Makefile.common diff --git a/utils/yaml2obj/yaml2obj.cpp b/utils/yaml2obj/yaml2obj.cpp new file mode 100644 index 0000000..c3b3e54 --- /dev/null +++ b/utils/yaml2obj/yaml2obj.cpp @@ -0,0 +1,879 @@ +//===- yaml2obj - Convert YAML to a binary object file --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This program takes a YAML description of an object file and outputs the +// binary equivalent. +// +// This is used for writing tests that require binary files. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/YAMLParser.h"
+
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<std::string>
+  Input(cl::Positional, cl::desc("<input>"), cl::init("-"));
+
+template<class T>
+typename llvm::enable_if_c<std::numeric_limits<T>::is_integer, bool>::type
+getAs(const llvm::yaml::ScalarNode *SN, T &Result) {
+  SmallString<4> Storage;
+  StringRef Value = SN->getValue(Storage);
+  if (Value.getAsInteger(0, Result))
+    return false;
+  return true;
+}
+
+// Given a container with begin and end iterators whose ::value_type is a
+// character type, iterate through pairs of characters in the set
+// [a-fA-F0-9], ignoring all other characters.
+struct hex_pair_iterator {
+  StringRef::const_iterator Current, End;
+  typedef SmallVector<char, 2> value_type;
+  value_type Pair;
+  bool IsDone;
+
+  hex_pair_iterator(StringRef C)
+    : Current(C.begin()), End(C.end()), IsDone(false) {
+    // Initialize Pair.
+    ++*this;
+  }
+
+  // End iterator.
+  hex_pair_iterator() : Current(), End(), IsDone(true) {}
+
+  value_type operator *() const {
+    return Pair;
+  }
+
+  hex_pair_iterator operator ++() {
+    // We're at the end of the input.
+    if (Current == End) {
+      IsDone = true;
+      return *this;
+    }
+    Pair = value_type();
+    for (; Current != End && Pair.size() != 2; ++Current) {
+      // Is a valid hex digit.
+      if ((*Current >= '0' && *Current <= '9') ||
+          (*Current >= 'a' && *Current <= 'f') ||
+          (*Current >= 'A' && *Current <= 'F'))
+        Pair.push_back(*Current);
+    }
+    // Hit the end without getting 2 hex digits. Pair is invalid.
+    if (Pair.size() != 2)
+      IsDone = true;
+    return *this;
+  }
+
+  bool operator ==(const hex_pair_iterator Other) {
+    return (IsDone == Other.IsDone) ||
+           (Current == Other.Current && End == Other.End);
+  }
+
+  bool operator !=(const hex_pair_iterator Other) {
+    return !(*this == Other);
+  }
+};
+
+template <class ContainerOut>
+static bool hexStringToByteArray(StringRef Str, ContainerOut &Out) {
+  for (hex_pair_iterator I(Str), E; I != E; ++I) {
+    typename hex_pair_iterator::value_type Pair = *I;
+    typename ContainerOut::value_type Byte;
+    if (StringRef(Pair.data(), 2).getAsInteger(16, Byte))
+      return false;
+    Out.push_back(Byte);
+  }
+  return true;
+}
+
+/// This parses a yaml stream that represents a COFF object file.
+/// See docs/yaml2obj for the yaml schema.
+struct COFFParser {
+  COFFParser(yaml::Stream &Input) : YS(Input) {
+    std::memset(&Header, 0, sizeof(Header));
+    // A COFF string table always starts with a 4 byte size field. Offsets into
+    // it include this size, so allocate it now.
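// Because these 4 size bytes count toward every offset, the first real
// string lands at offset 4. The getStringIndex helper used below for long
// section names can be pictured as the following sketch (the real helper in
// this file may differ, e.g. by uniquing repeated strings):
//
//   unsigned getStringIndex(StringRef Str) {
//     unsigned Offset = StringTable.size(); // counts the size field
//     StringTable.append(Str.begin(), Str.end());
//     StringTable.push_back(0);             // COFF strings are NUL-terminated
//     return Offset;
//   }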
+ StringTable.append(4, 0); + } + + bool parseHeader(yaml::Node *HeaderN) { + yaml::MappingNode *MN = dyn_cast<yaml::MappingNode>(HeaderN); + if (!MN) { + YS.printError(HeaderN, "header's value must be a mapping node"); + return false; + } + for (yaml::MappingNode::iterator i = MN->begin(), e = MN->end(); + i != e; ++i) { + yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(i->getKey()); + if (!Key) { + YS.printError(i->getKey(), "Keys must be scalar values"); + return false; + } + SmallString<32> Storage; + StringRef KeyValue = Key->getValue(Storage); + if (KeyValue == "Characteristics") { + if (!parseHeaderCharacteristics(i->getValue())) + return false; + } else { + yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(i->getValue()); + if (!Value) { + YS.printError(Value, + Twine(KeyValue) + " must be a scalar value"); + return false; + } + if (KeyValue == "Machine") { + uint16_t Machine; + if (!getAs(Value, Machine)) { + // It's not a raw number, try matching the string. + StringRef ValueValue = Value->getValue(Storage); + Machine = StringSwitch<COFF::MachineTypes>(ValueValue) + .Case( "IMAGE_FILE_MACHINE_UNKNOWN" + , COFF::IMAGE_FILE_MACHINE_UNKNOWN) + .Case( "IMAGE_FILE_MACHINE_AM33" + , COFF::IMAGE_FILE_MACHINE_AM33) + .Case( "IMAGE_FILE_MACHINE_AMD64" + , COFF::IMAGE_FILE_MACHINE_AMD64) + .Case( "IMAGE_FILE_MACHINE_ARM" + , COFF::IMAGE_FILE_MACHINE_ARM) + .Case( "IMAGE_FILE_MACHINE_ARMV7" + , COFF::IMAGE_FILE_MACHINE_ARMV7) + .Case( "IMAGE_FILE_MACHINE_EBC" + , COFF::IMAGE_FILE_MACHINE_EBC) + .Case( "IMAGE_FILE_MACHINE_I386" + , COFF::IMAGE_FILE_MACHINE_I386) + .Case( "IMAGE_FILE_MACHINE_IA64" + , COFF::IMAGE_FILE_MACHINE_IA64) + .Case( "IMAGE_FILE_MACHINE_M32R" + , COFF::IMAGE_FILE_MACHINE_M32R) + .Case( "IMAGE_FILE_MACHINE_MIPS16" + , COFF::IMAGE_FILE_MACHINE_MIPS16) + .Case( "IMAGE_FILE_MACHINE_MIPSFPU" + , COFF::IMAGE_FILE_MACHINE_MIPSFPU) + .Case( "IMAGE_FILE_MACHINE_MIPSFPU16" + , COFF::IMAGE_FILE_MACHINE_MIPSFPU16) + .Case( "IMAGE_FILE_MACHINE_POWERPC" + , COFF::IMAGE_FILE_MACHINE_POWERPC) + .Case( "IMAGE_FILE_MACHINE_POWERPCFP" + , COFF::IMAGE_FILE_MACHINE_POWERPCFP) + .Case( "IMAGE_FILE_MACHINE_R4000" + , COFF::IMAGE_FILE_MACHINE_R4000) + .Case( "IMAGE_FILE_MACHINE_SH3" + , COFF::IMAGE_FILE_MACHINE_SH3) + .Case( "IMAGE_FILE_MACHINE_SH3DSP" + , COFF::IMAGE_FILE_MACHINE_SH3DSP) + .Case( "IMAGE_FILE_MACHINE_SH4" + , COFF::IMAGE_FILE_MACHINE_SH4) + .Case( "IMAGE_FILE_MACHINE_SH5" + , COFF::IMAGE_FILE_MACHINE_SH5) + .Case( "IMAGE_FILE_MACHINE_THUMB" + , COFF::IMAGE_FILE_MACHINE_THUMB) + .Case( "IMAGE_FILE_MACHINE_WCEMIPSV2" + , COFF::IMAGE_FILE_MACHINE_WCEMIPSV2) + .Default(COFF::MT_Invalid); + if (Machine == COFF::MT_Invalid) { + YS.printError(Value, "Invalid value for Machine"); + return false; + } + } + Header.Machine = Machine; + } else if (KeyValue == "NumberOfSections") { + if (!getAs(Value, Header.NumberOfSections)) { + YS.printError(Value, "Invalid value for NumberOfSections"); + return false; + } + } else if (KeyValue == "TimeDateStamp") { + if (!getAs(Value, Header.TimeDateStamp)) { + YS.printError(Value, "Invalid value for TimeDateStamp"); + return false; + } + } else if (KeyValue == "PointerToSymbolTable") { + if (!getAs(Value, Header.PointerToSymbolTable)) { + YS.printError(Value, "Invalid value for PointerToSymbolTable"); + return false; + } + } else if (KeyValue == "NumberOfSymbols") { + if (!getAs(Value, Header.NumberOfSymbols)) { + YS.printError(Value, "Invalid value for NumberOfSymbols"); + return false; + } + } else if (KeyValue == "SizeOfOptionalHeader") { + 
+          if (!getAs(Value, Header.SizeOfOptionalHeader)) {
+            YS.printError(Value, "Invalid value for SizeOfOptionalHeader");
+            return false;
+          }
+        } else {
+          YS.printError(Key, "Unrecognized key in header");
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  bool parseHeaderCharacteristics(yaml::Node *Characteristics) {
+    yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(Characteristics);
+    yaml::SequenceNode *SeqValue
+      = dyn_cast<yaml::SequenceNode>(Characteristics);
+    if (!Value && !SeqValue) {
+      YS.printError(Characteristics,
+        "Characteristics must either be a number or sequence");
+      return false;
+    }
+    if (Value) {
+      if (!getAs(Value, Header.Characteristics)) {
+        YS.printError(Value, "Invalid value for Characteristics");
+        return false;
+      }
+    } else {
+      for (yaml::SequenceNode::iterator ci = SeqValue->begin(),
+                                        ce = SeqValue->end();
+           ci != ce; ++ci) {
+        yaml::ScalarNode *CharValue = dyn_cast<yaml::ScalarNode>(&*ci);
+        if (!CharValue) {
+          YS.printError(&*ci, "Characteristics must be scalar values");
+          return false;
+        }
+        SmallString<32> Storage;
+        StringRef Char = CharValue->getValue(Storage);
+        uint16_t Characteristic = StringSwitch<COFF::Characteristics>(Char)
+          .Case( "IMAGE_FILE_RELOCS_STRIPPED"
+               , COFF::IMAGE_FILE_RELOCS_STRIPPED)
+          .Case( "IMAGE_FILE_EXECUTABLE_IMAGE"
+               , COFF::IMAGE_FILE_EXECUTABLE_IMAGE)
+          .Case( "IMAGE_FILE_LINE_NUMS_STRIPPED"
+               , COFF::IMAGE_FILE_LINE_NUMS_STRIPPED)
+          .Case( "IMAGE_FILE_LOCAL_SYMS_STRIPPED"
+               , COFF::IMAGE_FILE_LOCAL_SYMS_STRIPPED)
+          .Case( "IMAGE_FILE_AGGRESSIVE_WS_TRIM"
+               , COFF::IMAGE_FILE_AGGRESSIVE_WS_TRIM)
+          .Case( "IMAGE_FILE_LARGE_ADDRESS_AWARE"
+               , COFF::IMAGE_FILE_LARGE_ADDRESS_AWARE)
+          .Case( "IMAGE_FILE_BYTES_REVERSED_LO"
+               , COFF::IMAGE_FILE_BYTES_REVERSED_LO)
+          .Case( "IMAGE_FILE_32BIT_MACHINE"
+               , COFF::IMAGE_FILE_32BIT_MACHINE)
+          .Case( "IMAGE_FILE_DEBUG_STRIPPED"
+               , COFF::IMAGE_FILE_DEBUG_STRIPPED)
+          .Case( "IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP"
+               , COFF::IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP)
+          .Case( "IMAGE_FILE_SYSTEM"
+               , COFF::IMAGE_FILE_SYSTEM)
+          .Case( "IMAGE_FILE_DLL"
+               , COFF::IMAGE_FILE_DLL)
+          .Case( "IMAGE_FILE_UP_SYSTEM_ONLY"
+               , COFF::IMAGE_FILE_UP_SYSTEM_ONLY)
+          .Default(COFF::C_Invalid);
+        if (Characteristic == COFF::C_Invalid) {
+          // TODO: Typo-correct.
+          YS.printError(CharValue, "Invalid value for Characteristic");
+          return false;
+        }
+        Header.Characteristics |= Characteristic;
+      }
+    }
+    return true;
+  }
+
+  bool parseSections(yaml::Node *SectionsN) {
+    yaml::SequenceNode *SN = dyn_cast<yaml::SequenceNode>(SectionsN);
+    if (!SN) {
+      YS.printError(SectionsN, "Sections must be a sequence");
+      return false;
+    }
+    for (yaml::SequenceNode::iterator i = SN->begin(), e = SN->end();
+         i != e; ++i) {
+      Section Sec;
+      std::memset(&Sec.Header, 0, sizeof(Sec.Header));
+      yaml::MappingNode *SecMap = dyn_cast<yaml::MappingNode>(&*i);
+      if (!SecMap) {
+        YS.printError(&*i, "Section entry must be a map");
+        return false;
+      }
+      for (yaml::MappingNode::iterator si = SecMap->begin(),
+                                       se = SecMap->end();
+           si != se; ++si) {
+        yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(si->getKey());
+        if (!Key) {
+          YS.printError(si->getKey(), "Keys must be scalar values");
+          return false;
+        }
+        SmallString<32> Storage;
+        StringRef KeyValue = Key->getValue(Storage);
+
+        yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(si->getValue());
+        if (KeyValue == "Name") {
+          // If the name is less than 8 bytes, store it in place, otherwise
+          // store it in the string table.
+          if (!Value) {
+            YS.printError(si->getValue(), "Name must be a scalar value");
+            return false;
+          }
+          StringRef Name = Value->getValue(Storage);
+          std::fill_n(Sec.Header.Name, unsigned(COFF::NameSize), 0);
+          if (Name.size() <= COFF::NameSize) {
+            std::copy(Name.begin(), Name.end(), Sec.Header.Name);
+          } else {
+            // Add string to the string table and format the index for output.
+            unsigned Index = getStringIndex(Name);
+            std::string str = utostr(Index);
+            if (str.size() > 7) {
+              YS.printError(Value, "String table got too large");
+              return false;
+            }
+            Sec.Header.Name[0] = '/';
+            std::copy(str.begin(), str.end(), Sec.Header.Name + 1);
+          }
+        } else if (KeyValue == "VirtualSize") {
+          if (!getAs(Value, Sec.Header.VirtualSize)) {
+            YS.printError(Value, "Invalid value for VirtualSize");
+            return false;
+          }
+        } else if (KeyValue == "VirtualAddress") {
+          if (!getAs(Value, Sec.Header.VirtualAddress)) {
+            YS.printError(Value, "Invalid value for VirtualAddress");
+            return false;
+          }
+        } else if (KeyValue == "SizeOfRawData") {
+          if (!getAs(Value, Sec.Header.SizeOfRawData)) {
+            YS.printError(Value, "Invalid value for SizeOfRawData");
+            return false;
+          }
+        } else if (KeyValue == "PointerToRawData") {
+          if (!getAs(Value, Sec.Header.PointerToRawData)) {
+            YS.printError(Value, "Invalid value for PointerToRawData");
+            return false;
+          }
+        } else if (KeyValue == "PointerToRelocations") {
+          if (!getAs(Value, Sec.Header.PointerToRelocations)) {
+            YS.printError(Value, "Invalid value for PointerToRelocations");
+            return false;
+          }
+        } else if (KeyValue == "PointerToLineNumbers") {
+          if (!getAs(Value, Sec.Header.PointerToLineNumbers)) {
+            YS.printError(Value, "Invalid value for PointerToLineNumbers");
+            return false;
+          }
+        } else if (KeyValue == "NumberOfRelocations") {
+          if (!getAs(Value, Sec.Header.NumberOfRelocations)) {
+            YS.printError(Value, "Invalid value for NumberOfRelocations");
+            return false;
+          }
+        } else if (KeyValue == "NumberOfLineNumbers") {
+          if (!getAs(Value, Sec.Header.NumberOfLineNumbers)) {
+            YS.printError(Value, "Invalid value for NumberOfLineNumbers");
+            return false;
+          }
+        } else if (KeyValue == "Characteristics") {
+          yaml::SequenceNode *SeqValue
+            = dyn_cast<yaml::SequenceNode>(si->getValue());
+          if (!Value && !SeqValue) {
+            YS.printError(si->getValue(),
+              "Characteristics must either be a number or sequence");
+            return false;
+          }
+          if (Value) {
+            if (!getAs(Value, Sec.Header.Characteristics)) {
+              YS.printError(Value, "Invalid value for Characteristics");
+              return false;
+            }
+          } else {
+            for (yaml::SequenceNode::iterator ci = SeqValue->begin(),
+                                              ce = SeqValue->end();
+                 ci != ce; ++ci) {
+              yaml::ScalarNode *CharValue = dyn_cast<yaml::ScalarNode>(&*ci);
+              if (!CharValue) {
+                YS.printError(&*ci, "Invalid value for Characteristics");
+                return false;
+              }
+              StringRef Char = CharValue->getValue(Storage);
+              uint32_t Characteristic =
+                StringSwitch<COFF::SectionCharacteristics>(Char)
+                .Case( "IMAGE_SCN_TYPE_NO_PAD"
+                     , COFF::IMAGE_SCN_TYPE_NO_PAD)
+                .Case( "IMAGE_SCN_CNT_CODE"
+                     , COFF::IMAGE_SCN_CNT_CODE)
+                .Case( "IMAGE_SCN_CNT_INITIALIZED_DATA"
+                     , COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
+                .Case( "IMAGE_SCN_CNT_UNINITIALIZED_DATA"
+                     , COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
+                .Case( "IMAGE_SCN_LNK_OTHER"
+                     , COFF::IMAGE_SCN_LNK_OTHER)
+                .Case( "IMAGE_SCN_LNK_INFO"
+                     , COFF::IMAGE_SCN_LNK_INFO)
+                .Case( "IMAGE_SCN_LNK_REMOVE"
+                     , COFF::IMAGE_SCN_LNK_REMOVE)
+                .Case( "IMAGE_SCN_LNK_COMDAT"
+                     , COFF::IMAGE_SCN_LNK_COMDAT)
+                .Case( "IMAGE_SCN_GPREL"
+                     , COFF::IMAGE_SCN_GPREL)
+                .Case( "IMAGE_SCN_MEM_PURGEABLE"
+                     , COFF::IMAGE_SCN_MEM_PURGEABLE)
+                .Case( "IMAGE_SCN_MEM_16BIT"
+                     , COFF::IMAGE_SCN_MEM_16BIT)
+                .Case( "IMAGE_SCN_MEM_LOCKED"
+                     , COFF::IMAGE_SCN_MEM_LOCKED)
+                .Case( "IMAGE_SCN_MEM_PRELOAD"
+                     , COFF::IMAGE_SCN_MEM_PRELOAD)
+                .Case( "IMAGE_SCN_ALIGN_1BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_1BYTES)
+                .Case( "IMAGE_SCN_ALIGN_2BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_2BYTES)
+                .Case( "IMAGE_SCN_ALIGN_4BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_4BYTES)
+                .Case( "IMAGE_SCN_ALIGN_8BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_8BYTES)
+                .Case( "IMAGE_SCN_ALIGN_16BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_16BYTES)
+                .Case( "IMAGE_SCN_ALIGN_32BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_32BYTES)
+                .Case( "IMAGE_SCN_ALIGN_64BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_64BYTES)
+                .Case( "IMAGE_SCN_ALIGN_128BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_128BYTES)
+                .Case( "IMAGE_SCN_ALIGN_256BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_256BYTES)
+                .Case( "IMAGE_SCN_ALIGN_512BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_512BYTES)
+                .Case( "IMAGE_SCN_ALIGN_1024BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_1024BYTES)
+                .Case( "IMAGE_SCN_ALIGN_2048BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_2048BYTES)
+                .Case( "IMAGE_SCN_ALIGN_4096BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_4096BYTES)
+                .Case( "IMAGE_SCN_ALIGN_8192BYTES"
+                     , COFF::IMAGE_SCN_ALIGN_8192BYTES)
+                .Case( "IMAGE_SCN_LNK_NRELOC_OVFL"
+                     , COFF::IMAGE_SCN_LNK_NRELOC_OVFL)
+                .Case( "IMAGE_SCN_MEM_DISCARDABLE"
+                     , COFF::IMAGE_SCN_MEM_DISCARDABLE)
+                .Case( "IMAGE_SCN_MEM_NOT_CACHED"
+                     , COFF::IMAGE_SCN_MEM_NOT_CACHED)
+                .Case( "IMAGE_SCN_MEM_NOT_PAGED"
+                     , COFF::IMAGE_SCN_MEM_NOT_PAGED)
+                .Case( "IMAGE_SCN_MEM_SHARED"
+                     , COFF::IMAGE_SCN_MEM_SHARED)
+                .Case( "IMAGE_SCN_MEM_EXECUTE"
+                     , COFF::IMAGE_SCN_MEM_EXECUTE)
+                .Case( "IMAGE_SCN_MEM_READ"
+                     , COFF::IMAGE_SCN_MEM_READ)
+                .Case( "IMAGE_SCN_MEM_WRITE"
+                     , COFF::IMAGE_SCN_MEM_WRITE)
+                .Default(COFF::SC_Invalid);
+              if (Characteristic == COFF::SC_Invalid) {
+                YS.printError(CharValue, "Invalid value for Characteristic");
+                return false;
+              }
+              Sec.Header.Characteristics |= Characteristic;
+            }
+          }
+        } else if (KeyValue == "SectionData") {
+          yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(si->getValue());
+          if (!Value) {
+            YS.printError(si->getValue(), "SectionData must be a scalar value");
+            return false;
+          }
+          SmallString<32> Storage;
+          StringRef Data = Value->getValue(Storage);
+          if (!hexStringToByteArray(Data, Sec.Data)) {
+            YS.printError(Value, "SectionData must be a collection of pairs of"
+                                 " hex bytes");
+            return false;
+          }
+        } else
+          si->skip();
+      }
+      Sections.push_back(Sec);
+    }
+    return true;
+  }
+
+  bool parseSymbols(yaml::Node *SymbolsN) {
+    yaml::SequenceNode *SN = dyn_cast<yaml::SequenceNode>(SymbolsN);
+    if (!SN) {
+      YS.printError(SymbolsN, "Symbols must be a sequence");
+      return false;
+    }
+    for (yaml::SequenceNode::iterator i = SN->begin(), e = SN->end();
+         i != e; ++i) {
+      Symbol Sym;
+      std::memset(&Sym.Header, 0, sizeof(Sym.Header));
+      yaml::MappingNode *SymMap = dyn_cast<yaml::MappingNode>(&*i);
+      if (!SymMap) {
+        YS.printError(&*i, "Symbol must be a map");
+        return false;
+      }
+      for (yaml::MappingNode::iterator si = SymMap->begin(),
+                                       se = SymMap->end();
+           si != se; ++si) {
+        yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(si->getKey());
+        if (!Key) {
+          YS.printError(si->getKey(), "Keys must be scalar values");
+          return false;
+        }
+        SmallString<32> Storage;
+        StringRef KeyValue = Key->getValue(Storage);
+
+        yaml::ScalarNode *Value = dyn_cast<yaml::ScalarNode>(si->getValue());
+        if (!Value) {
+          YS.printError(si->getValue(), "Must be a scalar value");
+          return false;
+        }
+        if (KeyValue == "Name") {
+          // If the name is less than 8 bytes, store it in place, otherwise
+          // store it in the string table.
+          StringRef Name = Value->getValue(Storage);
+          std::fill_n(Sym.Header.Name, unsigned(COFF::NameSize), 0);
+          if (Name.size() <= COFF::NameSize) {
+            std::copy(Name.begin(), Name.end(), Sym.Header.Name);
+          } else {
+            // Add string to the string table and format the index for output.
+            unsigned Index = getStringIndex(Name);
+            *reinterpret_cast<support::aligned_ulittle32_t*>(
+              Sym.Header.Name + 4) = Index;
+          }
+        } else if (KeyValue == "Value") {
+          if (!getAs(Value, Sym.Header.Value)) {
+            YS.printError(Value, "Invalid value for Value");
+            return false;
+          }
+        } else if (KeyValue == "SimpleType") {
+          Sym.Header.Type |= StringSwitch<COFF::SymbolBaseType>(
+            Value->getValue(Storage))
+            .Case("IMAGE_SYM_TYPE_NULL", COFF::IMAGE_SYM_TYPE_NULL)
+            .Case("IMAGE_SYM_TYPE_VOID", COFF::IMAGE_SYM_TYPE_VOID)
+            .Case("IMAGE_SYM_TYPE_CHAR", COFF::IMAGE_SYM_TYPE_CHAR)
+            .Case("IMAGE_SYM_TYPE_SHORT", COFF::IMAGE_SYM_TYPE_SHORT)
+            .Case("IMAGE_SYM_TYPE_INT", COFF::IMAGE_SYM_TYPE_INT)
+            .Case("IMAGE_SYM_TYPE_LONG", COFF::IMAGE_SYM_TYPE_LONG)
+            .Case("IMAGE_SYM_TYPE_FLOAT", COFF::IMAGE_SYM_TYPE_FLOAT)
+            .Case("IMAGE_SYM_TYPE_DOUBLE", COFF::IMAGE_SYM_TYPE_DOUBLE)
+            .Case("IMAGE_SYM_TYPE_STRUCT", COFF::IMAGE_SYM_TYPE_STRUCT)
+            .Case("IMAGE_SYM_TYPE_UNION", COFF::IMAGE_SYM_TYPE_UNION)
+            .Case("IMAGE_SYM_TYPE_ENUM", COFF::IMAGE_SYM_TYPE_ENUM)
+            .Case("IMAGE_SYM_TYPE_MOE", COFF::IMAGE_SYM_TYPE_MOE)
+            .Case("IMAGE_SYM_TYPE_BYTE", COFF::IMAGE_SYM_TYPE_BYTE)
+            .Case("IMAGE_SYM_TYPE_WORD", COFF::IMAGE_SYM_TYPE_WORD)
+            .Case("IMAGE_SYM_TYPE_UINT", COFF::IMAGE_SYM_TYPE_UINT)
+            .Case("IMAGE_SYM_TYPE_DWORD", COFF::IMAGE_SYM_TYPE_DWORD)
+            .Default(COFF::IMAGE_SYM_TYPE_NULL);
+        } else if (KeyValue == "ComplexType") {
+          Sym.Header.Type |= StringSwitch<COFF::SymbolComplexType>(
+            Value->getValue(Storage))
+            .Case("IMAGE_SYM_DTYPE_NULL", COFF::IMAGE_SYM_DTYPE_NULL)
+            .Case("IMAGE_SYM_DTYPE_POINTER", COFF::IMAGE_SYM_DTYPE_POINTER)
+            .Case("IMAGE_SYM_DTYPE_FUNCTION", COFF::IMAGE_SYM_DTYPE_FUNCTION)
+            .Case("IMAGE_SYM_DTYPE_ARRAY", COFF::IMAGE_SYM_DTYPE_ARRAY)
+            .Default(COFF::IMAGE_SYM_DTYPE_NULL)
+            << COFF::SCT_COMPLEX_TYPE_SHIFT;
+        } else if (KeyValue == "StorageClass") {
+          Sym.Header.StorageClass = StringSwitch<COFF::SymbolStorageClass>(
+            Value->getValue(Storage))
+            .Case( "IMAGE_SYM_CLASS_END_OF_FUNCTION"
+                 , COFF::IMAGE_SYM_CLASS_END_OF_FUNCTION)
+            .Case( "IMAGE_SYM_CLASS_NULL"
+                 , COFF::IMAGE_SYM_CLASS_NULL)
+            .Case( "IMAGE_SYM_CLASS_AUTOMATIC"
+                 , COFF::IMAGE_SYM_CLASS_AUTOMATIC)
+            .Case( "IMAGE_SYM_CLASS_EXTERNAL"
+                 , COFF::IMAGE_SYM_CLASS_EXTERNAL)
+            .Case( "IMAGE_SYM_CLASS_STATIC"
+                 , COFF::IMAGE_SYM_CLASS_STATIC)
+            .Case( "IMAGE_SYM_CLASS_REGISTER"
+                 , COFF::IMAGE_SYM_CLASS_REGISTER)
+            .Case( "IMAGE_SYM_CLASS_EXTERNAL_DEF"
+                 , COFF::IMAGE_SYM_CLASS_EXTERNAL_DEF)
+            .Case( "IMAGE_SYM_CLASS_LABEL"
+                 , COFF::IMAGE_SYM_CLASS_LABEL)
+            .Case( "IMAGE_SYM_CLASS_UNDEFINED_LABEL"
+                 , COFF::IMAGE_SYM_CLASS_UNDEFINED_LABEL)
+            .Case( "IMAGE_SYM_CLASS_MEMBER_OF_STRUCT"
+                 , COFF::IMAGE_SYM_CLASS_MEMBER_OF_STRUCT)
+            .Case( "IMAGE_SYM_CLASS_ARGUMENT"
+                 , COFF::IMAGE_SYM_CLASS_ARGUMENT)
+            .Case( "IMAGE_SYM_CLASS_STRUCT_TAG"
+                 , COFF::IMAGE_SYM_CLASS_STRUCT_TAG)
+            .Case( "IMAGE_SYM_CLASS_MEMBER_OF_UNION"
+                 , COFF::IMAGE_SYM_CLASS_MEMBER_OF_UNION)
+            .Case( "IMAGE_SYM_CLASS_UNION_TAG"
+                 , COFF::IMAGE_SYM_CLASS_UNION_TAG)
+            .Case( "IMAGE_SYM_CLASS_TYPE_DEFINITION"
+                 , COFF::IMAGE_SYM_CLASS_TYPE_DEFINITION)
+            .Case( "IMAGE_SYM_CLASS_UNDEFINED_STATIC"
+                 , COFF::IMAGE_SYM_CLASS_UNDEFINED_STATIC)
+            .Case( "IMAGE_SYM_CLASS_ENUM_TAG"
+                 , COFF::IMAGE_SYM_CLASS_ENUM_TAG)
+            .Case( "IMAGE_SYM_CLASS_MEMBER_OF_ENUM"
+                 , COFF::IMAGE_SYM_CLASS_MEMBER_OF_ENUM)
+            .Case( "IMAGE_SYM_CLASS_REGISTER_PARAM"
+                 , COFF::IMAGE_SYM_CLASS_REGISTER_PARAM)
+            .Case( "IMAGE_SYM_CLASS_BIT_FIELD"
+                 , COFF::IMAGE_SYM_CLASS_BIT_FIELD)
+            .Case( "IMAGE_SYM_CLASS_BLOCK"
+                 , COFF::IMAGE_SYM_CLASS_BLOCK)
+            .Case( "IMAGE_SYM_CLASS_FUNCTION"
+                 , COFF::IMAGE_SYM_CLASS_FUNCTION)
+            .Case( "IMAGE_SYM_CLASS_END_OF_STRUCT"
+                 , COFF::IMAGE_SYM_CLASS_END_OF_STRUCT)
+            .Case( "IMAGE_SYM_CLASS_FILE"
+                 , COFF::IMAGE_SYM_CLASS_FILE)
+            .Case( "IMAGE_SYM_CLASS_SECTION"
+                 , COFF::IMAGE_SYM_CLASS_SECTION)
+            .Case( "IMAGE_SYM_CLASS_WEAK_EXTERNAL"
+                 , COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL)
+            .Case( "IMAGE_SYM_CLASS_CLR_TOKEN"
+                 , COFF::IMAGE_SYM_CLASS_CLR_TOKEN)
+            .Default(COFF::SSC_Invalid);
+          if (Sym.Header.StorageClass == COFF::SSC_Invalid) {
+            YS.printError(Value, "Invalid value for StorageClass");
+            return false;
+          }
+        } else if (KeyValue == "SectionNumber") {
+          if (!getAs(Value, Sym.Header.SectionNumber)) {
+            YS.printError(Value, "Invalid value for SectionNumber");
+            return false;
+          }
+        } else if (KeyValue == "AuxillaryData") {
+          StringRef Data = Value->getValue(Storage);
+          if (!hexStringToByteArray(Data, Sym.AuxSymbols)) {
+            YS.printError(Value, "AuxillaryData must be a collection of pairs"
+                                 " of hex bytes");
+            return false;
+          }
+        } else
+          si->skip();
+      }
+      Symbols.push_back(Sym);
+    }
+    return true;
+  }
+
+  bool parse() {
+    yaml::Document &D = *YS.begin();
+    yaml::MappingNode *Root = dyn_cast<yaml::MappingNode>(D.getRoot());
+    if (!Root) {
+      YS.printError(D.getRoot(), "Root node must be a map");
+      return false;
+    }
+    for (yaml::MappingNode::iterator i = Root->begin(), e = Root->end();
+         i != e; ++i) {
+      yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(i->getKey());
+      if (!Key) {
+        YS.printError(i->getKey(), "Keys must be scalar values");
+        return false;
+      }
+      SmallString<32> Storage;
+      StringRef KeyValue = Key->getValue(Storage);
+      if (KeyValue == "header") {
+        if (!parseHeader(i->getValue()))
+          return false;
+      } else if (KeyValue == "sections") {
+        if (!parseSections(i->getValue()))
+          return false;
+      } else if (KeyValue == "symbols") {
+        if (!parseSymbols(i->getValue()))
+          return false;
+      }
+    }
+    return !YS.failed();
+  }
+
+  unsigned getStringIndex(StringRef Str) {
+    StringMap<unsigned>::iterator i = StringTableMap.find(Str);
+    if (i == StringTableMap.end()) {
+      unsigned Index = StringTable.size();
+      StringTable.append(Str.begin(), Str.end());
+      StringTable.push_back(0);
+      StringTableMap[Str] = Index;
+      return Index;
+    }
+    return i->second;
+  }
+
+  yaml::Stream &YS;
+  COFF::header Header;
+
+  struct Section {
+    COFF::section Header;
+    std::vector<uint8_t> Data;
+    std::vector<COFF::relocation> Relocations;
+  };
+
+  struct Symbol {
+    COFF::symbol Header;
+    std::vector<uint8_t> AuxSymbols;
+  };
+
+  std::vector<Section> Sections;
+  std::vector<Symbol> Symbols;
+  StringMap<unsigned> StringTableMap;
+  std::string StringTable;
+};
+
+// Take a CP and assign addresses and sizes to everything. Returns false if the
+// layout is not valid to do.
+static bool layoutCOFF(COFFParser &CP) {
+  uint32_t SectionTableStart = 0;
+  uint32_t SectionTableSize  = 0;
+
+  // The section table starts immediately after the header, including the
+  // optional header.
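+  //
+  // A sketch of the overall file layout this function computes (the string
+  // table is not placed here; writeCOFF emits it directly after the symbols):
+  //
+  //   COFF::header, plus SizeOfOptionalHeader bytes of optional header
+  //   section table: one COFF::section per parsed section
+  //   raw section data, assigned consecutively in section order
+  //   symbol table: each symbol followed by its auxiliary records
+  //   string table, whose first four bytes hold its own total size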
+  SectionTableStart = sizeof(COFF::header) + CP.Header.SizeOfOptionalHeader;
+  SectionTableSize  = sizeof(COFF::section) * CP.Sections.size();
+
+  uint32_t CurrentSectionDataOffset = SectionTableStart + SectionTableSize;
+
+  // Assign each section data address consecutively.
+  for (std::vector<COFFParser::Section>::iterator i = CP.Sections.begin(),
+                                                  e = CP.Sections.end();
+       i != e; ++i) {
+    if (!i->Data.empty()) {
+      i->Header.SizeOfRawData = i->Data.size();
+      i->Header.PointerToRawData = CurrentSectionDataOffset;
+      CurrentSectionDataOffset += i->Header.SizeOfRawData;
+      // TODO: Handle alignment.
+    } else {
+      i->Header.SizeOfRawData = 0;
+      i->Header.PointerToRawData = 0;
+    }
+  }
+
+  uint32_t SymbolTableStart = CurrentSectionDataOffset;
+
+  // Calculate number of symbols.
+  uint32_t NumberOfSymbols = 0;
+  for (std::vector<COFFParser::Symbol>::iterator i = CP.Symbols.begin(),
+                                                 e = CP.Symbols.end();
+       i != e; ++i) {
+    if (i->AuxSymbols.size() % COFF::SymbolSize != 0) {
+      errs() << "AuxillaryData size not a multiple of symbol size!\n";
+      return false;
+    }
+    i->Header.NumberOfAuxSymbols = i->AuxSymbols.size() / COFF::SymbolSize;
+    NumberOfSymbols += 1 + i->Header.NumberOfAuxSymbols;
+  }
+
+  // Store all the allocated start addresses in the header.
+  CP.Header.NumberOfSections = CP.Sections.size();
+  CP.Header.NumberOfSymbols = NumberOfSymbols;
+  CP.Header.PointerToSymbolTable = SymbolTableStart;
+
+  *reinterpret_cast<support::ulittle32_t *>(&CP.StringTable[0])
+    = CP.StringTable.size();
+
+  return true;
+}
+
+template <typename value_type>
+struct binary_le_impl {
+  value_type Value;
+  binary_le_impl(value_type V) : Value(V) {}
+};
+
+template <typename value_type>
+raw_ostream &operator <<( raw_ostream &OS
+                        , const binary_le_impl<value_type> &BLE) {
+  char Buffer[sizeof(BLE.Value)];
+  support::endian::write_le<value_type, support::unaligned>(Buffer, BLE.Value);
+  OS.write(Buffer, sizeof(BLE.Value));
+  return OS;
+}
+
+template <typename value_type>
+binary_le_impl<value_type> binary_le(value_type V) {
+  return binary_le_impl<value_type>(V);
+}
+
+void writeCOFF(COFFParser &CP, raw_ostream &OS) {
+  OS << binary_le(CP.Header.Machine)
+     << binary_le(CP.Header.NumberOfSections)
+     << binary_le(CP.Header.TimeDateStamp)
+     << binary_le(CP.Header.PointerToSymbolTable)
+     << binary_le(CP.Header.NumberOfSymbols)
+     << binary_le(CP.Header.SizeOfOptionalHeader)
+     << binary_le(CP.Header.Characteristics);
+
+  // Output section table.
+  for (std::vector<COFFParser::Section>::const_iterator
+         i = CP.Sections.begin(), e = CP.Sections.end(); i != e; ++i) {
+    OS.write(i->Header.Name, COFF::NameSize);
+    OS << binary_le(i->Header.VirtualSize)
+       << binary_le(i->Header.VirtualAddress)
+       << binary_le(i->Header.SizeOfRawData)
+       << binary_le(i->Header.PointerToRawData)
+       << binary_le(i->Header.PointerToRelocations)
+       << binary_le(i->Header.PointerToLineNumbers)
+       << binary_le(i->Header.NumberOfRelocations)
+       << binary_le(i->Header.NumberOfLineNumbers)
+       << binary_le(i->Header.Characteristics);
+  }
+
+  // Output section data.
+  for (std::vector<COFFParser::Section>::const_iterator
+         i = CP.Sections.begin(), e = CP.Sections.end(); i != e; ++i) {
+    if (!i->Data.empty())
+      OS.write(reinterpret_cast<const char*>(&i->Data[0]), i->Data.size());
+  }
+
+  // Output symbol table.
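+  // Each symbol record is written field-by-field through binary_le, so the
+  // emitted bytes are little-endian regardless of the host's byte order.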
+  for (std::vector<COFFParser::Symbol>::const_iterator
+         i = CP.Symbols.begin(), e = CP.Symbols.end(); i != e; ++i) {
+    OS.write(i->Header.Name, COFF::NameSize);
+    OS << binary_le(i->Header.Value)
+       << binary_le(i->Header.SectionNumber)
+       << binary_le(i->Header.Type)
+       << binary_le(i->Header.StorageClass)
+       << binary_le(i->Header.NumberOfAuxSymbols);
+    if (!i->AuxSymbols.empty())
+      OS.write( reinterpret_cast<const char*>(&i->AuxSymbols[0])
+              , i->AuxSymbols.size());
+  }
+
+  // Output string table.
+  OS.write(&CP.StringTable[0], CP.StringTable.size());
+}
+
+int main(int argc, char **argv) {
+  cl::ParseCommandLineOptions(argc, argv);
+  sys::PrintStackTraceOnErrorSignal();
+  PrettyStackTraceProgram X(argc, argv);
+  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
+
+  OwningPtr<MemoryBuffer> Buf;
+  if (MemoryBuffer::getFileOrSTDIN(Input, Buf))
+    return 1;
+
+  SourceMgr SM;
+  yaml::Stream S(Buf->getBuffer(), SM);
+  COFFParser CP(S);
+  if (!CP.parse()) {
+    errs() << "yaml2obj: Failed to parse YAML file!\n";
+    return 1;
+  }
+  if (!layoutCOFF(CP)) {
+    errs() << "yaml2obj: Failed to layout COFF file!\n";
+    return 1;
+  }
+  writeCOFF(CP, outs());
+  return 0;
+}
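For reference, here is a minimal input sketch that the parser above should accept. The root keys (header, sections, symbols) and the flag names come from the parser itself; the particular section bytes, flags, and symbol are illustrative only (0xc3 happens to be an x86 ret), and SectionData/AuxillaryData must be pairs of hex digits, since they are fed to hexStringToByteArray:

    header:
      Machine: IMAGE_FILE_MACHINE_I386
      Characteristics: [ IMAGE_FILE_LINE_NUMS_STRIPPED ]
    sections:
      - Name: .text
        Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
        SectionData: "c3"
    symbols:
      - Name: _main
        Value: 0
        SectionNumber: 1
        SimpleType: IMAGE_SYM_TYPE_NULL
        ComplexType: IMAGE_SYM_DTYPE_FUNCTION
        StorageClass: IMAGE_SYM_CLASS_EXTERNAL

Assuming the Input option declared earlier in the file defaults to stdin, an invocation along the lines of "yaml2obj < in.yaml > out.obj" would emit the corresponding COFF object on stdout.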
