author     Logan Chien <loganchien@google.com>    2011-12-16 09:08:45 +0800
committer  Logan Chien <loganchien@google.com>    2011-12-16 13:28:58 +0800
commit     a1e6e241a813f81be2d2f36ab60c950ca297574b (patch)
tree       cf2d7ec5c63f40e2b66d8be7737496719a0d7902
parent     ac212abcc6d858470ad35ce7d660af0c1800364a (diff)
parent     ddecfe54a35ffbe0675f7f33e493734fd60b2495 (diff)
Merge with LLVM upstream r146714 (Dec 16th 2011)
Change-Id: Ied458adb08bf9a69250cbcee9b14b44d17e8701a
-rw-r--r--CMakeLists.txt9
-rw-r--r--LLVMBuild.txt4
-rw-r--r--Makefile15
-rw-r--r--Makefile.config.in7
-rw-r--r--Makefile.rules23
-rwxr-xr-xautoconf/config.sub4
-rw-r--r--autoconf/configure.ac31
-rw-r--r--bindings/LLVMBuild.txt1
-rwxr-xr-xcmake/config-ix.cmake14
-rwxr-xr-xcmake/modules/AddLLVM.cmake17
-rw-r--r--cmake/modules/CMakeLists.txt4
-rwxr-xr-xcmake/modules/LLVM-Config.cmake2
-rw-r--r--cmake/modules/VersionFromVCS.cmake40
-rwxr-xr-xconfigure48
-rw-r--r--docs/CodeGenerator.html196
-rw-r--r--docs/CommandGuide/index.html3
-rw-r--r--docs/CommandGuide/llvm-build.pod5
-rw-r--r--docs/CommandGuide/llvm-cov.pod45
-rw-r--r--docs/ExceptionHandling.html23
-rw-r--r--docs/LLVMBuild.html16
-rw-r--r--docs/LLVMBuild.txt1
-rw-r--r--docs/LangRef.html391
-rw-r--r--docs/ReleaseNotes.html1125
-rw-r--r--docs/SegmentedStacks.html8
-rw-r--r--docs/TestingGuide.html27
-rw-r--r--examples/ExceptionDemo/ExceptionDemo.cpp4
-rw-r--r--examples/LLVMBuild.txt1
-rw-r--r--include/llvm-c/Object.h3
-rw-r--r--include/llvm-c/Target.h52
-rw-r--r--include/llvm/ADT/APInt.h4
-rw-r--r--include/llvm/ADT/DAGDeltaAlgorithm.h1
-rw-r--r--include/llvm/ADT/GraphTraits.h5
-rw-r--r--include/llvm/ADT/IntrusiveRefCntPtr.h7
-rw-r--r--include/llvm/ADT/Statistic.h2
-rw-r--r--include/llvm/ADT/TinyPtrVector.h9
-rw-r--r--include/llvm/ADT/Triple.h1
-rw-r--r--include/llvm/Analysis/ConstantFolding.h17
-rw-r--r--include/llvm/Analysis/DominatorInternals.h2
-rw-r--r--include/llvm/Analysis/Dominators.h15
-rw-r--r--include/llvm/Analysis/InstructionSimplify.h52
-rw-r--r--include/llvm/Analysis/LazyValueInfo.h6
-rw-r--r--include/llvm/Analysis/LoopInfo.h32
-rw-r--r--include/llvm/Analysis/PHITransAddr.h8
-rw-r--r--include/llvm/Analysis/ScalarEvolution.h5
-rw-r--r--include/llvm/Analysis/ScalarEvolutionExpander.h2
-rw-r--r--include/llvm/Analysis/ValueTracking.h21
-rw-r--r--include/llvm/AutoUpgrade.h8
-rw-r--r--include/llvm/Bitcode/LLVMBitCodes.h31
-rw-r--r--include/llvm/CodeGen/Analysis.h4
-rw-r--r--include/llvm/CodeGen/DFAPacketizer.h78
-rw-r--r--include/llvm/CodeGen/FastISel.h6
-rw-r--r--include/llvm/CodeGen/ISDOpcodes.h10
-rw-r--r--include/llvm/CodeGen/MachineBasicBlock.h229
-rw-r--r--include/llvm/CodeGen/MachineFrameInfo.h2
-rw-r--r--include/llvm/CodeGen/MachineFunction.h6
-rw-r--r--include/llvm/CodeGen/MachineInstr.h338
-rw-r--r--include/llvm/CodeGen/MachineInstrBuilder.h46
-rw-r--r--include/llvm/CodeGen/MachineInstrBundle.h34
-rw-r--r--include/llvm/CodeGen/MachineOperand.h27
-rw-r--r--include/llvm/CodeGen/Passes.h4
-rw-r--r--include/llvm/CodeGen/SelectionDAG.h4
-rw-r--r--include/llvm/CodeGen/SelectionDAGISel.h2
-rw-r--r--include/llvm/Config/config.h.cmake15
-rw-r--r--include/llvm/Config/config.h.in6
-rw-r--r--include/llvm/Config/llvm-config.h.cmake9
-rw-r--r--include/llvm/Config/llvm-config.h.in9
-rw-r--r--include/llvm/DerivedTypes.h2
-rw-r--r--include/llvm/ExecutionEngine/ExecutionEngine.h17
-rw-r--r--include/llvm/InitializePasses.h2
-rw-r--r--include/llvm/InstrTypes.h1
-rw-r--r--include/llvm/Instruction.h26
-rw-r--r--include/llvm/Instructions.h39
-rw-r--r--include/llvm/Intrinsics.td6
-rw-r--r--include/llvm/IntrinsicsHexagon.td3671
-rw-r--r--include/llvm/IntrinsicsX86.td138
-rw-r--r--include/llvm/LinkAllPasses.h2
-rw-r--r--include/llvm/MC/MCAsmBackend.h9
-rw-r--r--include/llvm/MC/MCAsmInfo.h12
-rw-r--r--include/llvm/MC/MCAsmInfoCOFF.h10
-rw-r--r--include/llvm/MC/MCAssembler.h31
-rw-r--r--include/llvm/MC/MCContext.h28
-rw-r--r--include/llvm/MC/MCDwarf.h42
-rw-r--r--include/llvm/MC/MCExpr.h4
-rw-r--r--include/llvm/MC/MCInstrDesc.h185
-rw-r--r--include/llvm/MC/MCObjectFileInfo.h19
-rw-r--r--include/llvm/MC/MCStreamer.h4
-rw-r--r--include/llvm/Metadata.h3
-rw-r--r--include/llvm/Object/COFF.h4
-rw-r--r--include/llvm/Object/MachO.h4
-rw-r--r--include/llvm/Object/ObjectFile.h15
-rw-r--r--include/llvm/Operator.h4
-rw-r--r--include/llvm/PassSupport.h7
-rw-r--r--include/llvm/Support/CFG.h2
-rw-r--r--include/llvm/Support/CommandLine.h6
-rw-r--r--include/llvm/Support/Dwarf.h1
-rw-r--r--include/llvm/Support/ELF.h2
-rw-r--r--include/llvm/Support/FileSystem.h228
-rw-r--r--include/llvm/Support/MathExtras.h14
-rw-r--r--include/llvm/Support/Program.h13
-rw-r--r--include/llvm/Support/TargetRegistry.h10
-rw-r--r--include/llvm/Support/Valgrind.h6
-rw-r--r--include/llvm/Target/Target.td22
-rw-r--r--include/llvm/Target/TargetCallingConv.h1
-rw-r--r--include/llvm/Target/TargetInstrInfo.h12
-rw-r--r--include/llvm/Target/TargetLibraryInfo.h196
-rw-r--r--include/llvm/Target/TargetLowering.h23
-rw-r--r--include/llvm/Target/TargetMachine.h23
-rw-r--r--include/llvm/Target/TargetOpcodes.h7
-rw-r--r--include/llvm/Target/TargetOptions.h278
-rw-r--r--include/llvm/Target/TargetSelectionDAG.td2
-rw-r--r--include/llvm/Transforms/IPO/PassManagerBuilder.h7
-rw-r--r--include/llvm/Transforms/Instrumentation.h3
-rw-r--r--include/llvm/Transforms/Scalar.h6
-rw-r--r--include/llvm/Transforms/Utils/BasicBlockUtils.h5
-rw-r--r--include/llvm/Transforms/Utils/ModuleUtils.h3
-rw-r--r--include/llvm/Transforms/Utils/UnrollLoop.h5
-rw-r--r--include/llvm/Type.h4
-rw-r--r--include/llvm/User.h1
-rw-r--r--lib/Analysis/CMakeLists.txt6
-rw-r--r--lib/Analysis/ConstantFolding.cpp133
-rw-r--r--lib/Analysis/DIBuilder.cpp2
-rw-r--r--lib/Analysis/IPA/CMakeLists.txt6
-rw-r--r--lib/Analysis/IPA/LLVMBuild.txt1
-rw-r--r--lib/Analysis/InstructionSimplify.cpp540
-rw-r--r--lib/Analysis/LLVMBuild.txt4
-rw-r--r--lib/Analysis/LazyValueInfo.cpp47
-rw-r--r--lib/Analysis/Lint.cpp9
-rw-r--r--lib/Analysis/LoopInfo.cpp96
-rw-r--r--lib/Analysis/MemDepPrinter.cpp2
-rw-r--r--lib/Analysis/PHITransAddr.cpp9
-rw-r--r--lib/Analysis/ProfileVerifierPass.cpp2
-rw-r--r--lib/Analysis/ScalarEvolution.cpp64
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp115
-rw-r--r--lib/Analysis/ValueTracking.cpp91
-rw-r--r--lib/Archive/ArchiveWriter.cpp6
-rw-r--r--lib/Archive/CMakeLists.txt6
-rw-r--r--lib/Archive/LLVMBuild.txt1
-rw-r--r--lib/AsmParser/CMakeLists.txt5
-rw-r--r--lib/AsmParser/LLParser.cpp74
-rw-r--r--lib/AsmParser/LLParser.h4
-rw-r--r--lib/AsmParser/LLVMBuild.txt1
-rw-r--r--lib/Bitcode/LLVMBuild.txt4
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp331
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.h5
-rw-r--r--lib/Bitcode/Reader/CMakeLists.txt5
-rw-r--r--lib/Bitcode/Reader/LLVMBuild.txt1
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp133
-rw-r--r--lib/Bitcode/Writer/CMakeLists.txt5
-rw-r--r--lib/Bitcode/Writer/LLVMBuild.txt1
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.cpp40
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.h4
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.cpp8
-rw-r--r--lib/CodeGen/Analysis.cpp67
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp32
-rw-r--r--lib/CodeGen/AsmPrinter/CMakeLists.txt10
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp16
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp6
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.cpp4
-rw-r--r--lib/CodeGen/AsmPrinter/LLVMBuild.txt1
-rw-r--r--lib/CodeGen/BranchFolding.cpp20
-rw-r--r--lib/CodeGen/CMakeLists.txt15
-rw-r--r--lib/CodeGen/CodeGen.cpp1
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp8
-rw-r--r--lib/CodeGen/DFAPacketizer.cpp98
-rw-r--r--lib/CodeGen/DeadMachineInstructionElim.cpp2
-rw-r--r--lib/CodeGen/ExecutionDepsFix.cpp2
-rw-r--r--lib/CodeGen/ExpandISelPseudos.cpp3
-rw-r--r--lib/CodeGen/ExpandPostRAPseudos.cpp2
-rw-r--r--lib/CodeGen/GCStrategy.cpp2
-rw-r--r--lib/CodeGen/IfConversion.cpp12
-rw-r--r--lib/CodeGen/InlineSpiller.cpp8
-rw-r--r--lib/CodeGen/LLVMBuild.txt4
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp20
-rw-r--r--lib/CodeGen/LiveDebugVariables.cpp4
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp2
-rw-r--r--lib/CodeGen/LiveRangeEdit.cpp22
-rw-r--r--lib/CodeGen/LiveRangeEdit.h7
-rw-r--r--lib/CodeGen/LiveVariables.cpp6
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp147
-rw-r--r--lib/CodeGen/MachineBlockPlacement.cpp156
-rw-r--r--lib/CodeGen/MachineCSE.cpp9
-rw-r--r--lib/CodeGen/MachineInstr.cpp116
-rw-r--r--lib/CodeGen/MachineInstrBundle.cpp180
-rw-r--r--lib/CodeGen/MachineLICM.cpp13
-rw-r--r--lib/CodeGen/MachineSSAUpdater.cpp6
-rw-r--r--lib/CodeGen/MachineSink.cpp164
-rw-r--r--lib/CodeGen/MachineVerifier.cpp36
-rw-r--r--lib/CodeGen/PHIElimination.cpp2
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp10
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp7
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp10
-rw-r--r--lib/CodeGen/RegAllocFast.cpp6
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp16
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp10
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp58
-rw-r--r--lib/CodeGen/SelectionDAG/CMakeLists.txt10
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp135
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp33
-rw-r--r--lib/CodeGen/SelectionDAG/InstrEmitter.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/LLVMBuild.txt1
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp20
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp32
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp92
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp76
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp137
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h5
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp177
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp40
-rw-r--r--lib/CodeGen/ShrinkWrapping.cpp2
-rw-r--r--lib/CodeGen/SjLjEHPrepare.cpp41
-rw-r--r--lib/CodeGen/SplitKit.cpp2
-rw-r--r--lib/CodeGen/Splitter.cpp827
-rw-r--r--lib/CodeGen/Splitter.h101
-rw-r--r--lib/CodeGen/TailDuplication.cpp13
-rw-r--r--lib/CodeGen/TargetFrameLoweringImpl.cpp (renamed from lib/Target/TargetFrameLowering.cpp)2
-rw-r--r--lib/CodeGen/TargetInstrInfoImpl.cpp68
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp4
-rw-r--r--lib/CodeGen/TargetOptionsImpl.cpp52
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp34
-rw-r--r--lib/DebugInfo/CMakeLists.txt4
-rw-r--r--lib/DebugInfo/LLVMBuild.txt1
-rw-r--r--lib/ExecutionEngine/CMakeLists.txt7
-rw-r--r--lib/ExecutionEngine/ExecutionEngine.cpp28
-rw-r--r--lib/ExecutionEngine/Interpreter/CMakeLists.txt8
-rw-r--r--lib/ExecutionEngine/Interpreter/LLVMBuild.txt1
-rw-r--r--lib/ExecutionEngine/JIT/CMakeLists.txt10
-rw-r--r--lib/ExecutionEngine/JIT/JIT.cpp11
-rw-r--r--lib/ExecutionEngine/JIT/JIT.h4
-rw-r--r--lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp2
-rw-r--r--lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp2
-rw-r--r--lib/ExecutionEngine/JIT/JITEmitter.cpp14
-rw-r--r--lib/ExecutionEngine/JIT/LLVMBuild.txt1
-rw-r--r--lib/ExecutionEngine/LLVMBuild.txt4
-rw-r--r--lib/ExecutionEngine/MCJIT/CMakeLists.txt8
-rw-r--r--lib/ExecutionEngine/MCJIT/LLVMBuild.txt1
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.cpp7
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.h4
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt5
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt1
-rw-r--r--lib/ExecutionEngine/TargetSelect.cpp27
-rw-r--r--lib/LLVMBuild.txt4
-rw-r--r--lib/Linker/CMakeLists.txt8
-rw-r--r--lib/Linker/LLVMBuild.txt1
-rw-r--r--lib/MC/CMakeLists.txt5
-rw-r--r--lib/MC/ELFObjectWriter.cpp49
-rw-r--r--lib/MC/ELFObjectWriter.h6
-rw-r--r--lib/MC/LLVMBuild.txt4
-rw-r--r--lib/MC/MCAsmInfo.cpp1
-rw-r--r--lib/MC/MCAsmInfoCOFF.cpp8
-rw-r--r--lib/MC/MCAsmInfoDarwin.cpp4
-rw-r--r--lib/MC/MCAsmStreamer.cpp4
-rw-r--r--lib/MC/MCAssembler.cpp71
-rw-r--r--lib/MC/MCDisassembler/CMakeLists.txt7
-rw-r--r--lib/MC/MCDisassembler/EDMain.cpp (renamed from tools/edis/EDMain.cpp)21
-rw-r--r--lib/MC/MCDwarf.cpp344
-rw-r--r--lib/MC/MCELF.cpp4
-rw-r--r--lib/MC/MCELFStreamer.cpp5
-rw-r--r--lib/MC/MCMachOStreamer.cpp2
-rw-r--r--lib/MC/MCObjectFileInfo.cpp6
-rw-r--r--lib/MC/MCObjectStreamer.cpp4
-rw-r--r--lib/MC/MCParser/AsmParser.cpp55
-rw-r--r--lib/MC/MCParser/CMakeLists.txt5
-rw-r--r--lib/MC/MCParser/ELFAsmParser.cpp1
-rw-r--r--lib/MC/MCParser/LLVMBuild.txt1
-rw-r--r--lib/MC/MachObjectWriter.cpp11
-rw-r--r--lib/MC/WinCOFFObjectWriter.cpp2
-rw-r--r--lib/Object/CMakeLists.txt5
-rw-r--r--lib/Object/COFFObjectFile.cpp19
-rw-r--r--lib/Object/ELFObjectFile.cpp49
-rw-r--r--lib/Object/LLVMBuild.txt1
-rw-r--r--lib/Object/MachOObjectFile.cpp97
-rw-r--r--lib/Object/Object.cpp11
-rw-r--r--lib/Support/APFloat.cpp86
-rw-r--r--lib/Support/APInt.cpp14
-rw-r--r--lib/Support/CommandLine.cpp3
-rw-r--r--lib/Support/DAGDeltaAlgorithm.cpp3
-rw-r--r--lib/Support/Host.cpp2
-rw-r--r--lib/Support/LLVMBuild.txt1
-rw-r--r--lib/Support/Mutex.cpp2
-rw-r--r--lib/Support/Path.cpp26
-rw-r--r--lib/Support/PathV2.cpp126
-rw-r--r--lib/Support/Program.cpp1
-rw-r--r--lib/Support/RWMutex.cpp2
-rw-r--r--lib/Support/Statistic.cpp3
-rw-r--r--lib/Support/ThreadLocal.cpp2
-rw-r--r--lib/Support/Threading.cpp8
-rw-r--r--lib/Support/Triple.cpp7
-rw-r--r--lib/Support/Unix/PathV2.inc41
-rw-r--r--lib/Support/Unix/Program.inc12
-rw-r--r--lib/Support/Valgrind.cpp2
-rw-r--r--lib/Support/Windows/PathV2.inc128
-rw-r--r--lib/Support/Windows/Program.inc36
-rw-r--r--lib/Support/Windows/Signals.inc2
-rw-r--r--lib/Support/Windows/Windows.h106
-rw-r--r--lib/Support/raw_ostream.cpp1
-rw-r--r--lib/TableGen/CMakeLists.txt4
-rw-r--r--lib/TableGen/LLVMBuild.txt1
-rw-r--r--lib/TableGen/TGParser.cpp2
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp30
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp244
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h11
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp4
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp2
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp1001
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp148
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp94
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp6
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.cpp28
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.h8
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp54
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp64
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td192
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td147
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td1379
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td8
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td73
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td62
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp13
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp24
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h3
-rw-r--r--lib/Target/ARM/ARMTargetObjectFile.cpp1
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp1143
-rw-r--r--lib/Target/ARM/AsmParser/CMakeLists.txt8
-rw-r--r--lib/Target/ARM/AsmParser/LLVMBuild.txt1
-rw-r--r--lib/Target/ARM/CMakeLists.txt14
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp171
-rw-r--r--lib/Target/ARM/Disassembler/CMakeLists.txt8
-rw-r--r--lib/Target/ARM/Disassembler/LLVMBuild.txt1
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp26
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.h6
-rw-r--r--lib/Target/ARM/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/ARM/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/ARM/LLVMBuild.txt4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp46
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp9
-rw-r--r--lib/Target/ARM/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/ARM/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp8
-rw-r--r--lib/Target/ARM/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/ARM/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp7
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp8
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp46
-rw-r--r--lib/Target/CBackend/CMakeLists.txt12
-rw-r--r--lib/Target/CBackend/CTargetMachine.h4
-rw-r--r--lib/Target/CBackend/LLVMBuild.txt4
-rw-r--r--lib/Target/CBackend/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/CBackend/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/CMakeLists.txt7
-rw-r--r--lib/Target/CellSPU/CMakeLists.txt12
-rw-r--r--lib/Target/CellSPU/LLVMBuild.txt4
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt5
-rw-r--r--lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/CellSPU/SPUFrameLowering.cpp3
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp10
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.cpp3
-rw-r--r--lib/Target/CellSPU/SPUTargetMachine.h2
-rw-r--r--lib/Target/CellSPU/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/CellSPU/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/CppBackend/CMakeLists.txt7
-rw-r--r--lib/Target/CppBackend/CPPTargetMachine.h4
-rw-r--r--lib/Target/CppBackend/LLVMBuild.txt4
-rw-r--r--lib/Target/CppBackend/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/CppBackend/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/Hexagon/CMakeLists.txt35
-rw-r--r--lib/Target/Hexagon/Hexagon.h54
-rw-r--r--lib/Target/Hexagon/Hexagon.td66
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp555
-rw-r--r--lib/Target/Hexagon/HexagonCFGOptimizer.cpp240
-rw-r--r--lib/Target/Hexagon/HexagonCallingConv.td35
-rw-r--r--lib/Target/Hexagon/HexagonCallingConvLower.cpp207
-rw-r--r--lib/Target/Hexagon/HexagonCallingConvLower.h189
-rw-r--r--lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp184
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp333
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.h50
-rw-r--r--lib/Target/Hexagon/HexagonHardwareLoops.cpp644
-rw-r--r--lib/Target/Hexagon/HexagonISelDAGToDAG.cpp1495
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp1505
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h162
-rw-r--r--lib/Target/Hexagon/HexagonImmediates.td491
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormats.td242
-rw-r--r--lib/Target/Hexagon/HexagonInstrFormatsV4.td46
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp1459
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.h166
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.td3014
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV3.td134
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV4.td3392
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsics.td3462
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsDerived.td29
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsV3.td50
-rw-r--r--lib/Target/Hexagon/HexagonIntrinsicsV4.td369
-rw-r--r--lib/Target/Hexagon/HexagonMachineFunctionInfo.h75
-rw-r--r--lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp129
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.cpp323
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.h89
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.td169
-rw-r--r--lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp85
-rw-r--r--lib/Target/Hexagon/HexagonSchedule.td53
-rw-r--r--lib/Target/Hexagon/HexagonScheduleV4.td56
-rw-r--r--lib/Target/Hexagon/HexagonSelectCCInfo.td121
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp46
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.h40
-rw-r--r--lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp136
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.cpp59
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.h74
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp118
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.h86
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.cpp94
-rw-r--r--lib/Target/Hexagon/HexagonTargetObjectFile.h40
-rw-r--r--lib/Target/Hexagon/HexagonVarargsCallingConvention.h141
-rw-r--r--lib/Target/Hexagon/LLVMBuild.txt32
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt6
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp36
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h30
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp94
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h40
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/Makefile16
-rw-r--r--lib/Target/Hexagon/Makefile23
-rw-r--r--lib/Target/Hexagon/TargetInfo/CMakeLists.txt8
-rw-r--r--lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp19
-rw-r--r--lib/Target/Hexagon/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/Hexagon/TargetInfo/Makefile15
-rw-r--r--lib/Target/LLVMBuild.txt3
-rw-r--r--lib/Target/MBlaze/AsmParser/CMakeLists.txt7
-rw-r--r--lib/Target/MBlaze/AsmParser/LLVMBuild.txt1
-rw-r--r--lib/Target/MBlaze/CMakeLists.txt13
-rw-r--r--lib/Target/MBlaze/Disassembler/CMakeLists.txt7
-rw-r--r--lib/Target/MBlaze/Disassembler/LLVMBuild.txt1
-rw-r--r--lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp18
-rw-r--r--lib/Target/MBlaze/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/MBlaze/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h2
-rw-r--r--lib/Target/MBlaze/LLVMBuild.txt4
-rw-r--r--lib/Target/MBlaze/MBlazeAsmPrinter.cpp4
-rw-r--r--lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp22
-rw-r--r--lib/Target/MBlaze/MBlazeFrameLowering.cpp17
-rw-r--r--lib/Target/MBlaze/MBlazeISelLowering.cpp2
-rw-r--r--lib/Target/MBlaze/MBlazeInstrFormats.td24
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.td169
-rw-r--r--lib/Target/MBlaze/MBlazeMCInstLower.cpp2
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.cpp18
-rw-r--r--lib/Target/MBlaze/MBlazeTargetMachine.h1
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp17
-rw-r--r--lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h1
-rw-r--r--lib/Target/MBlaze/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/MBlaze/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/MSP430/CMakeLists.txt13
-rw-r--r--lib/Target/MSP430/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/MSP430/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/MSP430/LLVMBuild.txt4
-rw-r--r--lib/Target/MSP430/MCTargetDesc/CMakeLists.txt8
-rw-r--r--lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.cpp4
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp4
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.cpp9
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.cpp3
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.h2
-rw-r--r--lib/Target/MSP430/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/MSP430/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/Mips/CMakeLists.txt13
-rw-r--r--lib/Target/Mips/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/Mips/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp4
-rw-r--r--lib/Target/Mips/LLVMBuild.txt4
-rw-r--r--lib/Target/Mips/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/Mips/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp85
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h11
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h102
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp15
-rw-r--r--lib/Target/Mips/Mips.td6
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td48
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp20
-rw-r--r--lib/Target/Mips/MipsCodeEmitter.cpp4
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp17
-rw-r--r--lib/Target/Mips/MipsFrameLowering.cpp4
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp47
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp192
-rw-r--r--lib/Target/Mips/MipsISelLowering.h9
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td2
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp17
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h1
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td161
-rw-r--r--lib/Target/Mips/MipsMCInstLower.cpp37
-rw-r--r--lib/Target/Mips/MipsMCInstLower.h2
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp1
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td3
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp2
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp53
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h8
-rw-r--r--lib/Target/Mips/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/Mips/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/PTX/CMakeLists.txt14
-rw-r--r--lib/Target/PTX/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/PTX/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp47
-rw-r--r--lib/Target/PTX/LLVMBuild.txt4
-rw-r--r--lib/Target/PTX/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/PTX/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h71
-rw-r--r--lib/Target/PTX/PTXAsmPrinter.cpp69
-rw-r--r--lib/Target/PTX/PTXFPRoundingModePass.cpp6
-rw-r--r--lib/Target/PTX/PTXISelLowering.cpp35
-rw-r--r--lib/Target/PTX/PTXInstrInfo.cpp17
-rw-r--r--lib/Target/PTX/PTXInstrInfo.td12
-rw-r--r--lib/Target/PTX/PTXMFInfoExtract.cpp21
-rw-r--r--lib/Target/PTX/PTXMachineFunctionInfo.h154
-rw-r--r--lib/Target/PTX/PTXTargetMachine.cpp24
-rw-r--r--lib/Target/PTX/PTXTargetMachine.h6
-rw-r--r--lib/Target/PTX/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/PTX/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt14
-rw-r--r--lib/Target/PowerPC/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/PowerPC/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/PowerPC/LLVMBuild.txt4
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp10
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp5
-rw-r--r--lib/Target/PowerPC/PPCCodeEmitter.cpp6
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp18
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.cpp179
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.h13
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp6
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp39
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td18
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp120
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h5
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td13
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp130
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h2
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp20
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h4
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp11
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h6
-rw-r--r--lib/Target/PowerPC/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/PowerPC/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/Sparc/CMakeLists.txt12
-rw-r--r--lib/Target/Sparc/DelaySlotFiller.cpp16
-rw-r--r--lib/Target/Sparc/LLVMBuild.txt4
-rw-r--r--lib/Target/Sparc/MCTargetDesc/CMakeLists.txt6
-rw-r--r--lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/Sparc/SparcAsmPrinter.cpp4
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp2
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.cpp4
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp15
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.h6
-rw-r--r--lib/Target/Sparc/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/Sparc/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/TargetInstrInfo.cpp42
-rw-r--r--lib/Target/TargetLibraryInfo.cpp91
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp6
-rw-r--r--lib/Target/TargetMachine.cpp192
-rw-r--r--lib/Target/TargetRegisterInfo.cpp2
-rw-r--r--lib/Target/X86/AsmParser/CMakeLists.txt8
-rw-r--r--lib/Target/X86/AsmParser/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/CMakeLists.txt15
-rw-r--r--lib/Target/X86/Disassembler/CMakeLists.txt6
-rw-r--r--lib/Target/X86/Disassembler/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/InstPrinter/CMakeLists.txt6
-rw-r--r--lib/Target/X86/InstPrinter/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.cpp244
-rw-r--r--lib/Target/X86/LLVMBuild.txt4
-rw-r--r--lib/Target/X86/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/X86/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp13
-rw-r--r--lib/Target/X86/MCTargetDesc/X86BaseInfo.h20
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp14
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h8
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp99
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp6
-rw-r--r--lib/Target/X86/README-SSE.txt13
-rw-r--r--lib/Target/X86/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/X86/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/Utils/CMakeLists.txt5
-rw-r--r--lib/Target/X86/Utils/LLVMBuild.txt1
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.cpp110
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.h47
-rw-r--r--lib/Target/X86/X86.td18
-rw-r--r--lib/Target/X86/X86CallingConv.td13
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp2
-rw-r--r--lib/Target/X86/X86FastISel.cpp10
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp145
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp900
-rw-r--r--lib/Target/X86/X86ISelLowering.h25
-rw-r--r--lib/Target/X86/X86InstrFMA.td388
-rw-r--r--lib/Target/X86/X86InstrFormats.td37
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td30
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp37
-rw-r--r--lib/Target/X86/X86InstrInfo.td5
-rw-r--r--lib/Target/X86/X86InstrSSE.td801
-rw-r--r--lib/Target/X86/X86InstrXOP.td243
-rw-r--r--lib/Target/X86/X86JITInfo.cpp2
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp4
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp22
-rw-r--r--lib/Target/X86/X86Subtarget.cpp6
-rw-r--r--lib/Target/X86/X86Subtarget.h4
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp18
-rw-r--r--lib/Target/X86/X86TargetMachine.h6
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp2
-rw-r--r--lib/Target/XCore/CMakeLists.txt12
-rw-r--r--lib/Target/XCore/LLVMBuild.txt4
-rw-r--r--lib/Target/XCore/MCTargetDesc/CMakeLists.txt5
-rw-r--r--lib/Target/XCore/MCTargetDesc/LLVMBuild.txt1
-rw-r--r--lib/Target/XCore/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/XCore/TargetInfo/LLVMBuild.txt1
-rw-r--r--lib/Target/XCore/XCore.h3
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp3
-rw-r--r--lib/Target/XCore/XCoreISelDAGToDAG.cpp9
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp2
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp5
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h2
-rw-r--r--lib/Transforms/IPO/CMakeLists.txt11
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp33
-rw-r--r--lib/Transforms/IPO/LLVMBuild.txt1
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp3
-rw-r--r--lib/Transforms/InstCombine/CMakeLists.txt8
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h6
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp4
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp14
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp28
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp38
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp3
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp30
-rw-r--r--lib/Transforms/InstCombine/LLVMBuild.txt1
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp50
-rw-r--r--lib/Transforms/Instrumentation/CMakeLists.txt7
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp139
-rw-r--r--lib/Transforms/Instrumentation/LLVMBuild.txt1
-rw-r--r--lib/Transforms/LLVMBuild.txt4
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt9
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp11
-rw-r--r--lib/Transforms/Scalar/ConstantProp.cpp13
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp4
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp7
-rw-r--r--lib/Transforms/Scalar/GVN.cpp9
-rw-r--r--lib/Transforms/Scalar/GlobalMerge.cpp2
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp44
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp16
-rw-r--r--lib/Transforms/Scalar/LICM.cpp17
-rw-r--r--lib/Transforms/Scalar/LLVMBuild.txt1
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp6
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp148
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp57
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp104
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp4
-rw-r--r--lib/Transforms/Scalar/ObjCARC.cpp162
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp27
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp2
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp5
-rw-r--r--lib/Transforms/Scalar/Sink.cpp3
-rw-r--r--lib/Transforms/Utils/AddrModeMatcher.cpp9
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp15
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp3
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt9
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp62
-rw-r--r--lib/Transforms/Utils/LLVMBuild.txt1
-rw-r--r--lib/Transforms/Utils/Local.cpp38
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp64
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp29
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp374
-rw-r--r--lib/Transforms/Utils/ModuleUtils.cpp16
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp2
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp4
-rw-r--r--lib/Transforms/Utils/SimplifyInstructions.cpp12
-rw-r--r--lib/VMCore/AsmWriter.cpp3
-rw-r--r--lib/VMCore/AutoUpgrade.cpp587
-rw-r--r--lib/VMCore/CMakeLists.txt2
-rw-r--r--lib/VMCore/ConstantFold.cpp2
-rw-r--r--lib/VMCore/Constants.cpp16
-rw-r--r--lib/VMCore/Instruction.cpp53
-rw-r--r--lib/VMCore/Instructions.cpp36
-rw-r--r--lib/VMCore/LLVMBuild.txt1
-rw-r--r--lib/VMCore/Metadata.cpp4
-rw-r--r--lib/VMCore/Type.cpp10
-rw-r--r--lib/VMCore/User.cpp2
-rw-r--r--lib/VMCore/Value.cpp13
-rw-r--r--lib/VMCore/Verifier.cpp78
-rw-r--r--projects/LLVMBuild.txt1
-rw-r--r--projects/sample/autoconf/configure.ac8
-rwxr-xr-xprojects/sample/configure12
-rw-r--r--runtime/LLVMBuild.txt1
-rw-r--r--test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll4
-rw-r--r--test/Analysis/BasicAA/constant-over-index.ll4
-rw-r--r--test/Analysis/BasicAA/phi-and-select.ll16
-rw-r--r--test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll3
-rw-r--r--test/Analysis/ScalarEvolution/nsw.ll20
-rw-r--r--test/Assembler/AutoUpgradeIntrinsics.ll55
-rw-r--r--test/Assembler/auto_upgrade_intrinsics.ll44
-rw-r--r--test/Bitcode/AutoUpgradeGlobals.ll4
-rw-r--r--test/Bitcode/AutoUpgradeGlobals.ll.bc (binary, 312 -> 0 bytes)
-rw-r--r--test/Bitcode/fbench-llvm-2.9.ll5
-rw-r--r--test/Bitcode/fbench-llvm-2.9.ll.bc (binary, 6096 -> 0 bytes)
-rw-r--r--test/Bitcode/spirit-llvm-2.9.ll5
-rw-r--r--test/Bitcode/spirit-llvm-2.9.ll.bc (binary, 1595424 -> 0 bytes)
-rw-r--r--test/Bitcode/sse42_crc32.ll44
-rw-r--r--test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll10
-rw-r--r--test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll4
-rw-r--r--test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll38
-rw-r--r--test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll302
-rw-r--r--test/CodeGen/ARM/2011-11-30-MergeAlignment.ll24
-rw-r--r--test/CodeGen/ARM/2011-12-14-machine-sink.ll48
-rw-r--r--test/CodeGen/ARM/call.ll2
-rw-r--r--test/CodeGen/ARM/clz.ll4
-rw-r--r--test/CodeGen/ARM/ctor_order.ll28
-rw-r--r--test/CodeGen/ARM/ctz.ll4
-rw-r--r--test/CodeGen/ARM/debug-info-d16-reg.ll2
-rw-r--r--test/CodeGen/ARM/fast-isel-call.ll46
-rw-r--r--test/CodeGen/ARM/fast-isel-deadcode.ll23
-rw-r--r--test/CodeGen/ARM/fast-isel.ll61
-rw-r--r--test/CodeGen/ARM/fold-const.ll4
-rw-r--r--test/CodeGen/ARM/global-merge.ll2
-rw-r--r--test/CodeGen/ARM/globals.ll1
-rw-r--r--test/CodeGen/ARM/inlineasm3.ll10
-rw-r--r--test/CodeGen/ARM/long_shift.ll4
-rw-r--r--test/CodeGen/ARM/neon_ld1.ll14
-rw-r--r--test/CodeGen/ARM/neon_ld2.ll15
-rw-r--r--test/CodeGen/ARM/vmov.ll10
-rw-r--r--test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll8
-rw-r--r--test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll10
-rw-r--r--test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll4
-rw-r--r--test/CodeGen/Generic/2008-02-04-Ctlz.ll8
-rw-r--r--test/CodeGen/Generic/llvm-ct-intrinsics.ll32
-rw-r--r--test/CodeGen/Hexagon/args.ll19
-rw-r--r--test/CodeGen/Hexagon/combine.ll18
-rw-r--r--test/CodeGen/Hexagon/dg.exp5
-rw-r--r--test/CodeGen/Hexagon/double.ll23
-rw-r--r--test/CodeGen/Hexagon/float.ll23
-rw-r--r--test/CodeGen/Hexagon/frame.ll24
-rw-r--r--test/CodeGen/Hexagon/mpy.ll20
-rw-r--r--test/CodeGen/Hexagon/static.ll21
-rw-r--r--test/CodeGen/Hexagon/struct_args.ll16
-rw-r--r--test/CodeGen/Hexagon/struct_args_large.ll17
-rw-r--r--test/CodeGen/Hexagon/vaddh.ll17
-rw-r--r--test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll4
-rw-r--r--test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll8
-rw-r--r--test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll4
-rw-r--r--test/CodeGen/MSP430/2009-10-10-OrImpDef.ll4
-rw-r--r--test/CodeGen/MSP430/AddrMode-bis-rx.ll2
-rw-r--r--test/CodeGen/MSP430/AddrMode-bis-xr.ll4
-rw-r--r--test/CodeGen/MSP430/AddrMode-mov-rx.ll2
-rw-r--r--test/CodeGen/MSP430/AddrMode-mov-xr.ll2
-rw-r--r--test/CodeGen/Mips/2008-06-05-Carry.ll19
-rw-r--r--test/CodeGen/Mips/2008-07-03-SRet.ll23
-rw-r--r--test/CodeGen/Mips/2008-07-07-Float2Int.ll17
-rw-r--r--test/CodeGen/Mips/2008-07-22-Cstpool.ll17
-rw-r--r--test/CodeGen/Mips/2008-08-01-AsmInline.ll20
-rw-r--r--test/CodeGen/Mips/2008-08-04-Bitconvert.ll17
-rw-r--r--test/CodeGen/Mips/2008-08-06-Alloca.ll20
-rw-r--r--test/CodeGen/Mips/2008-08-08-ctlz.ll10
-rw-r--r--test/CodeGen/Mips/2010-07-20-Switch.ll22
-rw-r--r--test/CodeGen/Mips/2010-11-09-CountLeading.ll10
-rw-r--r--test/CodeGen/Mips/br-jmp.ll13
-rwxr-xr-xtest/CodeGen/Mips/cmov.ll23
-rw-r--r--test/CodeGen/Mips/extins.ll2
-rw-r--r--test/CodeGen/Mips/fcopysign.ll84
-rw-r--r--test/CodeGen/Mips/indirectcall.ll8
-rw-r--r--test/CodeGen/Mips/largeimmprinting.ll2
-rw-r--r--test/CodeGen/Mips/mips64ext.ll11
-rw-r--r--test/CodeGen/Mips/mips64extins.ll55
-rw-r--r--test/CodeGen/Mips/mips64fpldst.ll4
-rw-r--r--test/CodeGen/Mips/mips64imm.ll35
-rw-r--r--test/CodeGen/Mips/mips64instrs.ll8
-rw-r--r--test/CodeGen/Mips/mips64intldst.ll4
-rw-r--r--test/CodeGen/Mips/mips64muldiv.ll41
-rw-r--r--test/CodeGen/Mips/mipslopat.ll2
-rw-r--r--test/CodeGen/Mips/private.ll19
-rw-r--r--test/CodeGen/Mips/rotate.ll2
-rw-r--r--test/CodeGen/Mips/tls.ll19
-rw-r--r--test/CodeGen/PTX/mov.ll10
-rw-r--r--test/CodeGen/PTX/parameter-order.ll2
-rw-r--r--test/CodeGen/PowerPC/2007-03-24-cntlzd.ll4
-rw-r--r--test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll6
-rw-r--r--test/CodeGen/PowerPC/2010-02-12-saveCR.ll14
-rw-r--r--test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll191
-rw-r--r--test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll225
-rw-r--r--test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll16
-rw-r--r--test/CodeGen/PowerPC/Frames-alloca.ll6
-rw-r--r--test/CodeGen/PowerPC/Frames-large.ll15
-rw-r--r--test/CodeGen/PowerPC/big-endian-formal-args.ll2
-rw-r--r--test/CodeGen/PowerPC/bl8_elf_nop.ll16
-rw-r--r--test/CodeGen/PowerPC/cttz.ll4
-rw-r--r--test/CodeGen/PowerPC/indirectbr.ll12
-rw-r--r--test/CodeGen/SPARC/2011-12-03-TailDuplication.ll25
-rw-r--r--test/CodeGen/Thumb/vargs.ll4
-rw-r--r--test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll4
-rw-r--r--test/CodeGen/Thumb2/machine-licm.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-cbnz.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-clz.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-select_xform.ll2
-rw-r--r--test/CodeGen/X86/2008-01-16-Trampoline.ll14
-rw-r--r--test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll2
-rw-r--r--test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll10
-rw-r--r--test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll4
-rw-r--r--test/CodeGen/X86/2008-09-29-VolatileBug.ll2
-rw-r--r--test/CodeGen/X86/2009-01-31-BigShift2.ll2
-rw-r--r--test/CodeGen/X86/2009-03-23-MultiUseSched.ll50
-rw-r--r--test/CodeGen/X86/2009-05-11-tailmerge-crash.ll2
-rw-r--r--test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll6
-rw-r--r--test/CodeGen/X86/2011-11-30-or.ll25
-rw-r--r--test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll18
-rw-r--r--test/CodeGen/X86/2011-12-06-BitcastVectorGlobal.ll5
-rw-r--r--test/CodeGen/X86/2011-12-08-AVXISelBugs.ll71
-rw-r--r--test/CodeGen/X86/2011-12-15-vec_shift.ll12
-rw-r--r--test/CodeGen/X86/2011-12-8-bitcastintprom.ll15
-rw-r--r--test/CodeGen/X86/avx-arith.ll11
-rw-r--r--test/CodeGen/X86/avx-basic.ll16
-rw-r--r--test/CodeGen/X86/avx-cast.ll2
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86.ll133
-rw-r--r--test/CodeGen/X86/avx-shuffle.ll10
-rw-r--r--test/CodeGen/X86/avx-splat.ll2
-rw-r--r--test/CodeGen/X86/avx-varargs-x86_64.ll15
-rw-r--r--test/CodeGen/X86/avx-vperm2f128.ll9
-rw-r--r--test/CodeGen/X86/avx-vpermil.ll8
-rw-r--r--test/CodeGen/X86/avx-vshufp.ll18
-rw-r--r--test/CodeGen/X86/avx2-intrinsics-x86.ll10
-rw-r--r--test/CodeGen/X86/avx2-phaddsub.ll73
-rw-r--r--test/CodeGen/X86/avx2-vperm2i128.ll47
-rw-r--r--test/CodeGen/X86/bc-extract.ll2
-rw-r--r--test/CodeGen/X86/block-placement.ll206
-rw-r--r--test/CodeGen/X86/bmi.ll16
-rw-r--r--test/CodeGen/X86/clz.ll14
-rw-r--r--test/CodeGen/X86/cmov.ll4
-rw-r--r--test/CodeGen/X86/coalescer-commute1.ll2
-rw-r--r--test/CodeGen/X86/crash.ll6
-rw-r--r--test/CodeGen/X86/fast-isel-x86-64.ll12
-rw-r--r--test/CodeGen/X86/fma4-intrinsics-x86_64.ll243
-rw-r--r--test/CodeGen/X86/fp-stack-O0.ll2
-rw-r--r--test/CodeGen/X86/fp-stack-ret-conv.ll2
-rw-r--r--test/CodeGen/X86/haddsub.ll91
-rw-r--r--test/CodeGen/X86/inline-asm-q-regs.ll7
-rw-r--r--test/CodeGen/X86/loop-strength-reduce5.ll4
-rw-r--r--test/CodeGen/X86/lsr-nonaffine.ll2
-rw-r--r--test/CodeGen/X86/lsr-sort.ll2
-rw-r--r--test/CodeGen/X86/lzcnt.ll16
-rw-r--r--test/CodeGen/X86/movmsk.ll16
-rw-r--r--test/CodeGen/X86/nancvt.ll12
-rw-r--r--test/CodeGen/X86/narrow-shl-load.ll2
-rw-r--r--test/CodeGen/X86/overlap-shift.ll2
-rw-r--r--test/CodeGen/X86/peep-vector-extract-insert.ll2
-rw-r--r--test/CodeGen/X86/pointer-vector.ll138
-rw-r--r--test/CodeGen/X86/pr1505b.ll4
-rw-r--r--test/CodeGen/X86/pr2182.ll16
-rw-r--r--test/CodeGen/X86/prefetch.ll3
-rw-r--r--test/CodeGen/X86/rounding-ops.ll132
-rw-r--r--test/CodeGen/X86/segmented-stacks.ll44
-rw-r--r--test/CodeGen/X86/sext-subreg.ll6
-rw-r--r--test/CodeGen/X86/sse-domains.ll1
-rw-r--r--test/CodeGen/X86/sse-minmax.ll80
-rw-r--r--test/CodeGen/X86/stack-align.ll6
-rw-r--r--test/CodeGen/X86/store-empty-member.ll2
-rw-r--r--test/CodeGen/X86/tail-opts.ll20
-rw-r--r--test/CodeGen/X86/twoaddr-lea.ll2
-rw-r--r--test/CodeGen/X86/vec_compare-2.ll3
-rw-r--r--test/CodeGen/X86/vec_ctbits.ll8
-rw-r--r--test/CodeGen/X86/vec_shuffle-23.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-24.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-38.ll21
-rw-r--r--test/CodeGen/X86/vector-gep.ll77
-rw-r--r--test/CodeGen/X86/volatile.ll10
-rw-r--r--test/CodeGen/X86/widen_arith-3.ll2
-rw-r--r--test/CodeGen/X86/widen_load-1.ll1
-rw-r--r--test/CodeGen/X86/zero-remat.ll8
-rw-r--r--test/CodeGen/XCore/licm-ldwcp.ll2
-rw-r--r--test/CodeGen/XCore/scavenging.ll48
-rw-r--r--test/DebugInfo/2009-01-15-dbg_declare.ll16
-rw-r--r--test/DebugInfo/2010-05-03-OriginDIE.ll4
-rw-r--r--test/Feature/const_pv.ll8
-rw-r--r--test/Feature/global_pv.ll14
-rw-r--r--test/Feature/intrinsics.ll34
-rw-r--r--test/Linker/2004-05-07-TypeResolution1.ll2
-rw-r--r--test/Linker/2004-05-07-TypeResolution2.ll2
-rw-r--r--test/MC/ARM/basic-arm-instructions.s28
-rw-r--r--test/MC/ARM/basic-thumb2-instructions.s51
-rw-r--r--test/MC/ARM/dot-req.s11
-rw-r--r--test/MC/ARM/elf-reloc-01.ll12
-rw-r--r--test/MC/ARM/mode-switch.s11
-rw-r--r--test/MC/ARM/neon-add-encoding.s114
-rw-r--r--test/MC/ARM/neon-bitwise-encoding.s31
-rw-r--r--test/MC/ARM/neon-cmp-encoding.s63
-rw-r--r--test/MC/ARM/neon-mul-encoding.s96
-rw-r--r--test/MC/ARM/neon-shift-encoding.s165
-rw-r--r--test/MC/ARM/neon-shuffle-encoding.s70
-rw-r--r--test/MC/ARM/neon-sub-encoding.s46
-rw-r--r--test/MC/ARM/neon-table-encoding.s24
-rw-r--r--test/MC/ARM/neon-vld-encoding.s65
-rw-r--r--test/MC/ARM/neon-vst-encoding.s94
-rw-r--r--test/MC/ARM/neont2-table-encoding.s17
-rw-r--r--test/MC/AsmParser/directive_incbin.s6
-rw-r--r--test/MC/AsmParser/incbin_abcd1
-rw-r--r--test/MC/COFF/symbol-mangling.ll17
-rw-r--r--test/MC/Disassembler/ARM/neont2.txt2
-rw-r--r--test/MC/Disassembler/MBlaze/mblaze_mbar.txt14
-rw-r--r--test/MC/Disassembler/MBlaze/mblaze_pattern.txt3
-rw-r--r--test/MC/ELF/global-offset.s6
-rw-r--r--test/MC/ELF/relocation-386.s14
-rw-r--r--test/MC/ELF/type.s14
-rw-r--r--test/MC/MachO/ARM/darwin-ARM-reloc.s (renamed from test/MC/ARM/darwin-ARM-reloc.s)4
-rw-r--r--test/MC/MachO/ARM/darwin-Thumb-reloc.s (renamed from test/MC/ARM/darwin-Thumb-reloc.s)0
-rw-r--r--test/MC/MachO/ARM/dg.exp5
-rw-r--r--test/MC/MachO/ARM/nop-armv4-padding.s (renamed from test/MC/ARM/nop-armv4-padding.s)0
-rw-r--r--test/MC/MachO/ARM/nop-armv6t2-padding.s (renamed from test/MC/ARM/nop-armv6t2-padding.s)0
-rw-r--r--test/MC/MachO/ARM/nop-thumb-padding.s (renamed from test/MC/ARM/nop-thumb-padding.s)0
-rw-r--r--test/MC/MachO/ARM/nop-thumb2-padding.s (renamed from test/MC/ARM/nop-thumb2-padding.s)0
-rw-r--r--test/MC/MachO/ARM/relax-thumb2-branches.s14
-rw-r--r--test/MC/MachO/ARM/thumb2-movt-fixup.s (renamed from test/MC/ARM/thumb2-movt-fixup.s)0
-rw-r--r--test/MC/MachO/gen-dwarf.s113
-rw-r--r--test/MC/MachO/reloc-pcrel-offset.s2
-rw-r--r--test/MC/MachO/reloc-pcrel.s2
-rw-r--r--test/MC/Mips/dg.exp5
-rw-r--r--test/MC/Mips/elf-relsym.ll29
-rw-r--r--test/MC/X86/x86_64-avx-encoding.s7
-rw-r--r--test/MC/X86/x86_64-fma4-encoding.s391
-rw-r--r--test/MC/X86/x86_64-xop-encoding.s584
-rw-r--r--test/Object/nm-trivial-object.test14
-rw-r--r--test/Object/objdump-symbol-table.test8
-rw-r--r--test/Other/constant-fold-gep.ll7
-rw-r--r--test/Other/lint.ll2
-rw-r--r--test/Transforms/ConstProp/2007-11-23-cttz.ll4
-rw-r--r--test/Transforms/ConstProp/calls.ll88
-rw-r--r--test/Transforms/DeadArgElim/deadexternal.ll4
-rw-r--r--test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll17
-rw-r--r--test/Transforms/DeadStoreElimination/free.ll10
-rw-r--r--test/Transforms/EarlyCSE/basic.ll10
-rw-r--r--test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll2
-rw-r--r--test/Transforms/FunctionAttrs/2010-10-30-volatile.ll2
-rw-r--r--test/Transforms/GlobalDCE/2009-09-03-MDNode.ll264
-rw-r--r--test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll2
-rw-r--r--test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll57
-rw-r--r--test/Transforms/IPConstantProp/dangling-block-address.ll2
-rw-r--r--test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll5
-rw-r--r--test/Transforms/IndVarSimplify/avoid-i0.ll2
-rw-r--r--test/Transforms/IndVarSimplify/preserve-gep-remainder.ll2
-rw-r--r--test/Transforms/IndVarSimplify/sink-alloca.ll4
-rw-r--r--test/Transforms/Inline/noinline-recursive-fn.ll6
-rw-r--r--test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll2
-rw-r--r--test/Transforms/InstCombine/2007-09-11-Trampoline.ll24
-rw-r--r--test/Transforms/InstCombine/2007-10-28-stacksave.ll2
-rw-r--r--test/Transforms/InstCombine/2008-01-14-DoubleNest.ll24
-rw-r--r--test/Transforms/InstCombine/2008-04-28-VolatileStore.ll2
-rw-r--r--test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll6
-rw-r--r--test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll4
-rw-r--r--test/Transforms/InstCombine/2008-06-24-StackRestore.ll4
-rw-r--r--test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll6
-rw-r--r--test/Transforms/InstCombine/align-external.ll13
-rw-r--r--test/Transforms/InstCombine/bitcount.ll8
-rw-r--r--test/Transforms/InstCombine/constant-fold-gep.ll6
-rw-r--r--test/Transforms/InstCombine/extractvalue.ll6
-rw-r--r--test/Transforms/InstCombine/fold-sqrt-sqrtf.ll17
-rw-r--r--test/Transforms/InstCombine/icmp.ll21
-rw-r--r--test/Transforms/InstCombine/intrinsics.ll28
-rw-r--r--test/Transforms/InstCombine/overflow.ll22
-rw-r--r--test/Transforms/InstCombine/select.ll20
-rw-r--r--test/Transforms/InstCombine/sext.ll8
-rw-r--r--test/Transforms/InstCombine/vector_gep1.ll37
-rw-r--r--test/Transforms/InstCombine/volatile_store.ll4
-rw-r--r--test/Transforms/InstSimplify/vector_gep.ll8
-rw-r--r--test/Transforms/JumpThreading/2011-04-14-InfLoop.ll2
-rw-r--r--test/Transforms/JumpThreading/crash.ll2
-rw-r--r--test/Transforms/JumpThreading/no-irreducible-loops.ll4
-rw-r--r--test/Transforms/LICM/2007-05-22-VolatileSink.ll4
-rw-r--r--test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll2
-rw-r--r--test/Transforms/LICM/crash.ll2
-rw-r--r--test/Transforms/LICM/scalar_promote.ll4
-rw-r--r--test/Transforms/LICM/speculate.ll167
-rw-r--r--test/Transforms/LoopSimplify/2011-12-14-LandingpadHeader.ll45
-rw-r--r--test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll4
-rw-r--r--test/Transforms/LoopStrengthReduce/2011-11-29-postincphi.ll36
-rw-r--r--test/Transforms/LoopStrengthReduce/2011-12-04-loserreg.ll96
-rw-r--r--test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll8
-rw-r--r--test/Transforms/LoopStrengthReduce/pr3399.ll2
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop.ll109
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop1.ll30
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop2.ll31
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop3.ll44
-rw-r--r--test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll91
-rw-r--r--test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll84
-rw-r--r--test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll138
-rw-r--r--test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll8
-rw-r--r--test/Transforms/MemCpyOpt/form-memset.ll30
-rw-r--r--test/Transforms/ObjCARC/basic.ll8
-rw-r--r--test/Transforms/ObjCARC/contract-storestrong.ll4
-rw-r--r--test/Transforms/ObjCARC/pointer-types.ll31
-rw-r--r--test/Transforms/SCCP/2008-05-23-UndefCallFold.ll4
-rw-r--r--test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll184
-rw-r--r--test/Transforms/ScalarRepl/debuginfo.ll107
-rw-r--r--test/Transforms/ScalarRepl/volatile.ll4
-rw-r--r--test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll70
-rw-r--r--test/Transforms/SimplifyCFG/branch-fold.ll16
-rw-r--r--test/Transforms/SimplifyCFG/branch_fold_dbg.ll122
-rw-r--r--test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll33
-rw-r--r--test/Transforms/SimplifyCFG/switch_formation.dbg.ll50
-rw-r--r--test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll116
-rw-r--r--test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll28
-rw-r--r--test/Transforms/SimplifyLibCalls/fwrite.ll13
-rw-r--r--test/Transforms/Sink/basic.ll2
-rw-r--r--test/Transforms/TailCallElim/dont_reorder_load.ll4
-rw-r--r--test/Verifier/cttz-undef-arg.ll16
-rw-r--r--test/lit.cfg7
-rw-r--r--tools/CMakeLists.txt13
-rw-r--r--tools/LLVMBuild.txt4
-rw-r--r--tools/Makefile10
-rw-r--r--tools/bugpoint/LLVMBuild.txt1
-rw-r--r--tools/edis/CMakeLists.txt21
-rw-r--r--tools/edis/Makefile53
-rw-r--r--tools/llc/LLVMBuild.txt1
-rw-r--r--tools/llc/llc.cpp158
-rw-r--r--tools/lli/LLVMBuild.txt1
-rw-r--r--tools/llvm-ar/LLVMBuild.txt1
-rw-r--r--tools/llvm-as/LLVMBuild.txt1
-rw-r--r--tools/llvm-bcanalyzer/LLVMBuild.txt1
-rw-r--r--tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp16
-rw-r--r--tools/llvm-config-2/CMakeLists.txt41
-rw-r--r--tools/llvm-config-2/Makefile57
-rw-r--r--tools/llvm-config/BuildVariables.inc.in (renamed from tools/llvm-config-2/BuildVariables.inc.in)1
-rw-r--r--tools/llvm-config/CMakeLists.txt152
-rw-r--r--tools/llvm-config/Makefile94
-rwxr-xr-xtools/llvm-config/find-cycles.pl170
-rw-r--r--tools/llvm-config/llvm-config.cpp (renamed from tools/llvm-config-2/llvm-config.cpp)18
-rw-r--r--tools/llvm-config/llvm-config.in.in466
-rw-r--r--tools/llvm-cov/LLVMBuild.txt1
-rw-r--r--tools/llvm-diff/LLVMBuild.txt1
-rw-r--r--tools/llvm-dis/LLVMBuild.txt1
-rw-r--r--tools/llvm-dwarfdump/LLVMBuild.txt1
-rw-r--r--tools/llvm-extract/LLVMBuild.txt1
-rw-r--r--tools/llvm-ld/LLVMBuild.txt1
-rw-r--r--tools/llvm-link/LLVMBuild.txt1
-rw-r--r--tools/llvm-mc/LLVMBuild.txt1
-rw-r--r--tools/llvm-mc/llvm-mc.cpp16
-rw-r--r--tools/llvm-nm/LLVMBuild.txt1
-rw-r--r--tools/llvm-nm/llvm-nm.cpp73
-rw-r--r--tools/llvm-objdump/LLVMBuild.txt1
-rw-r--r--tools/llvm-objdump/MachODump.cpp4
-rw-r--r--tools/llvm-objdump/llvm-objdump.cpp14
-rw-r--r--tools/llvm-prof/LLVMBuild.txt1
-rw-r--r--tools/llvm-ranlib/LLVMBuild.txt1
-rw-r--r--tools/llvm-rtdyld/LLVMBuild.txt1
-rw-r--r--tools/llvm-size/LLVMBuild.txt1
-rw-r--r--tools/llvm-stub/LLVMBuild.txt1
-rw-r--r--tools/lto/LTOCodeGenerator.cpp3
-rw-r--r--tools/lto/LTOModule.cpp4
-rw-r--r--tools/macho-dump/LLVMBuild.txt1
-rw-r--r--tools/opt/LLVMBuild.txt1
-rw-r--r--tools/opt/opt.cpp2
-rw-r--r--unittests/ADT/APFloatTest.cpp24
-rw-r--r--unittests/ExecutionEngine/ExecutionEngineTest.cpp8
-rw-r--r--unittests/Makefile.unittest2
-rw-r--r--unittests/Support/Path.cpp62
-rw-r--r--unittests/VMCore/InstructionsTest.cpp97
-rw-r--r--unittests/VMCore/ValueMapTest.cpp4
-rw-r--r--utils/LLVMBuild.txt4
-rw-r--r--utils/TableGen/AsmMatcherEmitter.cpp85
-rw-r--r--utils/TableGen/CMakeLists.txt1
-rw-r--r--utils/TableGen/CodeGenRegisters.cpp146
-rw-r--r--utils/TableGen/CodeGenRegisters.h7
-rw-r--r--utils/TableGen/CodeGenTarget.cpp1
-rw-r--r--utils/TableGen/DFAPacketizerEmitter.cpp512
-rw-r--r--utils/TableGen/DFAPacketizerEmitter.h54
-rw-r--r--utils/TableGen/EDEmitter.cpp12
-rw-r--r--utils/TableGen/LLVMBuild.txt1
-rw-r--r--utils/TableGen/SubtargetEmitter.cpp4
-rw-r--r--utils/TableGen/TableGen.cpp7
-rw-r--r--utils/buildit/GNUmakefile2
-rw-r--r--utils/lit/lit/TestRunner.py50
-rw-r--r--utils/llvm-build/llvmbuild/componentinfo.py37
-rw-r--r--utils/llvm-build/llvmbuild/main.py171
-rwxr-xr-xutils/release/tag.sh10
-rwxr-xr-xutils/release/test-release.sh50
-rw-r--r--utils/unittest/CMakeLists.txt8
-rw-r--r--utils/unittest/UnitTestMain/Makefile2
-rw-r--r--utils/unittest/googletest/Makefile2
1082 files changed, 47929 insertions, 13562 deletions
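For reference, the stat listing above is the diff of the merge against its first parent; it can be regenerated from a checkout of this repository that contains both commits (a sketch using the hashes from the commit header above):

    git diff --stat ac212abcc6d858470ad35ce7d660af0c1800364a a1e6e241a813f81be2d2f36ab60c950ca297574b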
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5d36f5f..ec42120 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -72,6 +72,7 @@ set(LLVM_ALL_TARGETS
CBackend
CellSPU
CppBackend
+ Hexagon
Mips
MBlaze
MSP430
@@ -89,7 +90,7 @@ if( MSVC )
set(LLVM_TARGETS_TO_BUILD X86
CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
else( MSVC )
- set(LLVM_TARGETS_TO_BUILD ${LLVM_ALL_TARGETS}
+ set(LLVM_TARGETS_TO_BUILD "all"
CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
endif( MSVC )
@@ -164,8 +165,8 @@ if (MSVC OR XCODE)
endif()
set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit")
-# On Win32/Cygwin, provide an option to specify the path to the GnuWin32 tools.
-if( WIN32 AND CYGWIN )
+# On Win32 hosts, provide an option to specify the path to the GnuWin32 tools.
+if( WIN32 AND NOT CYGWIN )
set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools")
endif()
@@ -230,7 +231,7 @@ endif()
set(LLVMBUILDTOOL "${LLVM_MAIN_SRC_DIR}/utils/llvm-build/llvm-build")
set(LLVMCONFIGLIBRARYDEPENDENCIESINC
- "${LLVM_BINARY_DIR}/tools/llvm-config-2/LibraryDependencies.inc")
+ "${LLVM_BINARY_DIR}/tools/llvm-config/LibraryDependencies.inc")
set(LLVMBUILDCMAKEFRAG
"${LLVM_BINARY_DIR}/LLVMBuild.cmake")
message(STATUS "Constructing LLVMBuild project information")
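As the cache string above documents, LLVM_TARGETS_TO_BUILD accepts either "all" or a semicolon-separated list, so a build can still be narrowed at configure time; an illustrative invocation (the source path is a placeholder):

    cmake -DLLVM_TARGETS_TO_BUILD="X86;ARM;Hexagon" /path/to/llvm-source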
diff --git a/LLVMBuild.txt b/LLVMBuild.txt
index 66ab8e4..e763fd2 100644
--- a/LLVMBuild.txt
+++ b/LLVMBuild.txt
@@ -15,8 +15,10 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = bindings docs examples lib projects runtime tools utils
+
[component_0]
type = Group
name = Miscellaneous
parent = $ROOT
-
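The [common] section added above is how llvm-build discovers nested LLVMBuild.txt files; leaf components then describe themselves in the same key/value format. A minimal sketch of such a leaf file (the keys are the ones used throughout this tree; the exact library list is illustrative, not the verbatim file):

    [component_0]
    type = Tool
    name = llc
    parent = Tools
    required_libraries = AsmParser BitReader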
diff --git a/Makefile b/Makefile
index 0ef6fb4..35fa00f 100644
--- a/Makefile
+++ b/Makefile
@@ -27,11 +27,11 @@ LEVEL := .
ifneq ($(findstring llvmCore, $(RC_ProjectName)),llvmCore) # Normal build (not "Apple-style").
ifeq ($(BUILD_DIRS_ONLY),1)
- DIRS := lib/Support lib/TableGen utils tools/llvm-config-2
+ DIRS := lib/Support lib/TableGen utils tools/llvm-config
OPTIONAL_DIRS := tools/clang/utils/TableGen
else
DIRS := lib/Support lib/TableGen utils lib/VMCore lib tools/llvm-shlib \
- tools/llvm-config tools/llvm-config-2 tools runtime docs unittests
+ tools/llvm-config tools runtime docs unittests
OPTIONAL_DIRS := projects bindings
endif
@@ -68,20 +68,14 @@ endif
ifeq ($(MAKECMDGOALS),install-clang)
DIRS := tools/clang/tools/driver tools/clang/lib/Headers \
+ tools/clang/tools/libclang tools/clang/tools/c-index-test \
+ tools/clang/include/clang-c \
tools/clang/runtime tools/clang/docs \
tools/lto runtime
OPTIONAL_DIRS :=
NO_INSTALL = 1
endif
-ifeq ($(MAKECMDGOALS),install-clang-c)
- DIRS := tools/clang/tools/driver tools/clang/lib/Headers \
- tools/clang/tools/libclang tools/clang/tools/c-index-test \
- tools/clang/include/clang-c
- OPTIONAL_DIRS :=
- NO_INSTALL = 1
-endif
-
ifeq ($(MAKECMDGOALS),clang-only)
DIRS := $(filter-out tools docs unittests, $(DIRS)) \
tools/clang tools/lto
@@ -169,7 +163,6 @@ clang-only: all
tools-only: all
libs-only: all
install-clang: install
-install-clang-c: install
install-libs: install
# If SHOW_DIAGNOSTICS is enabled, clear the diagnostics file first.
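With the install-clang-c goal folded into install-clang above, the libclang and c-index-test pieces now ship under the single goal; a typical staged invocation from the top level (DESTDIR handling comes from Makefile.rules; the staging path is illustrative):

    make install-clang DESTDIR=/tmp/llvm-stage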
diff --git a/Makefile.config.in b/Makefile.config.in
index dd948bf..6422559 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -86,6 +86,13 @@ PROJ_VERSION := 1.0
endif
endif
+INTERNAL_PREFIX := @INTERNAL_PREFIX@
+ifneq ($(INTERNAL_PREFIX),)
+PROJ_internal_prefix := $(INTERNAL_PREFIX)
+else
+PROJ_internal_prefix := $(prefix)
+endif
+
PROJ_bindir := $(PROJ_prefix)/bin
PROJ_libdir := $(PROJ_prefix)/lib
PROJ_datadir := $(PROJ_prefix)/share
diff --git a/Makefile.rules b/Makefile.rules
index 3816bbb..974a962 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -79,7 +79,7 @@ LLVMBuildTool := $(PROJ_SRC_ROOT)/utils/llvm-build/llvm-build
# The files we are going to generate using llvm-build.
LLVMBuildMakeFrag := $(PROJ_OBJ_ROOT)/Makefile.llvmbuild
LLVMConfigLibraryDependenciesInc := \
- $(PROJ_OBJ_ROOT)/tools/llvm-config-2/LibraryDependencies.inc
+ $(PROJ_OBJ_ROOT)/tools/llvm-config/LibraryDependencies.inc
# This is for temporary backwards compatibility.
ifndef TARGET_NATIVE_ARCH
@@ -528,9 +528,9 @@ ifndef LLVM_TBLGEN
endif
endif
ifeq ($(LLVM_CROSS_COMPILING),1)
- LLVM_CONFIG := $(BuildLLVMToolDir)/llvm-config-2$(BUILD_EXEEXT)
+ LLVM_CONFIG := $(BuildLLVMToolDir)/llvm-config$(BUILD_EXEEXT)
else
- LLVM_CONFIG := $(LLVMToolDir)/llvm-config-2$(EXEEXT)
+ LLVM_CONFIG := $(LLVMToolDir)/llvm-config$(EXEEXT)
endif
ifndef LLVMLD
LLVMLD := $(LLVMToolDir)/llvm-ld$(EXEEXT)
@@ -1503,12 +1503,19 @@ install-local::
uninstall-local::
$(Echo) Uninstall circumvented with NO_INSTALL
else
-DestTool = $(DESTDIR)$(PROJ_bindir)/$(TOOLEXENAME)
+
+ifdef INTERNAL_TOOL
+ToolBinDir = $(DESTDIR)$(PROJ_internal_prefix)/bin
+else
+ToolBinDir = $(DESTDIR)$(PROJ_bindir)
+endif
+DestTool = $(ToolBinDir)/$(TOOLEXENAME)
install-local:: $(DestTool)
-$(DestTool): $(ToolBuildPath) $(DESTDIR)$(PROJ_bindir)
+$(DestTool): $(ToolBuildPath)
$(Echo) Installing $(BuildMode) $(DestTool)
+ $(Verb) $(MKDIR) $(ToolBinDir)
$(Verb) $(ProgInstall) $(ToolBuildPath) $(DestTool)
uninstall-local::
@@ -1517,7 +1524,7 @@ uninstall-local::
# TOOLALIAS install.
ifdef TOOLALIAS
-DestToolAlias = $(DESTDIR)$(PROJ_bindir)/$(TOOLALIAS)$(EXEEXT)
+DestToolAlias = $(ToolBinDir)/$(TOOLALIAS)$(EXEEXT)
install-local:: $(DestToolAlias)
@@ -1851,6 +1858,9 @@ $(ObjDir)/ARMGenDecoderTables.inc.tmp : ARM.td $(ObjDir)/.dir $(LLVM_TBLGEN)
$(Echo) "Building $(<F) decoder tables with tblgen"
$(Verb) $(LLVMTableGen) -gen-arm-decoder -o $(call SYSPATH, $@) $<
+$(ObjDir)/%GenDFAPacketizer.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) DFA packetizer tables with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-dfa-packetizer -o $(call SYSPATH, $@) $<
clean-local::
-$(Verb) $(RM) -f $(INCFiles)
@@ -2276,6 +2286,7 @@ printvars::
$(Echo) "LLVM_SRC_ROOT: " '$(LLVM_SRC_ROOT)'
$(Echo) "LLVM_OBJ_ROOT: " '$(LLVM_OBJ_ROOT)'
$(Echo) "PROJ_prefix : " '$(PROJ_prefix)'
+ $(Echo) "PROJ_internal_prefix : " '$(PROJ_internal_prefix)'
$(Echo) "PROJ_bindir : " '$(PROJ_bindir)'
$(Echo) "PROJ_libdir : " '$(PROJ_libdir)'
$(Echo) "PROJ_etcdir : " '$(PROJ_etcdir)'
diff --git a/autoconf/config.sub b/autoconf/config.sub
index da19a88..9942491 100755
--- a/autoconf/config.sub
+++ b/autoconf/config.sub
@@ -4,7 +4,7 @@
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
# 2011 Free Software Foundation, Inc.
-timestamp='2011-08-23'
+timestamp='2011-11-02'
# This file is (in principle) common to ALL GNU software.
# The presence of a machine in this file suggests that SOME GNU software
@@ -256,6 +256,7 @@ case $basic_machine in
| c4x | clipper \
| d10v | d30v | dlx | dsp16xx \
| fido | fr30 | frv \
+ | hexagon \
| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
| i370 | i860 | i960 | ia64 \
| ip2k | iq2000 \
@@ -367,6 +368,7 @@ case $basic_machine in
| elxsi-* \
| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
| h8300-* | h8500-* \
+ | hexagon-* \
| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
| i*86-* | i860-* | i960-* | ia64-* \
| ip2k-* | iq2000-* \
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index d777553..d7fc95b 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -114,6 +114,7 @@ do
llvm-tv) AC_CONFIG_SUBDIRS([projects/llvm-tv]) ;;
safecode) AC_CONFIG_SUBDIRS([projects/safecode]) ;;
llvm-kernel) AC_CONFIG_SUBDIRS([projects/llvm-kernel]) ;;
+ compiler-rt) ;;
llvm-gcc) ;;
test-suite) ;;
llvm-test) ;;
@@ -356,6 +357,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
mips-*) llvm_cv_target_arch="Mips" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
msp430-*) llvm_cv_target_arch="MSP430" ;;
+ hexagon-*) llvm_cv_target_arch="Hexagon" ;;
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
ptx-*) llvm_cv_target_arch="PTX" ;;
*) llvm_cv_target_arch="Unknown" ;;
@@ -502,6 +504,7 @@ else
Mips) AC_SUBST(TARGET_HAS_JIT,1) ;;
XCore) AC_SUBST(TARGET_HAS_JIT,0) ;;
MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ Hexagon) AC_SUBST(TARGET_HAS_JIT,0) ;;
MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;;
PTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
*) AC_SUBST(TARGET_HAS_JIT,0) ;;
@@ -538,12 +541,13 @@ AC_ARG_ENABLE(threads,
[Use threads if available (default is YES)]),,
enableval=default)
case "$enableval" in
- yes) AC_SUBST(ENABLE_THREADS,[1]) ;;
- no) AC_SUBST(ENABLE_THREADS,[0]) ;;
- default) AC_SUBST(ENABLE_THREADS,[1]) ;;
+ yes) AC_SUBST(LLVM_ENABLE_THREADS,[1]) ;;
+ no) AC_SUBST(LLVM_ENABLE_THREADS,[0]) ;;
+ default) AC_SUBST(LLVM_ENABLE_THREADS,[1]) ;;
*) AC_MSG_ERROR([Invalid setting for --enable-threads. Use "yes" or "no"]) ;;
esac
-AC_DEFINE_UNQUOTED([ENABLE_THREADS],$ENABLE_THREADS,[Define if threads enabled])
+AC_DEFINE_UNQUOTED([LLVM_ENABLE_THREADS],$LLVM_ENABLE_THREADS,
+ [Define if threads enabled])
dnl Allow disablement of pthread.h
AC_ARG_ENABLE(pthreads,
@@ -613,14 +617,14 @@ dnl Allow specific targets to be specified for building (or not)
TARGETS_TO_BUILD=""
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
[Build specific host targets: all or target1,target2,... Valid targets are:
- host, x86, x86_64, sparc, powerpc, arm, mips, spu,
+ host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
xcore, msp430, ptx, cbe, and cpp (default=all)]),,
enableval=all)
if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CBackend CppBackend MBlaze PTX" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CBackend CppBackend MBlaze PTX Hexagon" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -634,6 +638,7 @@ case "$enableval" in
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;;
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
+ hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
host) case "$llvm_cv_target_arch" in
@@ -647,6 +652,7 @@ case "$enableval" in
CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
+ Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
*) AC_MSG_ERROR([Can not set target to build]) ;;
esac ;;
@@ -884,6 +890,12 @@ AC_ARG_ENABLE(libffi,AS_HELP_STRING(
esac],
llvm_cv_enable_libffi=no)
+AC_ARG_WITH(internal-prefix,
+ AS_HELP_STRING([--with-internal-prefix],
+ [Installation directory for internal files]),,
+ withval="")
+AC_SUBST(INTERNAL_PREFIX,[$withval])
+
dnl===-----------------------------------------------------------------------===
dnl===
dnl=== SECTION 4: Check for programs we need and that they are the right version
@@ -1144,7 +1156,7 @@ AC_SEARCH_LIBS(mallinfo,malloc,AC_DEFINE([HAVE_MALLINFO],[1],
dnl pthread locking functions are optional - but llvm will not be thread-safe
dnl without locks.
-if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
+if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
AC_CHECK_LIB(pthread, pthread_mutex_init)
AC_SEARCH_LIBS(pthread_mutex_lock,pthread,
AC_DEFINE([HAVE_PTHREAD_MUTEX_LOCK],[1],
@@ -1235,7 +1247,7 @@ AC_CHECK_HEADERS([sys/mman.h sys/param.h sys/resource.h sys/time.h sys/uio.h])
AC_CHECK_HEADERS([sys/types.h sys/ioctl.h malloc/malloc.h mach/mach.h])
AC_CHECK_HEADERS([valgrind/valgrind.h])
AC_CHECK_HEADERS([fenv.h])
-if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
+if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
AC_CHECK_HEADERS(pthread.h,
AC_SUBST(HAVE_PTHREAD, 1),
AC_SUBST(HAVE_PTHREAD, 0))
@@ -1571,9 +1583,6 @@ if test -f ${srcdir}/tools/clang/README.txt; then
AC_CONFIG_FILES([tools/clang/docs/doxygen.cfg])
fi
-dnl Do the first stage of configuration for llvm-config.in.
-AC_CONFIG_FILES([tools/llvm-config/llvm-config.in])
-
dnl OCaml findlib META file
AC_CONFIG_FILES([bindings/ocaml/llvm/META.llvm])
diff --git a/bindings/LLVMBuild.txt b/bindings/LLVMBuild.txt
index 4137077..241ac09 100644
--- a/bindings/LLVMBuild.txt
+++ b/bindings/LLVMBuild.txt
@@ -19,4 +19,3 @@
type = Group
name = Bindings
parent = $ROOT
-
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 0943ae7..88eaa74 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -332,6 +332,11 @@ else ()
set(LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo)
set(LLVM_NATIVE_TARGETMC LLVMInitialize${LLVM_NATIVE_ARCH}TargetMC)
set(LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter)
+
+ # We don't have an ASM parser for all architectures yet.
+ if (EXISTS ${CMAKE_SOURCE_DIR}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/CMakeLists.txt)
+ set(LLVM_NATIVE_ASMPARSER LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser)
+ endif ()
endif ()
if( MINGW )
@@ -379,14 +384,15 @@ endif( PURE_WINDOWS )
set(RETSIGTYPE void)
if( LLVM_ENABLE_THREADS )
- if( HAVE_PTHREAD_H OR WIN32 )
- set(ENABLE_THREADS 1)
+ # Check if threading primitives aren't supported on this platform
+ if( NOT HAVE_PTHREAD_H AND NOT WIN32 )
+ set(LLVM_ENABLE_THREADS 0)
endif()
endif()
-if( ENABLE_THREADS )
+if( LLVM_ENABLE_THREADS )
message(STATUS "Threads enabled.")
-else( ENABLE_THREADS )
+else( LLVM_ENABLE_THREADS )
message(STATUS "Threads disabled.")
endif()
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index b486fe4..388208b 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -25,16 +25,15 @@ macro(add_llvm_library name)
ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX})
endif()
set_target_properties(${name} PROPERTIES FOLDER "Libraries")
-endmacro(add_llvm_library name)
-
-macro(add_llvm_library_dependencies name)
- # Save the dependencies of the LLVM library in a variable so that we can
- # query it when resolve llvm-config-style component -> library mappings.
- set_property(GLOBAL PROPERTY LLVM_LIB_DEPS_${name} ${ARGN})
- # Then add the actual dependencies to the library target.
- target_link_libraries(${name} ${ARGN})
-endmacro(add_llvm_library_dependencies name)
+ # Add the explicit dependency information for this library.
+ #
+ # It would be nice to verify that we have the dependencies for this library
+ # name, but using get_property(... SET) doesn't suffice to determine if a
+ # property has been set to an empty value.
+ get_property(lib_deps GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_${name})
+ target_link_libraries(${name} ${lib_deps})
+endmacro(add_llvm_library name)
macro(add_llvm_loadable_module name)
if( NOT LLVM_ON_UNIX OR CYGWIN )
diff --git a/cmake/modules/CMakeLists.txt b/cmake/modules/CMakeLists.txt
index 2dcfa14..88d9852 100644
--- a/cmake/modules/CMakeLists.txt
+++ b/cmake/modules/CMakeLists.txt
@@ -4,9 +4,9 @@ set(LLVM_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
get_property(llvm_libs GLOBAL PROPERTY LLVM_LIBS)
foreach(lib ${llvm_libs})
- get_property(llvm_lib_deps GLOBAL PROPERTY LLVM_LIB_DEPS_${lib})
+ get_property(llvm_lib_deps GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_${lib})
set(all_llvm_lib_deps
- "${all_llvm_lib_deps}\nset_property(GLOBAL PROPERTY LLVM_LIB_DEPS_${lib} ${llvm_lib_deps})")
+ "${all_llvm_lib_deps}\nset_property(GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_${lib} ${llvm_lib_deps})")
endforeach(lib)
configure_file(
diff --git a/cmake/modules/LLVM-Config.cmake b/cmake/modules/LLVM-Config.cmake
index b5f262a..574335c 100755
--- a/cmake/modules/LLVM-Config.cmake
+++ b/cmake/modules/LLVM-Config.cmake
@@ -152,7 +152,7 @@ function(explicit_map_components_to_libraries out_libs)
set(processed)
while( cursor LESS lst_size )
list(GET expanded_components ${cursor} lib)
- get_property(lib_deps GLOBAL PROPERTY LLVM_LIB_DEPS_${lib})
+ get_property(lib_deps GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_${lib})
list(APPEND expanded_components ${lib_deps})
# Remove duplicates at the front:
list(REVERSE expanded_components)
diff --git a/cmake/modules/VersionFromVCS.cmake b/cmake/modules/VersionFromVCS.cmake
index 81739be..d6a2ae5 100644
--- a/cmake/modules/VersionFromVCS.cmake
+++ b/cmake/modules/VersionFromVCS.cmake
@@ -3,7 +3,7 @@
# existence of certain subdirectories under CMAKE_CURRENT_SOURCE_DIR.
function(add_version_info_from_vcs VERS)
- set(result ${${VERS}})
+ string(REPLACE "svn" "" result "${${VERS}}")
if( EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.svn" )
set(result "${result}svn")
# FindSubversion does not work with symlinks. See PR 8437
@@ -13,6 +13,7 @@ function(add_version_info_from_vcs VERS)
if( Subversion_FOUND )
subversion_wc_info( ${CMAKE_CURRENT_SOURCE_DIR} Project )
if( Project_WC_REVISION )
+ set(SVN_REVISION ${Project_WC_REVISION} PARENT_SCOPE)
set(result "${result}-r${Project_WC_REVISION}")
endif()
endif()
@@ -21,24 +22,47 @@ function(add_version_info_from_vcs VERS)
# Try to get a ref-id
find_program(git_executable NAMES git git.exe git.cmd)
if( git_executable )
- execute_process(COMMAND ${git_executable} show-ref HEAD
+ set(is_git_svn_rev_exact false)
+ execute_process(COMMAND ${git_executable} svn log --limit=1 --oneline
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
TIMEOUT 5
RESULT_VARIABLE git_result
OUTPUT_VARIABLE git_output)
if( git_result EQUAL 0 )
- string(SUBSTRING ${git_output} 0 7 git_ref_id)
- set(result "${result}-${git_ref_id}")
- else()
- execute_process(COMMAND ${git_executable} svn log --limit=1 --oneline
+ string(REGEX MATCH r[0-9]+ git_svn_rev ${git_output})
+ string(LENGTH "${git_svn_rev}" rev_length)
+ math(EXPR rev_length "${rev_length}-1")
+ string(SUBSTRING "${git_svn_rev}" 1 ${rev_length} git_svn_rev_number)
+ set(SVN_REVISION ${git_svn_rev_number} PARENT_SCOPE)
+ set(git_svn_rev "-svn-${git_svn_rev}")
+
+ # Determine if the HEAD points directly at a subversion revision.
+ execute_process(COMMAND ${git_executable} svn find-rev HEAD
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
TIMEOUT 5
RESULT_VARIABLE git_result
OUTPUT_VARIABLE git_output)
if( git_result EQUAL 0 )
- string(REGEX MATCH r[0-9]+ git_svn_rev ${git_output})
- set(result "${result}-svn-${git_svn_rev}")
+ string(STRIP "${git_output}" git_head_svn_rev_number)
+ if( git_head_svn_rev_number EQUAL git_svn_rev_number )
+ set(is_git_svn_rev_exact true)
+ endif()
endif()
+ else()
+ set(git_svn_rev "")
+ endif()
+ execute_process(COMMAND
+ ${git_executable} rev-parse --short HEAD
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ TIMEOUT 5
+ RESULT_VARIABLE git_result
+ OUTPUT_VARIABLE git_output)
+ if( git_result EQUAL 0 AND NOT is_git_svn_rev_exact )
+ string(STRIP "${git_output}" git_ref_id)
+ set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE)
+ set(result "${result}${git_svn_rev}-${git_ref_id}")
+ else()
+ set(result "${result}${git_svn_rev}")
endif()
endif()
endif()
diff --git a/configure b/configure
index bfd61dd..1d30b1c 100755
--- a/configure
+++ b/configure
@@ -694,7 +694,7 @@ JIT
TARGET_HAS_JIT
ENABLE_DOCS
ENABLE_DOXYGEN
-ENABLE_THREADS
+LLVM_ENABLE_THREADS
ENABLE_PTHREADS
ENABLE_PIC
ENABLE_SHARED
@@ -711,6 +711,7 @@ EXTRA_OPTIONS
EXTRA_LD_OPTIONS
CLANG_SRC_ROOT
BINUTILS_INCDIR
+INTERNAL_PREFIX
NM
ifGNUmake
LN_S
@@ -1418,7 +1419,7 @@ Optional Features:
--enable-targets Build specific host targets: all or
target1,target2,... Valid targets are: host, x86,
x86_64, sparc, powerpc, arm, mips, spu, xcore,
- msp430, ptx, cbe, and cpp (default=all)
+ hexagon, msp430, ptx, cbe, and cpp (default=all)
--enable-cbe-printf-a Enable C Backend output with hex floating point via
%a (default is YES)
--enable-bindings Build specific language bindings:
@@ -1451,6 +1452,7 @@ Optional Packages:
plugin-api.h file for gold plugin.
--with-bug-report-url Specify the URL where bug reports should be
submitted (default=http://llvm.org/bugs/)
+ --with-internal-prefix Installation directory for internal files
--with-tclinclude directory where tcl headers are
--with-udis86=<path> Use udis86 external x86 disassembler library
--with-oprofile=<prefix>
@@ -3491,6 +3493,7 @@ do
;;
llvm-kernel) subdirs="$subdirs projects/llvm-kernel"
;;
+ compiler-rt) ;;
llvm-gcc) ;;
test-suite) ;;
llvm-test) ;;
@@ -3880,6 +3883,7 @@ else
mips-*) llvm_cv_target_arch="Mips" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
msp430-*) llvm_cv_target_arch="MSP430" ;;
+ hexagon-*) llvm_cv_target_arch="Hexagon" ;;
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
ptx-*) llvm_cv_target_arch="PTX" ;;
*) llvm_cv_target_arch="Unknown" ;;
@@ -5100,6 +5104,8 @@ else
;;
MSP430) TARGET_HAS_JIT=0
;;
+ Hexagon) TARGET_HAS_JIT=0
+ ;;
MBlaze) TARGET_HAS_JIT=0
;;
PTX) TARGET_HAS_JIT=0
@@ -5155,11 +5161,11 @@ else
fi
case "$enableval" in
- yes) ENABLE_THREADS=1
+ yes) LLVM_ENABLE_THREADS=1
;;
- no) ENABLE_THREADS=0
+ no) LLVM_ENABLE_THREADS=0
;;
- default) ENABLE_THREADS=1
+ default) LLVM_ENABLE_THREADS=1
;;
*) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-threads. Use \"yes\" or \"no\"" >&5
echo "$as_me: error: Invalid setting for --enable-threads. Use \"yes\" or \"no\"" >&2;}
@@ -5167,7 +5173,7 @@ echo "$as_me: error: Invalid setting for --enable-threads. Use \"yes\" or \"no\"
esac
cat >>confdefs.h <<_ACEOF
-#define ENABLE_THREADS $ENABLE_THREADS
+#define LLVM_ENABLE_THREADS $LLVM_ENABLE_THREADS
_ACEOF
@@ -5288,7 +5294,7 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CBackend CppBackend MBlaze PTX" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CBackend CppBackend MBlaze PTX Hexagon" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -5302,6 +5308,7 @@ case "$enableval" in
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;;
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
+ hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
host) case "$llvm_cv_target_arch" in
@@ -5315,6 +5322,7 @@ case "$enableval" in
CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
+ Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
*) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
echo "$as_me: error: Can not set target to build" >&2;}
@@ -5671,6 +5679,17 @@ fi
+# Check whether --with-internal-prefix was given.
+if test "${with_internal_prefix+set}" = set; then
+ withval=$with_internal_prefix;
+else
+ withval=""
+fi
+
+INTERNAL_PREFIX=$withval
+
+
+
{ echo "$as_me:$LINENO: checking for BSD-compatible nm" >&5
echo $ECHO_N "checking for BSD-compatible nm... $ECHO_C" >&6; }
if test "${lt_cv_path_NM+set}" = set; then
@@ -10476,7 +10495,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 10479 "configure"
+#line 10493 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -12812,7 +12831,7 @@ _ACEOF
fi
-if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
+if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
{ echo "$as_me:$LINENO: checking for pthread_mutex_init in -lpthread" >&5
echo $ECHO_N "checking for pthread_mutex_init in -lpthread... $ECHO_C" >&6; }
@@ -15525,7 +15544,7 @@ fi
done
-if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
+if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
for ac_header in pthread.h
do
@@ -21087,9 +21106,6 @@ if test -f ${srcdir}/tools/clang/README.txt; then
fi
-ac_config_files="$ac_config_files tools/llvm-config/llvm-config.in"
-
-
ac_config_files="$ac_config_files bindings/ocaml/llvm/META.llvm"
@@ -21709,7 +21725,6 @@ do
"llvm.spec") CONFIG_FILES="$CONFIG_FILES llvm.spec" ;;
"docs/doxygen.cfg") CONFIG_FILES="$CONFIG_FILES docs/doxygen.cfg" ;;
"tools/clang/docs/doxygen.cfg") CONFIG_FILES="$CONFIG_FILES tools/clang/docs/doxygen.cfg" ;;
- "tools/llvm-config/llvm-config.in") CONFIG_FILES="$CONFIG_FILES tools/llvm-config/llvm-config.in" ;;
"bindings/ocaml/llvm/META.llvm") CONFIG_FILES="$CONFIG_FILES bindings/ocaml/llvm/META.llvm" ;;
"setup") CONFIG_COMMANDS="$CONFIG_COMMANDS setup" ;;
"Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS Makefile" ;;
@@ -21879,7 +21894,7 @@ JIT!$JIT$ac_delim
TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim
ENABLE_DOCS!$ENABLE_DOCS$ac_delim
ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim
-ENABLE_THREADS!$ENABLE_THREADS$ac_delim
+LLVM_ENABLE_THREADS!$LLVM_ENABLE_THREADS$ac_delim
ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim
ENABLE_PIC!$ENABLE_PIC$ac_delim
ENABLE_SHARED!$ENABLE_SHARED$ac_delim
@@ -21937,6 +21952,7 @@ EXTRA_OPTIONS!$EXTRA_OPTIONS$ac_delim
EXTRA_LD_OPTIONS!$EXTRA_LD_OPTIONS$ac_delim
CLANG_SRC_ROOT!$CLANG_SRC_ROOT$ac_delim
BINUTILS_INCDIR!$BINUTILS_INCDIR$ac_delim
+INTERNAL_PREFIX!$INTERNAL_PREFIX$ac_delim
NM!$NM$ac_delim
ifGNUmake!$ifGNUmake$ac_delim
LN_S!$LN_S$ac_delim
@@ -22017,7 +22033,7 @@ LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 90; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 91; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html
index 62ea6ca..345eb7c 100644
--- a/docs/CodeGenerator.html
+++ b/docs/CodeGenerator.html
@@ -50,6 +50,7 @@
<li><a href="#machinebasicblock">The <tt>MachineBasicBlock</tt>
class</a></li>
<li><a href="#machinefunction">The <tt>MachineFunction</tt> class</a></li>
+ <li><a href="#machineinstrbundle"><tt>MachineInstr Bundles</tt></a></li>
</ul>
</li>
<li><a href="#mc">The "MC" Layer</a>
@@ -97,6 +98,14 @@
<li><a href="#regAlloc_builtIn">Built in register allocators</a></li>
</ul></li>
<li><a href="#codeemit">Code Emission</a></li>
+ <li><a href="#vliw_packetizer">VLIW Packetizer</a>
+ <ul>
+ <li><a href="#vliw_mapping">Mapping from instructions to functional
+ units</a></li>
+ <li><a href="#vliw_repr">How the packetization tables are
+ generated and used</a></li>
+ </ul>
+ </li>
</ul>
</li>
<li><a href="#nativeassembler">Implementing a Native Assembler</a></li>
@@ -753,6 +762,88 @@ ret
</div>
+<!-- ======================================================================= -->
+<h3>
+ <a name="machineinstrbundle"><tt>MachineInstr Bundles</tt></a>
+</h3>
+
+<div>
+
+<p>The LLVM code generator can model sequences of instructions as MachineInstr
+   bundles. An MI bundle can model a VLIW group / pack which contains an
+   arbitrary number of parallel instructions. It can also be used to model
+   a sequential list of instructions (potentially with data dependencies) that
+   cannot be legally separated (e.g. ARM Thumb2 IT blocks).</p>
+
+<p>Conceptually a MI bundle is a MI with a number of other MIs nested within:
+</p>
+
+<div class="doc_code">
+<pre>
+--------------
+| Bundle | ---------
+-------------- \
+ | ----------------
+ | | MI |
+ | ----------------
+ | |
+ | ----------------
+ | | MI |
+ | ----------------
+ | |
+ | ----------------
+ | | MI |
+ | ----------------
+ |
+--------------
+| Bundle | --------
+-------------- \
+ | ----------------
+ | | MI |
+ | ----------------
+ | |
+ | ----------------
+ | | MI |
+ | ----------------
+ | |
+ | ...
+ |
+--------------
+| Bundle | --------
+-------------- \
+ |
+ ...
+</pre>
+</div>
+
+<p> MI bundle support does not change the physical representations of
+    MachineBasicBlock and MachineInstr. All the MIs (including top level and
+    nested ones) are stored as a sequential list of MIs. The "bundled" MIs are
+    marked with the 'InsideBundle' flag. A top level MI with the special BUNDLE
+    opcode is used to represent the start of a bundle. It's legal to mix BUNDLE
+    MIs with individual MIs that are neither inside bundles nor themselves
+    represent bundles.
+</p>
+
+<p> MachineInstr passes should operate on a MI bundle as a single unit. Member
+ methods have been taught to correctly handle bundles and MIs inside bundles.
+ The MachineBasicBlock iterator has been modified to skip over bundled MIs to
+ enforce the bundle-as-a-single-unit concept. An alternative iterator
+ instr_iterator has been added to MachineBasicBlock to allow passes to
+ iterate over all of the MIs in a MachineBasicBlock, including those which
+ are nested inside bundles. The top level BUNDLE instruction must have the
+   correct set of register MachineOperands that represent the cumulative
+ inputs and outputs of the bundled MIs.</p>
+
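+<p>For illustration, here is a minimal sketch of the two iteration styles
+   (the <tt>visitUnit</tt> and <tt>visitInstr</tt> callbacks are hypothetical;
+   the iterators are the ones described above):</p>
+
+<div class="doc_code">
+<pre>
+// Bundle-level walk: a BUNDLE and the MIs nested inside it form one unit.
+for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+     I != E; ++I)
+  visitUnit(*I);
+
+// Instruction-level walk: also sees the MIs nested inside bundles.
+for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
+     E = MBB.instr_end(); I != E; ++I)
+  visitInstr(*I);
+</pre>
+</div>
+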
+<p> Packing / bundling of MachineInstrs should be done as part of the register
+    allocation super-pass. More specifically, the pass which determines what
+    MIs should be bundled together must run after the code generator exits SSA
+    form (i.e. after the two-address pass, PHI elimination, and copy
+    coalescing).
+ Bundles should only be finalized (i.e. adding BUNDLE MIs and input and
+ output register MachineOperands) after virtual registers have been
+ rewritten into physical registers. This requirement eliminates the need to
+ add virtual register operands to BUNDLE instructions which would effectively
+ double the virtual register def and use lists.</p>
+
</div>
<!-- *********************************************************************** -->
@@ -2001,6 +2092,73 @@ to implement an assembler for your target.</p>
</div>
+<!-- ======================================================================= -->
+<h3>
+ <a name="vliw_packetizer">VLIW Packetizer</a>
+</h3>
+
+<div>
+
+<p>In a Very Long Instruction Word (VLIW) architecture, the compiler is
+ responsible for mapping instructions to functional-units available on
+ the architecture. To that end, the compiler creates groups of instructions
+ called <i>packets</i> or <i>bundles</i>. The VLIW packetizer in LLVM is
+ a target-independent mechanism to enable the packetization of machine
+ instructions.</p>
+
+<!-- _______________________________________________________________________ -->
+
+<h4>
+ <a name="vliw_mapping">Mapping from instructions to functional units</a>
+</h4>
+
+<div>
+
+<p>Instructions in a VLIW target can typically be mapped to multiple functional
+units. During the process of packetizing, the compiler must be able to reason
+about whether an instruction can be added to a packet. This decision can be
+complex since the compiler has to examine all possible mappings of instructions
+to functional units. Therefore, to alleviate compilation-time complexity, the
+VLIW packetizer parses the instruction classes of a target and generates tables
+at compiler build time. These tables can then be queried by the provided
+machine-independent API to determine if an instruction can be accommodated in a
+packet.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
+ <a name="vliw_repr">
+ How the packetization tables are generated and used
+ </a>
+</h4>
+
+<div>
+
+<p>The packetizer reads instruction classes from a target's itineraries and
+creates a deterministic finite automaton (DFA) to represent the state of a
+packet. A DFA consists of three major elements: inputs, states, and
+transitions. The set of inputs for the generated DFA represents the instruction
+being added to a packet. The states represent the possible consumption
+of functional units by instructions in a packet. In the DFA, transitions from
+one state to another occur on the addition of an instruction to an existing
+packet. If there is a legal mapping of functional units to instructions, then
+the DFA contains a corresponding transition. The absence of a transition
+indicates that a legal mapping does not exist and that the instruction cannot
+be added to the packet.</p>
+
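+<p>As a simplified illustration (the functional units here are hypothetical,
+and the extra bookkeeping needed when one instruction can map to several units
+is ignored), suppose MEM instructions require SLOT0 and ALU instructions
+require SLOT1. The generated DFA then contains transitions such as:</p>
+
+<div class="doc_code">
+<pre>
+{}       --MEM--&gt; {SLOT0}
+{}       --ALU--&gt; {SLOT1}
+{SLOT0}  --ALU--&gt; {SLOT0, SLOT1}
+{SLOT1}  --MEM--&gt; {SLOT0, SLOT1}
+{SLOT0}  --MEM--&gt; no transition; SLOT0 is already in use
+</pre>
+</div>
+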
+<p>To generate tables for a VLIW target, add <i>Target</i>GenDFAPacketizer.inc
+as a target to the Makefile in the target directory. The exported API provides
+three functions: <tt>DFAPacketizer::clearResources()</tt>,
+<tt>DFAPacketizer::reserveResources(MachineInstr *MI)</tt>, and
+<tt>DFAPacketizer::canReserveResources(MachineInstr *MI)</tt>. These functions
+allow a target packetizer to add an instruction to an existing packet and to
+check whether an instruction can be added to a packet. See
+<tt>llvm/CodeGen/DFAPacketizer.h</tt> for more information.</p>
+
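+<p>A minimal sketch of how a target packetizer might drive this API (only the
+three <tt>DFAPacketizer</tt> member functions named above are taken from the
+API; the surrounding names are hypothetical):</p>
+
+<div class="doc_code">
+<pre>
+// Append MI to the packet in progress, or start a new packet if MI
+// cannot be accommodated.
+void addMI(DFAPacketizer *Packetizer,
+           std::vector&lt;MachineInstr*&gt; &amp;Packet, MachineInstr *MI) {
+  if (!Packetizer-&gt;canReserveResources(MI)) {
+    // No DFA transition exists: close the current packet.
+    emitPacket(Packet);
+    Packet.clear();
+    Packetizer-&gt;clearResources();
+  }
+  Packetizer-&gt;reserveResources(MI);  // Consume functional units for MI.
+  Packet.push_back(MI);
+}
+</pre>
+</div>
+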
+</div>
+
+</div>
+
</div>
<!-- *********************************************************************** -->
@@ -2213,6 +2371,7 @@ is the key:</p>
<th>Feature</th>
<th>ARM</th>
<th>CellSPU</th>
+ <th>Hexagon</th>
<th>MBlaze</th>
<th>MSP430</th>
<th>Mips</th>
@@ -2227,6 +2386,7 @@ is the key:</p>
<td><a href="#feat_reliable">is generally reliable</a></td>
<td class="yes"></td> <!-- ARM -->
<td class="no"></td> <!-- CellSPU -->
+ <td class="yes"></td> <!-- Hexagon -->
<td class="no"></td> <!-- MBlaze -->
<td class="unknown"></td> <!-- MSP430 -->
<td class="yes"></td> <!-- Mips -->
@@ -2241,6 +2401,7 @@ is the key:</p>
<td><a href="#feat_asmparser">assembly parser</a></td>
<td class="no"></td> <!-- ARM -->
<td class="no"></td> <!-- CellSPU -->
+ <td class="no"></td> <!-- Hexagon -->
<td class="yes"></td> <!-- MBlaze -->
<td class="no"></td> <!-- MSP430 -->
<td class="no"></td> <!-- Mips -->
@@ -2255,6 +2416,7 @@ is the key:</p>
<td><a href="#feat_disassembler">disassembler</a></td>
<td class="yes"></td> <!-- ARM -->
<td class="no"></td> <!-- CellSPU -->
+ <td class="no"></td> <!-- Hexagon -->
<td class="yes"></td> <!-- MBlaze -->
<td class="no"></td> <!-- MSP430 -->
<td class="no"></td> <!-- Mips -->
@@ -2269,6 +2431,7 @@ is the key:</p>
<td><a href="#feat_inlineasm">inline asm</a></td>
<td class="yes"></td> <!-- ARM -->
<td class="no"></td> <!-- CellSPU -->
+ <td class="yes"></td> <!-- Hexagon -->
<td class="yes"></td> <!-- MBlaze -->
<td class="unknown"></td> <!-- MSP430 -->
<td class="no"></td> <!-- Mips -->
@@ -2283,6 +2446,7 @@ is the key:</p>
<td><a href="#feat_jit">jit</a></td>
<td class="partial"><a href="#feat_jit_arm">*</a></td> <!-- ARM -->
<td class="no"></td> <!-- CellSPU -->
+ <td class="no"></td> <!-- Hexagon -->
<td class="no"></td> <!-- MBlaze -->
<td class="unknown"></td> <!-- MSP430 -->
<td class="yes"></td> <!-- Mips -->
@@ -2297,6 +2461,7 @@ is the key:</p>
<td><a href="#feat_objectwrite">.o&nbsp;file writing</a></td>
<td class="no"></td> <!-- ARM -->
<td class="no"></td> <!-- CellSPU -->
+ <td class="no"></td> <!-- Hexagon -->
<td class="yes"></td> <!-- MBlaze -->
<td class="no"></td> <!-- MSP430 -->
<td class="no"></td> <!-- Mips -->
@@ -2311,6 +2476,7 @@ is the key:</p>
<td><a href="#feat_tailcall">tail calls</a></td>
<td class="yes"></td> <!-- ARM -->
<td class="no"></td> <!-- CellSPU -->
+ <td class="yes"></td> <!-- Hexagon -->
<td class="no"></td> <!-- MBlaze -->
<td class="unknown"></td> <!-- MSP430 -->
<td class="no"></td> <!-- Mips -->
@@ -2321,6 +2487,20 @@ is the key:</p>
<td class="unknown"></td> <!-- XCore -->
</tr>
+<tr>
+ <td><a href="#feat_segstacks">segmented stacks</a></td>
+ <td class="no"></td> <!-- ARM -->
+ <td class="no"></td> <!-- CellSPU -->
+ <td class="no"></td> <!-- Hexagon -->
+ <td class="no"></td> <!-- MBlaze -->
+ <td class="no"></td> <!-- MSP430 -->
+ <td class="no"></td> <!-- Mips -->
+ <td class="no"></td> <!-- PTX -->
+ <td class="no"></td> <!-- PowerPC -->
+ <td class="no"></td> <!-- Sparc -->
+ <td class="partial"><a href="#feat_segstacks_x86">*</a></td> <!-- X86 -->
+ <td class="no"></td> <!-- XCore -->
+</tr>
+
</table>
@@ -2404,6 +2584,22 @@ more more details</a>.</p>
</div>
+<!-- _______________________________________________________________________ -->
+<h4 id="feat_segstacks">Segmented Stacks</h4>
+
+<div>
+
+<p>This box indicates whether the target supports segmented stacks. This
+replaces the traditional large C stack with many linked segments. It
+is compatible with the <a href="http://gcc.gnu.org/wiki/SplitStacks">gcc
+implementation</a> used by the Go front end.</p>
+
+<p id="feat_segstacks_x86">Basic support exists on the X86 backend. Currently
+vararg doesn't work and the object files are not marked the way the gold
+linker expects, but simple Go programs can be built by dragonegg.</p>
+
+</div>
+
</div>
<!-- ======================================================================= -->
diff --git a/docs/CommandGuide/index.html b/docs/CommandGuide/index.html
index 166cca1..145569c 100644
--- a/docs/CommandGuide/index.html
+++ b/docs/CommandGuide/index.html
@@ -72,6 +72,9 @@ options) arguments to the tool you are interested in.</p>
<li><a href="/cmds/llvm-diff.html"><b>llvm-diff</b></a> -
structurally compare two modules</li>
+<li><a href="/cmds/llvm-cov.html"><b>llvm-cov</b></a> -
+ emit coverage information</li>
+
</ul>
</div>
diff --git a/docs/CommandGuide/llvm-build.pod b/docs/CommandGuide/llvm-build.pod
index 78648ba..14e08cb 100644
--- a/docs/CommandGuide/llvm-build.pod
+++ b/docs/CommandGuide/llvm-build.pod
@@ -46,7 +46,10 @@ component combinations.
=item B<--write-llvmbuild>
Write out new I<LLVMBuild.txt> files based on the loaded components. This is
-useful for auto-upgrading the schema of the files.
+useful for auto-upgrading the schema of the files. B<llvm-build> will try, to a
+limited extent, to preserve the comments which were written in the original
+source file, although at this time it only preserves block comments that precede
+the section names in the I<LLVMBuild> files.
=item B<--write-cmake-fragment>
diff --git a/docs/CommandGuide/llvm-cov.pod b/docs/CommandGuide/llvm-cov.pod
new file mode 100644
index 0000000..e8ff683
--- /dev/null
+++ b/docs/CommandGuide/llvm-cov.pod
@@ -0,0 +1,45 @@
+=pod
+
+=head1 NAME
+
+llvm-cov - emit coverage information
+
+=head1 SYNOPSIS
+
+B<llvm-cov> [-gcno=filename] [-gcda=filename] [-dump]
+
+=head1 DESCRIPTION
+
+The experimental B<llvm-cov> tool reads a description file generated by the
+compiler and a coverage data file generated by an instrumented program. It
+assumes that the description and data files use the same format as gcov files.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-gcno=filename>
+
+This option selects the input description file generated by the compiler while
+instrumenting the program.
+
+=item B<-gcda=filename>
+
+This option selects the coverage data file generated by the instrumented program.
+
+=item B<-dump>
+
+This option enables an output dump that is suitable for helping a developer
+debug B<llvm-cov> itself.
+
+=back
+
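+=head1 EXAMPLE
+
+A typical invocation, assuming hypothetical files F<foo.gcno> and F<foo.gcda>
+produced by a gcov-compatible instrumented build, might be:
+
+  llvm-cov -gcno=foo.gcno -gcda=foo.gcda
+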
+=head1 EXIT STATUS
+
+B<llvm-cov> returns 1 if it cannot read input files. Otherwise, it exits with zero.
+
+=head1 AUTHOR
+
+B<llvm-cov> is maintained by the LLVM Team (L<http://llvm.org/>).
+
+=cut
diff --git a/docs/ExceptionHandling.html b/docs/ExceptionHandling.html
index b378deb..42881f5 100644
--- a/docs/ExceptionHandling.html
+++ b/docs/ExceptionHandling.html
@@ -38,7 +38,6 @@
<li><a href="#llvm_eh_sjlj_longjmp"><tt>llvm.eh.sjlj.longjmp</tt></a></li>
<li><a href="#llvm_eh_sjlj_lsda"><tt>llvm.eh.sjlj.lsda</tt></a></li>
<li><a href="#llvm_eh_sjlj_callsite"><tt>llvm.eh.sjlj.callsite</tt></a></li>
- <li><a href="#llvm_eh_sjlj_dispatchsetup"><tt>llvm.eh.sjlj.dispatchsetup</tt></a></li>
</ol></li>
<li><a href="#asm">Asm Table Formats</a>
<ol>
@@ -50,7 +49,7 @@
</tr></table>
<div class="doc_author">
- <p>Written by <a href="mailto:jlaskey@mac.com">Jim Laskey</a></p>
+ <p>Written by the <a href="http://llvm.org/">LLVM Team</a></p>
</div>
@@ -499,25 +498,6 @@
</div>
<!-- ======================================================================= -->
-<h4>
- <a name="llvm_eh_sjlj_dispatchsetup">llvm.eh.sjlj.dispatchsetup</a>
-</h4>
-
-<div>
-
-<pre>
- void @llvm.eh.sjlj.dispatchsetup(i32 %dispatch_value)
-</pre>
-
-<p>For SJLJ based exception handling, the <tt>llvm.eh.sjlj.dispatchsetup</tt>
- intrinsic is used by targets to do any unwind edge setup they need. By
- default, no action is taken.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
<h2>
<a name="asm">Asm Table Formats</a>
</h2>
@@ -573,7 +553,6 @@
<a href="http://validator.w3.org/check/referer"><img
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
Last modified: $Date$
</address>
diff --git a/docs/LLVMBuild.html b/docs/LLVMBuild.html
index 2f06e30..b069467 100644
--- a/docs/LLVMBuild.html
+++ b/docs/LLVMBuild.html
@@ -147,7 +147,7 @@ $ROOT of project trees for things which can be checked out separately. -->
<i>; Comments start with a semi-colon.</i>
<i>; Sections are declared using square brackets.</i>
-[component 0]
+[component_0]
<i>; Properties are declared using '=' and are contained in the previous section.
;
@@ -160,7 +160,7 @@ boolean_property_name = 1 <em>(or 0)</em>
</pre>
</div>
- <p>LLVMBuild files are expected to define a strict set of section and
+ <p>LLVMBuild files are expected to define a strict set of sections and
 properties. A typical component description file for a library
 component would typically look like the following example:</p>
<div class="doc_code">
@@ -176,14 +176,22 @@ required_libraries = Archive BitReader Core Support TransformUtils
<p>A full description of the exact sections and properties which are allowed
follows.</p>
+ <p>Each file may define exactly one common component, named "common". The
+ common component may define the following properties:</p>
+ <ul>
+ <li><i>subdirectories</i> <b>[optional]</b>
+ <p>If given, a list of the names of the subdirectories from the current
+ subpath to search for additional LLVMBuild files.</p></li>
+ </ul>
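+
+ <p>As an example, the "common" section added to the top-level
+ <i>LLVMBuild.txt</i> file by this change looks like:</p>
+<div class="doc_code">
+<pre>
+[common]
+subdirectories = bindings docs examples lib projects runtime tools utils
+</pre>
+</div>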
+
 <p>Each file may define multiple components. Each component is described by a
 section whose name starts with "component". The remainder of the section name is
 ignored, but each section name must be unique. Typically components are just
 numbered in order for files with multiple components ("component_0",
 "component_1", and so on).</p>
- <p><b>Section names not matches this format are currently
- unused and are disallowed.</b></p>
+ <p><b>Section names not matching this format (or the "common" section) are
+ currently unused and are disallowed.</b></p>
<p>Every component is defined by the properties in the section. The exact list
of properties that are allowed depends on the component
diff --git a/docs/LLVMBuild.txt b/docs/LLVMBuild.txt
index 581d7dd..d5aea86 100644
--- a/docs/LLVMBuild.txt
+++ b/docs/LLVMBuild.txt
@@ -19,4 +19,3 @@
type = Group
name = Docs
parent = $ROOT
-
diff --git a/docs/LangRef.html b/docs/LangRef.html
index a9ec800..90308a4 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -92,7 +92,7 @@
<li><a href="#complexconstants">Complex Constants</a></li>
<li><a href="#globalconstants">Global Variable and Function Addresses</a></li>
<li><a href="#undefvalues">Undefined Values</a></li>
- <li><a href="#trapvalues">Trap Values</a></li>
+ <li><a href="#poisonvalues">Poison Values</a></li>
<li><a href="#blockaddress">Addresses of Basic Blocks</a></li>
<li><a href="#constantexprs">Constant Expressions</a></li>
</ol>
@@ -288,10 +288,10 @@
</li>
<li><a href="#int_memorymarkers">Memory Use Markers</a>
<ol>
- <li><a href="#int_lifetime_start"><tt>llvm.lifetime.start</tt></a></li>
- <li><a href="#int_lifetime_end"><tt>llvm.lifetime.end</tt></a></li>
- <li><a href="#int_invariant_start"><tt>llvm.invariant.start</tt></a></li>
- <li><a href="#int_invariant_end"><tt>llvm.invariant.end</tt></a></li>
+ <li><a href="#int_lifetime_start">'<tt>llvm.lifetime.start</tt>' Intrinsic</a></li>
+ <li><a href="#int_lifetime_end">'<tt>llvm.lifetime.end</tt>' Intrinsic</a></li>
+ <li><a href="#int_invariant_start">'<tt>llvm.invariant.start</tt>' Intrinsic</a></li>
+ <li><a href="#int_invariant_end">'<tt>llvm.invariant.end</tt>' Intrinsic</a></li>
</ol>
</li>
<li><a href="#int_general">General intrinsics</a>
@@ -306,6 +306,8 @@
'<tt>llvm.stackprotector</tt>' Intrinsic</a></li>
<li><a href="#int_objectsize">
'<tt>llvm.objectsize</tt>' Intrinsic</a></li>
+ <li><a href="#int_expect">
+ '<tt>llvm.expect</tt>' Intrinsic</a></li>
</ol>
</li>
</ol>
@@ -1214,6 +1216,12 @@ define void @f() optsize { ... }
exception by calling the <tt>C++</tt> exception throwing methods, but may
use the <tt>unwind</tt> instruction.</dd>
+ <dt><tt><b><a name="returns_twice">returns_twice</a></b></tt></dt>
+ <dd>This attribute indicates that this function can return twice. The
+ C <code>setjmp</code> is an example of such a function. The compiler
+ disables some optimizations (like tail calls) in the caller of these
+ functions.</dd>
+
<dt><tt><b><a name="ssp">ssp</a></b></tt></dt>
<dd>This attribute indicates that the function should emit a stack smashing
protector. It is in the form of a "canary"&mdash;a random value placed on
@@ -1241,12 +1249,6 @@ define void @f() optsize { ... }
show that no exceptions passes by it. This is normally the case for
the ELF x86-64 abi, but it can be disabled for some compilation
units.</dd>
-
- <dt><tt><b><a name="returns_twice">returns_twice</a></b></tt></dt>
- <dd>This attribute indicates that this function can return
- twice. The C <code>setjmp</code> is an example of such a function.
- The compiler disables some optimizations (like tail calls) in the caller of
- these functions.</dd>
</dl>
</div>
@@ -1911,9 +1913,9 @@ in signal handlers).</p>
<div>
<p>Aggregate Types are a subset of derived types that can contain multiple
- member types. <a href="#t_array">Arrays</a>,
- <a href="#t_struct">structs</a>, and <a href="#t_vector">vectors</a> are
- aggregate types.</p>
+ member types. <a href="#t_array">Arrays</a> and
+ <a href="#t_struct">structs</a> are aggregate types.
+ <a href="#t_vector">Vectors</a> are not considered to be aggregate types.</p>
</div>
@@ -2187,8 +2189,8 @@ in signal handlers).</p>
</pre>
<p>The number of elements is a constant integer value larger than 0; elementtype
- may be any integer or floating point type. Vectors of size zero are not
- allowed, and pointers are not allowed as the element type.</p>
+ may be any integer or floating point type, or a pointer to these types.
+ Vectors of size zero are not allowed. </p>
<h5>Examples:</h5>
<table class="layout">
@@ -2204,6 +2206,10 @@ in signal handlers).</p>
<td class="left"><tt>&lt;2 x i64&gt;</tt></td>
<td class="left">Vector of 2 64-bit integer values.</td>
</tr>
+ <tr class="layout">
+ <td class="left"><tt>&lt;4 x i64*&gt;</tt></td>
+ <td class="left">Vector of 4 pointers to 64-bit integer values.</td>
+ </tr>
</table>
</div>
@@ -2500,22 +2506,21 @@ b: unreachable
<!-- ======================================================================= -->
<h3>
- <a name="trapvalues">Trap Values</a>
+ <a name="poisonvalues">Poison Values</a>
</h3>
<div>
-<p>Trap values are similar to <a href="#undefvalues">undef values</a>, however
- instead of representing an unspecified bit pattern, they represent the
- fact that an instruction or constant expression which cannot evoke side
- effects has nevertheless detected a condition which results in undefined
- behavior.</p>
+<p>Poison values are similar to <a href="#undefvalues">undef values</a>, however
+ they also represent the fact that an instruction or constant expression which
+ cannot evoke side effects has nevertheless detected a condition which results
+ in undefined behavior.</p>
-<p>There is currently no way of representing a trap value in the IR; they
+<p>There is currently no way of representing a poison value in the IR; they
only exist when produced by operations such as
<a href="#i_add"><tt>add</tt></a> with the <tt>nsw</tt> flag.</p>
-<p>Trap value behavior is defined in terms of value <i>dependence</i>:</p>
+<p>Poison value behavior is defined in terms of value <i>dependence</i>:</p>
<ul>
<li>Values other than <a href="#i_phi"><tt>phi</tt></a> nodes depend on
@@ -2566,62 +2571,61 @@ b: unreachable
</ul>
-<p>Whenever a trap value is generated, all values which depend on it evaluate
- to trap. If they have side effects, they evoke their side effects as if each
- operand with a trap value were undef. If they have externally-visible side
- effects, the behavior is undefined.</p>
+<p>Poison values have the same behavior as <a href="#undefvalues">undef values</a>,
+   with the additional effect that any instruction which has a <i>dependence</i>
+   on a poison value has undefined behavior.</p>
<p>Here are some examples:</p>
<pre class="doc_code">
entry:
- %trap = sub nuw i32 0, 1 ; Results in a trap value.
- %still_trap = and i32 %trap, 0 ; Whereas (and i32 undef, 0) would return 0.
- %trap_yet_again = getelementptr i32* @h, i32 %still_trap
- store i32 0, i32* %trap_yet_again ; undefined behavior
+ %poison = sub nuw i32 0, 1 ; Results in a poison value.
+ %still_poison = and i32 %poison, 0 ; 0, but also poison.
+ %poison_yet_again = getelementptr i32* @h, i32 %still_poison
+ store i32 0, i32* %poison_yet_again ; memory at @h[0] is poisoned
- store i32 %trap, i32* @g ; Trap value conceptually stored to memory.
- %trap2 = load i32* @g ; Returns a trap value, not just undef.
+ store i32 %poison, i32* @g ; Poison value stored to memory.
+ %poison2 = load i32* @g ; Poison value loaded back from memory.
- volatile store i32 %trap, i32* @g ; External observation; undefined behavior.
+ store volatile i32 %poison, i32* @g ; External observation; undefined behavior.
%narrowaddr = bitcast i32* @g to i16*
%wideaddr = bitcast i32* @g to i64*
- %trap3 = load i16* %narrowaddr ; Returns a trap value.
- %trap4 = load i64* %wideaddr ; Returns a trap value.
+ %poison3 = load i16* %narrowaddr ; Returns a poison value.
+ %poison4 = load i64* %wideaddr ; Returns a poison value.
- %cmp = icmp slt i32 %trap, 0 ; Returns a trap value.
- br i1 %cmp, label %true, label %end ; Branch to either destination.
+ %cmp = icmp slt i32 %poison, 0 ; Returns a poison value.
+ br i1 %cmp, label %true, label %end ; Branch to either destination.
true:
- volatile store i32 0, i32* @g ; This is control-dependent on %cmp, so
- ; it has undefined behavior.
+ store volatile i32 0, i32* @g ; This is control-dependent on %cmp, so
+ ; it has undefined behavior.
br label %end
end:
%p = phi i32 [ 0, %entry ], [ 1, %true ]
- ; Both edges into this PHI are
- ; control-dependent on %cmp, so this
- ; always results in a trap value.
+ ; Both edges into this PHI are
+ ; control-dependent on %cmp, so this
+ ; always results in a poison value.
- volatile store i32 0, i32* @g ; This would depend on the store in %true
- ; if %cmp is true, or the store in %entry
- ; otherwise, so this is undefined behavior.
+ store volatile i32 0, i32* @g ; This would depend on the store in %true
+ ; if %cmp is true, or the store in %entry
+ ; otherwise, so this is undefined behavior.
br i1 %cmp, label %second_true, label %second_end
- ; The same branch again, but this time the
- ; true block doesn't have side effects.
+ ; The same branch again, but this time the
+ ; true block doesn't have side effects.
second_true:
; No side effects!
ret void
second_end:
- volatile store i32 0, i32* @g ; This time, the instruction always depends
- ; on the store in %end. Also, it is
- ; control-equivalent to %end, so this is
- ; well-defined (again, ignoring earlier
- ; undefined behavior in this example).
+ store volatile i32 0, i32* @g ; This time, the instruction always depends
+ ; on the store in %end. Also, it is
+ ; control-equivalent to %end, so this is
+ ; well-defined (ignoring earlier undefined
+ ; behavior in this example).
</pre>
</div>
@@ -2800,7 +2804,7 @@ second_end:
<div>
<p>LLVM supports inline assembler expressions (as opposed
- to <a href="#moduleasm"> Module-Level Inline Assembly</a>) through the use of
+ to <a href="#moduleasm">Module-Level Inline Assembly</a>) through the use of
a special value. This value represents the inline assembler as a string
(containing the instructions to emit), a list of operand constraints (stored
as a string), a flag that indicates whether or not the inline asm
@@ -2842,23 +2846,27 @@ call void asm alignstack "eieio", ""()
<p>If both keywords appear the '<tt>sideeffect</tt>' keyword must come
first.</p>
+<!--
<p>TODO: The format of the asm and constraints string still need to be
documented here. Constraints on what can be done (e.g. duplication, moving,
etc need to be documented). This is probably best done by reference to
another document that covers inline asm from a holistic perspective.</p>
+ -->
+<!-- _______________________________________________________________________ -->
<h4>
-<a name="inlineasm_md">Inline Asm Metadata</a>
+ <a name="inlineasm_md">Inline Asm Metadata</a>
</h4>
<div>
-<p>The call instructions that wrap inline asm nodes may have a "!srcloc" MDNode
- attached to it that contains a list of constant integers. If present, the
- code generator will use the integer as the location cookie value when report
- errors through the LLVMContext error reporting mechanisms. This allows a
- front-end to correlate backend errors that occur with inline asm back to the
- source code that produced it. For example:</p>
+<p>The call instructions that wrap inline asm nodes may have a
+   "<tt>!srcloc</tt>" MDNode attached to them that contains a list of constant
+   integers. If present, the code generator will use the integer as the
+   location cookie value when reporting errors through the <tt>LLVMContext</tt>
+   error reporting mechanisms. This allows a front-end to correlate backend
+   errors that occur with inline asm back to the source code that produced it.
+   For example:</p>
<pre class="doc_code">
call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
@@ -2867,7 +2875,7 @@ call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
</pre>
<p>It is up to the front-end to make sense of the magic numbers it places in the
- IR. If the MDNode contains multiple constants, the code generator will use
+ IR. If the MDNode contains multiple constants, the code generator will use
the one that corresponds to the line of the asm that the error occurs on.</p>
</div>
@@ -2889,20 +2897,33 @@ call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
preceding exclamation point ('<tt>!</tt>').</p>
<p>A metadata string is a string surrounded by double quotes. It can contain
- any character by escaping non-printable characters with "\xx" where "xx" is
- the two digit hex code. For example: "<tt>!"test\00"</tt>".</p>
+ any character by escaping non-printable characters with "<tt>\xx</tt>" where
+ "<tt>xx</tt>" is the two digit hex code. For example:
+ "<tt>!"test\00"</tt>".</p>
<p>Metadata nodes are represented with notation similar to structure constants
(a comma separated list of elements, surrounded by braces and preceded by an
- exclamation point). For example: "<tt>!{ metadata !"test\00", i32
- 10}</tt>". Metadata nodes can have any values as their operand.</p>
+ exclamation point). Metadata nodes can have any values as their operand. For
+ example:</p>
+
+<div class="doc_code">
+<pre>
+!{ metadata !"test\00", i32 10}
+</pre>
+</div>
<p>A <a href="#namedmetadatastructure">named metadata</a> is a collection of
metadata nodes, which can be looked up in the module symbol table. For
- example: "<tt>!foo = metadata !{!4, !3}</tt>".
+ example:</p>
+
+<div class="doc_code">
+<pre>
+!foo = metadata !{!4, !3}
+</pre>
+</div>
<p>Metadata can be used as function arguments. Here <tt>llvm.dbg.value</tt>
- function is using two metadata arguments.</p>
+ function is using two metadata arguments:</p>
<div class="doc_code">
<pre>
@@ -2911,7 +2932,8 @@ call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
</div>
 <p>Metadata can be attached to an instruction. Here metadata <tt>!21</tt> is
- attached with <tt>add</tt> instruction using <tt>!dbg</tt> identifier.</p>
+ attached to the <tt>add</tt> instruction using the <tt>!dbg</tt>
+ identifier:</p>
<div class="doc_code">
<pre>
@@ -2922,6 +2944,7 @@ call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
<p>More information about specific metadata nodes recognized by the optimizers
and code generator is found below.</p>
+<!-- _______________________________________________________________________ -->
<h4>
<a name="tbaa">'<tt>tbaa</tt>' Metadata</a>
</h4>
@@ -2966,6 +2989,7 @@ call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
</div>
+<!-- _______________________________________________________________________ -->
<h4>
<a name="fpaccuracy">'<tt>fpaccuracy</tt>' Metadata</a>
</h4>
@@ -3378,7 +3402,7 @@ IfUnequal:
<i><a href="ExceptionHandling.html#overview">landing pad</a></i> for the
exception. As such, '<tt>exception</tt>' label is required to have the
"<a href="#i_landingpad"><tt>landingpad</tt></a>" instruction, which contains
- the information about about the behavior of the program after unwinding
+ the information about the behavior of the program after unwinding
happens, as its first non-PHI instruction. The restrictions on the
"<tt>landingpad</tt>" instruction's tightly couples it to the
"<tt>invoke</tt>" instruction, so that the important information contained
@@ -3593,7 +3617,7 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
<tt>nsw</tt> keywords are present, the result value of the <tt>add</tt>
- is a <a href="#trapvalues">trap value</a> if unsigned and/or signed overflow,
+ is a <a href="#poisonvalues">poison value</a> if unsigned and/or signed overflow,
respectively, occurs.</p>
<h5>Example:</h5>
@@ -3674,7 +3698,7 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
<tt>nsw</tt> keywords are present, the result value of the <tt>sub</tt>
- is a <a href="#trapvalues">trap value</a> if unsigned and/or signed overflow,
+ is a <a href="#poisonvalues">poison value</a> if unsigned and/or signed overflow,
respectively, occurs.</p>
<h5>Example:</h5>
@@ -3761,7 +3785,7 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
<tt>nsw</tt> keywords are present, the result value of the <tt>mul</tt>
- is a <a href="#trapvalues">trap value</a> if unsigned and/or signed overflow,
+ is a <a href="#poisonvalues">poison value</a> if unsigned and/or signed overflow,
respectively, occurs.</p>
<h5>Example:</h5>
@@ -3831,7 +3855,7 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<p>Division by zero leads to undefined behavior.</p>
<p>If the <tt>exact</tt> keyword is present, the result value of the
- <tt>udiv</tt> is a <a href="#trapvalues">trap value</a> if %op1 is not a
+ <tt>udiv</tt> is a <a href="#poisonvalues">poison value</a> if %op1 is not a
multiple of %op2 (as such, "((a udiv exact b) mul b) == a").</p>
@@ -3875,7 +3899,7 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
a 32-bit division of -2147483648 by -1.</p>
<p>If the <tt>exact</tt> keyword is present, the result value of the
- <tt>sdiv</tt> is a <a href="#trapvalues">trap value</a> if the result would
+ <tt>sdiv</tt> is a <a href="#poisonvalues">poison value</a> if the result would
be rounded.</p>
<h5>Example:</h5>
@@ -4084,9 +4108,9 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
shift amount in <tt>op2</tt>.</p>
<p>If the <tt>nuw</tt> keyword is present, then the shift produces a
- <a href="#trapvalues">trap value</a> if it shifts out any non-zero bits. If
+ <a href="#poisonvalues">poison value</a> if it shifts out any non-zero bits. If
the <tt>nsw</tt> keyword is present, then the shift produces a
- <a href="#trapvalues">trap value</a> if it shifts out any bits that disagree
+ <a href="#poisonvalues">poison value</a> if it shifts out any bits that disagree
with the resultant sign bit. As such, NUW/NSW have the same semantics as
they would if the shift were expressed as a mul instruction with the same
nsw/nuw bits in (mul %op1, (shl 1, %op2)).</p>
@@ -4133,7 +4157,7 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
shift amount in <tt>op2</tt>.</p>
<p>If the <tt>exact</tt> keyword is present, the result value of the
- <tt>lshr</tt> is a <a href="#trapvalues">trap value</a> if any of the bits
+ <tt>lshr</tt> is a <a href="#poisonvalues">poison value</a> if any of the bits
shifted out are non-zero.</p>
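<p>A hypothetical sketch (<tt>ashr exact</tt> behaves the same way):</p>
<pre>
  %a = lshr exact i32 8, 2   <i>; yields 2: only zero bits are shifted out</i>
  %b = lshr exact i32 7, 1   <i>; %b is a poison value: a set bit is shifted out</i>
</pre>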
@@ -4181,7 +4205,7 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
the corresponding shift amount in <tt>op2</tt>.</p>
<p>If the <tt>exact</tt> keyword is present, the result value of the
- <tt>ashr</tt> is a <a href="#trapvalues">trap value</a> if any of the bits
+ <tt>ashr</tt> is a <a href="#poisonvalues">poison value</a> if any of the bits
shifted out are non-zero.</p>
<h5>Example:</h5>
@@ -4223,9 +4247,9 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<table border="1" cellspacing="0" cellpadding="4">
<tbody>
<tr>
- <td>In0</td>
- <td>In1</td>
- <td>Out</td>
+ <th>In0</th>
+ <th>In1</th>
+ <th>Out</th>
</tr>
<tr>
<td>0</td>
@@ -4284,9 +4308,9 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<table border="1" cellspacing="0" cellpadding="4">
<tbody>
<tr>
- <td>In0</td>
- <td>In1</td>
- <td>Out</td>
+ <th>In0</th>
+ <th>In1</th>
+ <th>Out</th>
</tr>
<tr>
<td>0</td>
@@ -4348,9 +4372,9 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<table border="1" cellspacing="0" cellpadding="4">
<tbody>
<tr>
- <td>In0</td>
- <td>In1</td>
- <td>Out</td>
+ <th>In0</th>
+ <th>In1</th>
+ <th>Out</th>
</tr>
<tr>
<td>0</td>
@@ -4761,8 +4785,8 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<h5>Syntax:</h5>
<pre>
- store [volatile] &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;] <i>; yields {void}</i>
- store atomic [volatile] &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt; [singlethread] &lt;ordering&gt;, align &lt;alignment&gt; <i>; yields {void}</i>
+ store [volatile] &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;] <i>; yields {void}</i>
+ store atomic [volatile] &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt; [singlethread] &lt;ordering&gt;, align &lt;alignment&gt; <i>; yields {void}</i>
</pre>
<h5>Overview:</h5>
@@ -4891,7 +4915,7 @@ thread. (This is useful for interacting with signal handlers.)</p>
<h5>Syntax:</h5>
<pre>
- cmpxchg [volatile] &lt;ty&gt;* &lt;pointer&gt;, &lt;ty&gt; &lt;cmp&gt;, &lt;ty&gt; &lt;new&gt; [singlethread] &lt;ordering&gt; <i>; yields {ty}</i>
+ cmpxchg [volatile] &lt;ty&gt;* &lt;pointer&gt;, &lt;ty&gt; &lt;cmp&gt;, &lt;ty&gt; &lt;new&gt; [singlethread] &lt;ordering&gt; <i>; yields {ty}</i>
</pre>
<h5>Overview:</h5>
@@ -4949,13 +4973,13 @@ FIXME: Is a weaker ordering constraint on failure helpful in practice?
<h5>Example:</h5>
<pre>
entry:
- %orig = atomic <a href="#i_load">load</a> i32* %ptr unordered <i>; yields {i32}</i>
+ %orig = atomic <a href="#i_load">load</a> i32* %ptr unordered <i>; yields {i32}</i>
<a href="#i_br">br</a> label %loop
loop:
%cmp = <a href="#i_phi">phi</a> i32 [ %orig, %entry ], [%old, %loop]
%squared = <a href="#i_mul">mul</a> i32 %cmp, %cmp
- %old = cmpxchg i32* %ptr, i32 %cmp, i32 %squared <i>; yields {i32}</i>
+ %old = cmpxchg i32* %ptr, i32 %cmp, i32 %squared <i>; yields {i32}</i>
%success = <a href="#i_icmp">icmp</a> eq i32 %cmp, %old
<a href="#i_br">br</a> i1 %success, label %done, label %loop
@@ -5047,6 +5071,7 @@ specified by the <var>operation</var> argument:</p>
<pre>
&lt;result&gt; = getelementptr &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
&lt;result&gt; = getelementptr inbounds &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
+ &lt;result&gt; = getelementptr &lt;ptr vector&gt; ptrval, &lt;vector index type&gt; idx
</pre>
<h5>Overview:</h5>
@@ -5055,7 +5080,8 @@ specified by the <var>operation</var> argument:</p>
It performs address calculation only and does not access memory.</p>
<h5>Arguments:</h5>
-<p>The first argument is always a pointer, and forms the basis of the
+<p>The first argument is always a pointer or a vector of pointers,
+ and forms the basis of the
calculation. The remaining arguments are indices that indicate which of the
elements of the aggregate object are indexed. The interpretation of each
index is dependent on the type being indexed into. The first index always
@@ -5093,54 +5119,57 @@ int *foo(struct ST *s) {
}
</pre>
-<p>The LLVM code generated by the GCC frontend is:</p>
+<p>The LLVM code generated by Clang is:</p>
<pre class="doc_code">
-%RT = <a href="#namedtypes">type</a> { i8 , [10 x [20 x i32]], i8 }
-%ST = <a href="#namedtypes">type</a> { i32, double, %RT }
+%struct.RT = <a href="#namedtypes">type</a> { i8, [10 x [20 x i32]], i8 }
+%struct.ST = <a href="#namedtypes">type</a> { i32, double, %struct.RT }
-define i32* @foo(%ST* %s) {
+define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp {
entry:
- %reg = getelementptr %ST* %s, i32 1, i32 2, i32 1, i32 5, i32 13
- ret i32* %reg
+ %arrayidx = getelementptr inbounds %struct.ST* %s, i64 1, i32 2, i32 1, i64 5, i64 13
+ ret i32* %arrayidx
}
</pre>
<h5>Semantics:</h5>
-<p>In the example above, the first index is indexing into the '<tt>%ST*</tt>'
- type, which is a pointer, yielding a '<tt>%ST</tt>' = '<tt>{ i32, double, %RT
- }</tt>' type, a structure. The second index indexes into the third element
- of the structure, yielding a '<tt>%RT</tt>' = '<tt>{ i8 , [10 x [20 x i32]],
- i8 }</tt>' type, another structure. The third index indexes into the second
- element of the structure, yielding a '<tt>[10 x [20 x i32]]</tt>' type, an
- array. The two dimensions of the array are subscripted into, yielding an
- '<tt>i32</tt>' type. The '<tt>getelementptr</tt>' instruction returns a
- pointer to this element, thus computing a value of '<tt>i32*</tt>' type.</p>
+<p>In the example above, the first index is indexing into the
+ '<tt>%struct.ST*</tt>' type, which is a pointer, yielding a
+ '<tt>%struct.ST</tt>' = '<tt>{ i32, double, %struct.RT }</tt>' type, a
+ structure. The second index indexes into the third element of the structure,
+   yielding a '<tt>%struct.RT</tt>' = '<tt>{ i8, [10 x [20 x i32]], i8 }</tt>'
+ type, another structure. The third index indexes into the second element of
+ the structure, yielding a '<tt>[10 x [20 x i32]]</tt>' type, an array. The
+ two dimensions of the array are subscripted into, yielding an '<tt>i32</tt>'
+ type. The '<tt>getelementptr</tt>' instruction returns a pointer to this
+ element, thus computing a value of '<tt>i32*</tt>' type.</p>
<p>Note that it is perfectly legal to index partially through a structure,
returning a pointer to an inner element. Because of this, the LLVM code for
the given testcase is equivalent to:</p>
-<pre>
- define i32* @foo(%ST* %s) {
- %t1 = getelementptr %ST* %s, i32 1 <i>; yields %ST*:%t1</i>
- %t2 = getelementptr %ST* %t1, i32 0, i32 2 <i>; yields %RT*:%t2</i>
- %t3 = getelementptr %RT* %t2, i32 0, i32 1 <i>; yields [10 x [20 x i32]]*:%t3</i>
- %t4 = getelementptr [10 x [20 x i32]]* %t3, i32 0, i32 5 <i>; yields [20 x i32]*:%t4</i>
- %t5 = getelementptr [20 x i32]* %t4, i32 0, i32 13 <i>; yields i32*:%t5</i>
- ret i32* %t5
- }
+<pre class="doc_code">
+define i32* @foo(%struct.ST* %s) {
+ %t1 = getelementptr %struct.ST* %s, i32 1 <i>; yields %struct.ST*:%t1</i>
+ %t2 = getelementptr %struct.ST* %t1, i32 0, i32 2 <i>; yields %struct.RT*:%t2</i>
+ %t3 = getelementptr %struct.RT* %t2, i32 0, i32 1 <i>; yields [10 x [20 x i32]]*:%t3</i>
+ %t4 = getelementptr [10 x [20 x i32]]* %t3, i32 0, i32 5 <i>; yields [20 x i32]*:%t4</i>
+ %t5 = getelementptr [20 x i32]* %t4, i32 0, i32 13 <i>; yields i32*:%t5</i>
+ ret i32* %t5
+}
</pre>
<p>If the <tt>inbounds</tt> keyword is present, the result value of the
- <tt>getelementptr</tt> is a <a href="#trapvalues">trap value</a> if the
+ <tt>getelementptr</tt> is a <a href="#poisonvalues">poison value</a> if the
base pointer is not an <i>in bounds</i> address of an allocated object,
or if any of the addresses that would be formed by successive addition of
the offsets implied by the indices to the base address with infinitely
precise signed arithmetic are not an <i>in bounds</i> address of that
allocated object. The <i>in bounds</i> addresses for an allocated object
are all the addresses that point into the object, plus the address one
- byte past the end.</p>
+ byte past the end.
+  When the base is a vector of pointers, the <tt>inbounds</tt> keyword
+  applies to each of the address computations element-wise.</p>
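+<p>A hypothetical element-wise sketch (operand names invented here):</p>
+<pre class="doc_code">
+  %p = getelementptr inbounds &lt;2 x i32*&gt; %bases, &lt;2 x i64&gt; %idx
+  <i>; a lane of %p is poison if that lane's computation is out of bounds</i>
+</pre>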
<p>If the <tt>inbounds</tt> keyword is not present, the offsets are added to
the base address with silently-wrapping two's complement arithmetic. If the
@@ -5167,6 +5196,13 @@ entry:
%iptr = getelementptr [10 x i32]* @arr, i16 0, i16 0
</pre>
+<p>When the pointer argument is a vector of pointers, only a single index may
+  be used, and it must be a vector with the same number of elements as the
+  pointer operand. For example:</p>
+<pre class="doc_code">
+  %A = getelementptr &lt;4 x i8*&gt; %ptrs, &lt;4 x i64&gt; %offsets
+</pre>
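+<p>As a non-normative sketch, each result element equals a scalar
+  <tt>getelementptr</tt> of the corresponding pointer and index
+  (<tt>%p0</tt>, <tt>%o0</tt>, and <tt>%a0</tt> are invented names):</p>
+<pre class="doc_code">
+  %p0 = extractelement &lt;4 x i8*&gt; %ptrs, i32 0
+  %o0 = extractelement &lt;4 x i64&gt; %offsets, i32 0
+  %a0 = getelementptr i8* %p0, i64 %o0   <i>; equals element 0 of %A</i>
+</pre>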
+
</div>
</div>
@@ -5539,13 +5575,16 @@ entry:
</pre>
<h5>Overview:</h5>
-<p>The '<tt>ptrtoint</tt>' instruction converts the pointer <tt>value</tt> to
- the integer type <tt>ty2</tt>.</p>
+<p>The '<tt>ptrtoint</tt>' instruction converts the pointer value (or vector
+  of pointer values) <tt>value</tt> to the integer (or vector of integers)
+  type <tt>ty2</tt>.</p>
<h5>Arguments:</h5>
<p>The '<tt>ptrtoint</tt>' instruction takes a <tt>value</tt> to cast, which
- must be a <a href="#t_pointer">pointer</a> value, and a type to cast it to
- <tt>ty2</tt>, which must be an <a href="#t_integer">integer</a> type.</p>
+  must be a value of <a href="#t_pointer">pointer</a> type or a vector of
+  pointers, and a type to cast it to, <tt>ty2</tt>, which must be an
+  <a href="#t_integer">integer</a> type or a vector of integers.</p>
<h5>Semantics:</h5>
<p>The '<tt>ptrtoint</tt>' instruction converts <tt>value</tt> to integer type
@@ -5558,8 +5597,9 @@ entry:
<h5>Example:</h5>
<pre>
- %X = ptrtoint i32* %X to i8 <i>; yields truncation on 32-bit architecture</i>
- %Y = ptrtoint i32* %x to i64 <i>; yields zero extension on 32-bit architecture</i>
+ %X = ptrtoint i32* %P to i8 <i>; yields truncation on 32-bit architecture</i>
+ %Y = ptrtoint i32* %P to i64 <i>; yields zero extension on 32-bit architecture</i>
+   %Z = ptrtoint &lt;4 x i32*&gt; %V to &lt;4 x i64&gt; <i>; yields vector zero extension for a vector of addresses on 32-bit architecture</i>
</pre>
</div>
@@ -5598,6 +5638,7 @@ entry:
%X = inttoptr i32 255 to i32* <i>; yields zero extension on 64-bit architecture</i>
%Y = inttoptr i32 255 to i32* <i>; yields no-op on 32-bit architecture</i>
%Z = inttoptr i64 0 to i32* <i>; yields truncation on 32-bit architecture</i>
+   %V = inttoptr &lt;4 x i32&gt; %G to &lt;4 x i8*&gt; <i>; yields truncation of vector %G to four pointers</i>
</pre>
</div>
@@ -5632,8 +5673,9 @@ entry:
<p>The '<tt>bitcast</tt>' instruction converts <tt>value</tt> to type
<tt>ty2</tt>. It is always a <i>no-op cast</i> because no bits change with
this conversion. The conversion is done as if the <tt>value</tt> had been
- stored to memory and read back as type <tt>ty2</tt>. Pointer types may only
- be converted to other pointer types with this instruction. To convert
+ stored to memory and read back as type <tt>ty2</tt>.
+ Pointer (or vector of pointers) types may only be converted to other pointer
+ (or vector of pointers) types with this instruction. To convert
pointers to other types, use the <a href="#i_inttoptr">inttoptr</a> or
<a href="#i_ptrtoint">ptrtoint</a> instructions first.</p>
@@ -5641,7 +5683,8 @@ entry:
<pre>
%X = bitcast i8 255 to i8 <i>; yields i8 :-1</i>
%Y = bitcast i32* %x to sint* <i>; yields sint*:%x</i>
- %Z = bitcast &lt;2 x int&gt; %V to i64; <i>; yields i64: %V</i>
+   %Z = bitcast &lt;2 x i32&gt; %V to i64          <i>; yields i64: %V</i>
+   %W = bitcast &lt;2 x i32*&gt; %P to &lt;2 x i64*&gt;  <i>; yields &lt;2 x i64*&gt;</i>
</pre>
</div>
@@ -5672,8 +5715,8 @@ entry:
<h5>Overview:</h5>
<p>The '<tt>icmp</tt>' instruction returns a boolean value or a vector of
- boolean values based on comparison of its two integer, integer vector, or
- pointer operands.</p>
+ boolean values based on comparison of its two integer, integer vector,
+ pointer, or pointer vector operands.</p>
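+<p>For example (hypothetical operands), comparing two pointer vectors yields a
+  vector of booleans:</p>
+<pre>
+  %c = icmp eq &lt;4 x i8*&gt; %p, %q   <i>; yields &lt;4 x i1&gt;</i>
+</pre>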
<h5>Arguments:</h5>
<p>The '<tt>icmp</tt>' instruction takes three operands. The first operand is
@@ -7293,12 +7336,12 @@ LLVM</a>.</p>
targets support all bit widths or vector types, however.</p>
<pre>
- declare i8 @llvm.ctlz.i8 (i8 &lt;src&gt;)
- declare i16 @llvm.ctlz.i16(i16 &lt;src&gt;)
- declare i32 @llvm.ctlz.i32(i32 &lt;src&gt;)
- declare i64 @llvm.ctlz.i64(i64 &lt;src&gt;)
- declare i256 @llvm.ctlz.i256(i256 &lt;src&gt;)
- declare &lt;2 x i32&gt; @llvm.ctlz.v2i32(&lt;2 x i32&gt; &lt;src;gt)
+ declare i8 @llvm.ctlz.i8 (i8 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+ declare i16 @llvm.ctlz.i16 (i16 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+ declare i32 @llvm.ctlz.i32 (i32 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+ declare i64 @llvm.ctlz.i64 (i64 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+ declare i256 @llvm.ctlz.i256(i256 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+  declare &lt;2 x i32&gt; @llvm.ctlz.v2i32(&lt;2 x i32&gt; &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
</pre>
<h5>Overview:</h5>
@@ -7306,15 +7349,22 @@ LLVM</a>.</p>
leading zeros in a variable.</p>
<h5>Arguments:</h5>
-<p>The only argument is the value to be counted. The argument may be of any
- integer type, or any vector type with integer element type.
- The return type must match the argument type.</p>
+<p>The first argument is the value to be counted. This argument may be of any
+  integer type, or a vector with integer element type. The return type
+  must match the first argument type.</p>
+
+<p>The second argument must be a constant and is a flag indicating whether the
+  intrinsic must ensure that a zero first argument produces a defined result.
+  Historically some architectures could not efficiently produce a defined
+  result for a zero input, and many algorithms are written to avoid
+  zero-valued inputs.</p>
<h5>Semantics:</h5>
<p>The '<tt>llvm.ctlz</tt>' intrinsic counts the leading (most significant)
- zeros in a variable, or within each element of the vector if the operation
- is of vector type. If the src == 0 then the result is the size in bits of
- the type of src. For example, <tt>llvm.ctlz(i32 2) = 30</tt>.</p>
+ zeros in a variable, or within each element of the vector.
+ If <tt>src == 0</tt> then the result is the size in bits of the type of
+ <tt>src</tt> if <tt>is_zero_undef == 0</tt> and <tt>undef</tt> otherwise.
+ For example, <tt>llvm.ctlz(i32 2) = 30</tt>.</p>
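+<p>A non-normative illustration of the flag (constants invented here):</p>
+<pre>
+  %a = call i32 @llvm.ctlz.i32(i32 8, i1 false)   <i>; yields 28</i>
+  %b = call i32 @llvm.ctlz.i32(i32 0, i1 false)   <i>; yields 32</i>
+  %c = call i32 @llvm.ctlz.i32(i32 0, i1 true)    <i>; result is undef</i>
+</pre>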
</div>
@@ -7331,12 +7381,12 @@ LLVM</a>.</p>
support all bit widths or vector types, however.</p>
<pre>
- declare i8 @llvm.cttz.i8 (i8 &lt;src&gt;)
- declare i16 @llvm.cttz.i16(i16 &lt;src&gt;)
- declare i32 @llvm.cttz.i32(i32 &lt;src&gt;)
- declare i64 @llvm.cttz.i64(i64 &lt;src&gt;)
- declare i256 @llvm.cttz.i256(i256 &lt;src&gt;)
- declase &lt;2 x i32&gt; @llvm.cttz.v2i32(&lt;2 x i32&gt; &lt;src&gt;)
+ declare i8 @llvm.cttz.i8 (i8 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+ declare i16 @llvm.cttz.i16 (i16 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+ declare i32 @llvm.cttz.i32 (i32 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+ declare i64 @llvm.cttz.i64 (i64 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+ declare i256 @llvm.cttz.i256(i256 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+  declare &lt;2 x i32&gt; @llvm.cttz.v2i32(&lt;2 x i32&gt; &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
</pre>
<h5>Overview:</h5>
@@ -7344,15 +7394,22 @@ LLVM</a>.</p>
trailing zeros.</p>
<h5>Arguments:</h5>
-<p>The only argument is the value to be counted. The argument may be of any
- integer type, or a vectory with integer element type.. The return type
- must match the argument type.</p>
+<p>The first argument is the value to be counted. This argument may be of any
+  integer type, or a vector with integer element type. The return type
+  must match the first argument type.</p>
+
+<p>The second argument must be a constant and is a flag indicating whether the
+  intrinsic must ensure that a zero first argument produces a defined result.
+  Historically some architectures could not efficiently produce a defined
+  result for a zero input, and many algorithms are written to avoid
+  zero-valued inputs.</p>
<h5>Semantics:</h5>
<p>The '<tt>llvm.cttz</tt>' intrinsic counts the trailing (least significant)
zeros in a variable, or within each element of a vector.
- If the src == 0 then the result is the size in bits of
- the type of src. For example, <tt>llvm.cttz(2) = 1</tt>.</p>
+ If <tt>src == 0</tt> then the result is the size in bits of the type of
+ <tt>src</tt> if <tt>is_zero_undef == 0</tt> and <tt>undef</tt> otherwise.
+ For example, <tt>llvm.cttz(2) = 1</tt>.</p>
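+<p>Likewise for <tt>cttz</tt> (hypothetical constants):</p>
+<pre>
+  %a = call i32 @llvm.cttz.i32(i32 8, i1 false)   <i>; yields 3</i>
+  %b = call i32 @llvm.cttz.i32(i32 0, i1 true)    <i>; result is undef</i>
+</pre>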
</div>
@@ -8185,11 +8242,35 @@ LLVM</a>.</p>
compile time.</p>
</div>
+<!-- _______________________________________________________________________ -->
+<h4>
+ <a name="int_expect">'<tt>llvm.expect</tt>' Intrinsic</a>
+</h4>
+<div>
+
+<h5>Syntax:</h5>
+<pre>
+ declare i32 @llvm.expect.i32(i32 &lt;val&gt;, i32 &lt;expected_val&gt;)
+ declare i64 @llvm.expect.i64(i64 &lt;val&gt;, i64 &lt;expected_val&gt;)
+</pre>
+
+<h5>Overview:</h5>
+<p>The <tt>llvm.expect</tt> intrinsic provides information about the expected
+  (most probable) value of <tt>val</tt>, which optimizers can use.</p>
+
+<h5>Arguments:</h5>
+<p>The <tt>llvm.expect</tt> intrinsic takes two arguments. The first argument
+  is a value. The second argument is the expected value; it must be a
+  constant, not a variable.</p>
+
+<h5>Semantics:</h5>
+<p>This intrinsic is lowered to <tt>val</tt>.</p>
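+<p>A typical (hypothetical) use annotates a branch condition so the optimizer
+  can treat the expected path as likely (label names invented here):</p>
+<pre>
+  %ev  = call i32 @llvm.expect.i32(i32 %x, i32 0)   <i>; %x is expected to be 0</i>
+  %cmp = icmp eq i32 %ev, 0
+  br i1 %cmp, label %fast_path, label %slow_path
+</pre>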
</div>
</div>
+</div>
<!-- *********************************************************************** -->
<hr>
<address>
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index f2d8930..b9361a5 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -4,20 +4,22 @@
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<link rel="stylesheet" href="llvm.css" type="text/css">
- <title>LLVM 3.0 Release Notes</title>
+ <title>LLVM 3.1 Release Notes</title>
</head>
<body>
-<h1>LLVM 3.0 Release Notes</h1>
+<h1>LLVM 3.1 Release Notes</h1>
-<img align=right src="http://llvm.org/img/DragonSmall.png"
- width="136" height="136" alt="LLVM Dragon Logo">
+<div>
+<img style="float:right" src="http://llvm.org/img/DragonSmall.png"
+ width="136" height="136" alt="LLVM Dragon Logo">
+</div>
<ol>
<li><a href="#intro">Introduction</a></li>
<li><a href="#subproj">Sub-project Status Update</a></li>
- <li><a href="#externalproj">External Projects Using LLVM 3.0</a></li>
- <li><a href="#whatsnew">What's New in LLVM 3.0?</a></li>
+ <li><a href="#externalproj">External Projects Using LLVM 3.1</a></li>
+ <li><a href="#whatsnew">What's New in LLVM?</a></li>
<li><a href="GettingStarted.html">Installation Instructions</a></li>
<li><a href="#knownproblems">Known Problems</a></li>
<li><a href="#additionalinfo">Additional Information</a></li>
@@ -27,13 +29,11 @@
<p>Written by the <a href="http://llvm.org/">LLVM Team</a></p>
</div>
-<!--
-<h1 style="color:red">These are in-progress notes for the upcoming LLVM 3.0
+<h1 style="color:red">These are in-progress notes for the upcoming LLVM 3.1
release.<br>
You may prefer the
-<a href="http://llvm.org/releases/2.9/docs/ReleaseNotes.html">LLVM 2.9
+<a href="http://llvm.org/releases/3.0/docs/ReleaseNotes.html">LLVM 3.0
Release Notes</a>.</h1>
- -->
<!-- *********************************************************************** -->
<h2>
@@ -44,8 +44,9 @@ Release Notes</a>.</h1>
<div>
<p>This document contains the release notes for the LLVM Compiler
- Infrastructure, release 3.0. Here we describe the status of LLVM, including
- major improvements from the previous release and significant known problems.
+ Infrastructure, release 3.1. Here we describe the status of LLVM, including
+ major improvements from the previous release, improvements in various
+ subprojects of LLVM, and some of the current users of the code.
All LLVM releases may be downloaded from
the <a href="http://llvm.org/releases/">LLVM releases web site</a>.</p>
@@ -61,16 +62,8 @@ Release Notes</a>.</h1>
<a href="http://llvm.org/releases/">releases page</a>.</p>
</div>
-
-<!-- Features that need text if they're finished for 3.1:
- ARM EHABI
- combiner-aa?
- strong phi elim
- loop dependence analysis
- CorrelatedValuePropagation
- lib/Transforms/IPO/MergeFunctions.cpp => consider for 3.1.
- -->
-
+
+
<!-- *********************************************************************** -->
<h2>
<a name="subproj">Sub-project Status Update</a>
@@ -79,7 +72,7 @@ Release Notes</a>.</h1>
<div>
-<p>The LLVM 3.0 distribution currently consists of code from the core LLVM
+<p>The LLVM 3.1 distribution currently consists of code from the core LLVM
repository (which roughly includes the LLVM optimizers, code generators and
supporting tools), and the Clang repository. In
addition to this code, the LLVM Project includes other sub-projects that are
@@ -99,37 +92,18 @@ Release Notes</a>.</h1>
provides a modular, library-based architecture that makes it suitable for
creating or integrating with other development tools. Clang is considered a
production-quality compiler for C, Objective-C, C++ and Objective-C++ on x86
- (32- and 64-bit), and for darwin/arm targets.</p>
-
-<p>In the LLVM 3.0 time-frame, the Clang team has made many improvements:</p>
+ (32- and 64-bit), and for Darwin/ARM targets.</p>
+<p>In the LLVM 3.1 time-frame, the Clang team has made many improvements:</p>
<ul>
- <li>Greatly improved support for building C++ applications, with greater
- stability and better diagnostics.</li>
-
- <li><a href="http://clang.llvm.org/cxx_status.html">Improved support</a> for
- the <a href="http://www.iso.org/iso/iso_catalogue/catalogue_tc/catalogue_detail.htm?csnumber=50372">C++
- 2011</a> standard, including implementations of non-static data member
- initializers, alias templates, delegating constructors, the range-based
- for loop, and implicitly-generated move constructors and move assignment
- operators, among others.</li>
-
- <li>Implemented support for some features of the upcoming C1x standard,
- including static assertions and generic selections.</li>
-
- <li>Better detection of include and linking paths for system headers and
- libraries, especially for Linux distributions.</li>
-
- <li>Implemented support
- for <a href="http://clang.llvm.org/docs/AutomaticReferenceCounting.html">Automatic
- Reference Counting</a> for Objective-C.</li>
-
- <li>Implemented a number of optimizations in <tt>libclang</tt>, the Clang C
- interface, to improve the performance of code completion and the mapping
- from source locations to abstract syntax tree nodes.</li>
+ <li>...</li>
</ul>
-
+ <p>For more details about the changes to Clang since the 2.9 release, see the
+<a href="http://clang.llvm.org/docs/ReleaseNotes.html">Clang release notes</a>
+</p>
+
+
<p>If Clang rejects your code but another compiler accepts it, please take a
look at the <a href="http://clang.llvm.org/compatibility.html">language
compatibility</a> guide to make sure this is not intentional or a known
@@ -151,26 +125,12 @@ Release Notes</a>.</h1>
supports Ada, C, C++ and Fortran. It has partial support for Go, Java, Obj-C
and Obj-C++.</p>
-<p>The 3.0 release has the following notable changes:</p>
-
- <li>GCC version 4.6 is now fully supported.</li>
+<p>The 3.1 release has the following notable changes:</p>
- <li>Patching and building GCC is no longer required: the plugin should work
- with your system GCC (version 4.5 or 4.6; on Debian/Ubuntu systems the
- gcc-4.5-plugin-dev or gcc-4.6-plugin-dev package is also needed).</li>
-
- <li>The <tt>-fplugin-arg-dragonegg-enable-gcc-optzns</tt> option, which runs
- GCC's optimizers as well as LLVM's, now works much better. This is the
- option to use if you want ultimate performance! It not yet completely
- stable: it may cause the plugin to crash.</li>
+ <ul>
- <li>The type and constant conversion logic has been almost entirely rewritten,
- fixing a multitude of obscure bugs.</li>
+ <li>...</li>
-<ul>
-<!--
-<li></li>
--->
</ul>
</div>
@@ -191,7 +151,7 @@ Release Notes</a>.</h1>
implementations of this and other low-level routines (some are 3x faster than
the equivalent libgcc routines).</p>
-<p>In the LLVM 3.0 timeframe,</p>
+<p>....</p>
</div>
@@ -202,11 +162,12 @@ Release Notes</a>.</h1>
<div>
-<p>LLDB has advanced by leaps and bounds in the 3.0 timeframe. It is
- dramatically more stable and useful, and includes both a
- new <a href="http://lldb.llvm.org/tutorial.html">tutorial</a> and
- a <a href="http://lldb.llvm.org/lldb-gdb.html">side-by-side comparison with
- GDB</a>.</p>
+<p>LLDB is a ground-up implementation of a command line debugger, as well as a
+ debugger API that can be used from other applications. LLDB makes use of the
+ Clang parser to provide high-fidelity expression parsing (particularly for
+ C++) and uses the LLVM JIT for target support.</p>
+
+<p>...</p>
</div>
@@ -221,22 +182,7 @@ Release Notes</a>.</h1>
licensed</a> under the MIT and UIUC license, allowing it to be used more
permissively.</p>
-</div>
-
-
-<!--=========================================================================-->
-<h3>
-<a name="LLBrowse">LLBrowse: IR Browser</a>
-</h3>
-
-<div>
-
-<p><a href="http://llvm.org/svn/llvm-project/llbrowse/trunk/doc/LLBrowse.html">
- LLBrowse</a> is an interactive viewer for LLVM modules. It can load any LLVM
- module and displays its contents as an expandable tree view, facilitating an
- easy way to inspect types, functions, global variables, or metadata nodes. It
- is fully cross-platform, being based on the popular wxWidgets GUI
- toolkit.</p>
+<p>...</p>
</div>
@@ -251,54 +197,20 @@ Release Notes</a>.</h1>
implementation of a Java Virtual Machine (Java VM or JVM) that uses LLVM for
static and just-in-time compilation.
- <p>In the LLVM 3.0 time-frame, VMKit has had significant improvements on both
+ <p>In the LLVM 3.1 time-frame, VMKit has had significant improvements on both
runtime and startup performance:</p>
<ul>
- <li>Precompilation: by compiling ahead of time a small subset of Java's core
- library, the startup performance have been highly optimized to the point that
- running a 'Hello World' program takes less than 30 milliseconds.</li>
-
- <li>Customization: by customizing virtual methods for individual classes,
- the VM can statically determine the target of a virtual call, and decide to
- inline it.</li>
-
- <li>Inlining: the VM does more inlining than it did before, by allowing more
- bytecode instructions to be inlined, and thanks to customization. It also
- inlines GC barriers, and object allocations.</li>
-
- <li>New exception model: the generated code for a method that does not do
- any try/catch is not penalized anymore by the eventuality of calling a
- method that throws an exception. Instead, the method that throws the
- exception jumps directly to the method that could catch it.</li>
+ <li>...</li>
</ul>
</div>
-
-
-<!--=========================================================================-->
-<!--
-<h3>
-<a name="klee">KLEE: A Symbolic Execution Virtual Machine</a>
-</h3>
-
-<div>
-<p>
-<a href="http://klee.llvm.org/">KLEE</a> is a symbolic execution framework for
-programs in LLVM bitcode form. KLEE tries to symbolically evaluate "all" paths
-through the application and records state transitions that lead to fault
-states. This allows it to construct testcases that lead to faults and can even
-be used to verify some algorithms.
-</p>
-
-<p>UPDATE!</p>
-</div>-->
</div>
<!-- *********************************************************************** -->
<h2>
- <a name="externalproj">External Open Source Projects Using LLVM 3.0</a>
+ <a name="externalproj">External Open Source Projects Using LLVM 3.1</a>
</h2>
<!-- *********************************************************************** -->
@@ -306,392 +218,15 @@ be used to verify some algorithms.
<p>An exciting aspect of LLVM is that it is used as an enabling technology for
a lot of other language and tools projects. This section lists some of the
- projects that have already been updated to work with LLVM 3.0.</p>
-
-<!--=========================================================================-->
-<h3>AddressSanitizer</h3>
-
-<div>
-
-<p><a href="http://code.google.com/p/address-sanitizer/">AddressSanitizer</a>
- uses compiler instrumentation and a specialized malloc library to find C/C++
- bugs such as use-after-free and out-of-bound accesses to heap, stack, and
- globals. The key feature of the tool is speed: the average slowdown
- introduced by AddressSanitizer is less than 2x.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>ClamAV</h3>
-
-<div>
-
-<p><a href="http://www.clamav.net">Clam AntiVirus</a> is an open source (GPL)
- anti-virus toolkit for UNIX, designed especially for e-mail scanning on mail
- gateways.</p>
-
-<p>Since version 0.96 it
- has <a href="http://vrt-sourcefire.blogspot.com/2010/09/introduction-to-clamavs-low-level.html">bytecode
- signatures</a> that allow writing detections for complex malware.</p>
-
-<p>It uses LLVM's JIT to speed up the execution of bytecode on X86, X86-64,
- PPC32/64, falling back to its own interpreter otherwise. The git version was
- updated to work with LLVM 3.0.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>clang_complete for VIM</h3>
-
-<div>
-
-<p><a href="https://github.com/Rip-Rip/clang_complete">clang_complete</a> is a
- VIM plugin, that provides accurate C/C++ autocompletion using the clang front
- end. The development version of clang complete, can directly use libclang
- which can maintain a cache to speed up auto completion.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>clReflect</h3>
-
-<div>
-
-<p><a href="https://bitbucket.org/dwilliamson/clreflect">clReflect</a> is a C++
- parser that uses clang/LLVM to derive a light-weight reflection database
- suitable for use in game development. It comes with a very simple runtime
- library for loading and querying the database, requiring no external
- dependencies (including CRT), and an additional utility library for object
- management and serialisation.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>Cling C++ Interpreter</h3>
-
-<div>
-
-<p><a href="http://cern.ch/cling">Cling</a> is an interactive compiler interface
- (aka C++ interpreter). It uses LLVM's JIT and clang; it currently supports
- C++ and C. It has a prompt interface, runs source files, calls into shared
- libraries, prints the value of expressions, even does runtime lookup of
- identifiers (dynamic scopes). And it just behaves like one would expect from
- an interpreter.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>Crack Programming Language</h3>
-
-<div>
-
-<p><a href="http://code.google.com/p/crack-language/">Crack</a> aims to provide
- the ease of development of a scripting language with the performance of a
- compiled language. The language derives concepts from C++, Java and Python,
- incorporating object-oriented programming, operator overloading and strong
- typing.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>Eero</h3>
-
-<div>
-
-<p><a href="http://eerolanguage.org/">Eero</a> is a fully
- header-and-binary-compatible dialect of Objective-C 2.0, implemented with a
- patched version of the Clang/LLVM compiler. It features a streamlined syntax,
- Python-like indentation, and new operators, for improved readability and
- reduced code clutter. It also has new features such as limited forms of
- operator overloading and namespaces, and strict (type-and-operator-safe)
- enumerations. It is inspired by languages such as Smalltalk, Python, and
- Ruby.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>Glasgow Haskell Compiler (GHC)</h3>
-
-<div>
-
-<p>GHC is an open source, state-of-the-art programming suite for Haskell, a
- standard lazy functional programming language. It includes an optimizing
- static compiler generating good code for a variety of platforms, together
- with an interactive system for convenient, quick development.</p>
-
-<p>GHC 7.0 and onwards include an LLVM code generator, supporting LLVM 2.8 and
- later. Since LLVM 2.9, GHC now includes experimental support for the ARM
- platform with LLVM 3.0.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>gwXscript</h3>
-
-<div>
-
-<p><a href="http://botwars.tk/gwscript/">gwXscript</a> is an object oriented,
- aspect oriented programming language which can create both executables (ELF,
- EXE) and shared libraries (DLL, SO, DYNLIB). The compiler is implemented in
- its own language and translates scripts into LLVM-IR which can be optimized
- and translated into native code by the LLVM framework. Source code in
- gwScript contains definitions that expand the namespaces. So you can build
- your project and simply 'plug out' features by removing a file. The remaining
- project does not leave scars since you directly separate concerns by the
- 'template' feature of gwX. It is also possible to add new features to a
- project by just adding files and without editing the original project. This
- language is used for example to create games or content management systems
- that should be extendable.</p>
-
-<p>gwXscript is strongly typed and offers comfort with its native types string,
- hash and array. You can easily write new libraries in gwXscript or native
- code. gwXscript is type safe and users should not be able to crash your
- program or execute malicious code except code that is eating CPU time.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>include-what-you-use</h3>
-
-<div>
-
-<p><a href="http://code.google.com/p/include-what-you-use">include-what-you-use</a>
- is a tool to ensure that a file directly <code>#include</code>s
- all <code>.h</code> files that provide a symbol that the file uses. It also
- removes superfluous <code>#include</code>s from source files.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>ispc: The Intel SPMD Program Compiler</h3>
-
-<div>
-
-<p><a href="http://ispc.github.com">ispc</a> is a compiler for "single program,
- multiple data" (SPMD) programs. It compiles a C-based SPMD programming
- language to run on the SIMD units of CPUs; it often delivers 5-6x speedups on
- a single core of a CPU with an 8-wide SIMD unit compared to serial code,
- while still providing a clean and easy-to-understand programming model. For
- an introduction to the language and its performance,
- see <a href="http://ispc.github.com/example.html">the walkthrough</a> of a short
- example program. ispc is licensed under the BSD license.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>LanguageKit and Pragmatic Smalltalk</h3>
-
-<div>
-
-<p><a href="http://etoileos.com/etoile/features/languagekit/">LanguageKit</a> is
- a framework for implementing dynamic languages sharing an object model with
- Objective-C. It provides static and JIT compilation using LLVM along with
- its own interpreter. Pragmatic Smalltalk is a dialect of Smalltalk, built on
- top of LanguageKit, that interfaces directly with Objective-C, sharing the
- same object representation and message sending behaviour. These projects are
- developed as part of the &Eacute;toi&eacute; desktop environment.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>LuaAV</h3>
-
-<div>
-
-<p><a href="http://lua-av.mat.ucsb.edu/blog/">LuaAV</a> is a real-time
- audiovisual scripting environment based around the Lua language and a
- collection of libraries for sound, graphics, and other media protocols. LuaAV
- uses LLVM and Clang to JIT compile efficient user-defined audio synthesis
- routines specified in a declarative syntax.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>Mono</h3>
-
-<div>
-
-<p>An open source, cross-platform implementation of C# and the CLR that is
- binary compatible with Microsoft.NET. Has an optional, dynamically-loaded
- LLVM code generation backend in Mini, the JIT compiler.</p>
-
-<p>Note that we use a Git mirror of LLVM with some patches. See:
- https://github.com/mono/llvm</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>Polly</h3>
-
-<div>
-
-<p><a href="http://polly.grosser.es">Polly</a> is an advanced data-locality
- optimizer and automatic parallelizer. It uses an advanced, mathematical
- model to calculate detailed data dependency information which it uses to
- optimize the loop structure of a program. Polly can speed up sequential code
- by improving memory locality and consequently the cache use. Furthermore,
- Polly is able to expose different kind of parallelism which it exploits by
- introducing (basic) OpenMP and SIMD code. A mid-term goal of Polly is to
- automatically create optimized GPU code.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>Portable OpenCL (pocl)</h3>
-
-<div>
-
-<p>Portable OpenCL is an open source implementation of the OpenCL standard which
- can be easily adapted for new targets. One of the goals of the project is
- improving performance portability of OpenCL programs, avoiding the need for
- target-dependent manual optimizations. A "native" target is included, which
- allows running OpenCL kernels on the host (CPU).</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>Pure</h3>
-
-<div>
-<p><a href="http://pure-lang.googlecode.com/">Pure</a> is an
- algebraic/functional programming language based on term rewriting. Programs
- are collections of equations which are used to evaluate expressions in a
- symbolic fashion. The interpreter uses LLVM as a backend to JIT-compile Pure
- programs to fast native code. Pure offers dynamic typing, eager and lazy
- evaluation, lexical closures, a hygienic macro system (also based on term
- rewriting), built-in list and matrix support (including list and matrix
- comprehensions) and an easy-to-use interface to C and other programming
- languages (including the ability to load LLVM bitcode modules, and inline C,
- C++, Fortran and Faust code in Pure programs if the corresponding LLVM-enabled
- compilers are installed).</p>
-
-<p>Pure version 0.48 has been tested and is known to work with LLVM 3.0
- (and continues to work with older LLVM releases &gt;= 2.5).</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>Renderscript</h3>
-
-<div>
-
-<p><a href="http://developer.android.com/guide/topics/renderscript/index.html">Renderscript</a>
- is Android's advanced 3D graphics rendering and compute API. It provides a
- portable C99-based language with extensions to facilitate common use cases
- for enhancing graphics and thread level parallelism. The Renderscript
- compiler frontend is based on Clang/LLVM. It emits a portable bitcode format
- for the actual compiled script code, as well as reflects a Java interface for
- developers to control the execution of the compiled bitcode. Executable
- machine code is then generated from this bitcode by an LLVM backend on the
- device. Renderscript is thus able to provide a mechanism by which Android
- developers can improve performance of their applications while retaining
- portability.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>SAFECode</h3>
-
-<div>
-
-<p><a href="http://safecode.cs.illinois.edu">SAFECode</a> is a memory safe C/C++
- compiler built using LLVM. It takes standard, unannotated C/C++ code,
- analyzes the code to ensure that memory accesses and array indexing
- operations are safe, and instruments the code with run-time checks when
- safety cannot be proven statically. SAFECode can be used as a debugging aid
- (like Valgrind) to find and repair memory safety bugs. It can also be used
- to protect code from security attacks at run-time.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>The Stupid D Compiler (SDC)</h3>
-
-<div>
-
-<p><a href="https://github.com/bhelyer/SDC">The Stupid D Compiler</a> is a
- project seeking to write a self-hosting compiler for the D programming
- language without using the frontend of the reference compiler (DMD).</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>TTA-based Co-design Environment (TCE)</h3>
-
-<div>
-
-<p>TCE is a toolset for designing application-specific processors (ASP) based on
- the Transport triggered architecture (TTA). The toolset provides a complete
- co-design flow from C/C++ programs down to synthesizable VHDL and parallel
- program binaries. Processor customization points include the register files,
- function units, supported operations, and the interconnection network.</p>
-
-<p>TCE uses Clang and LLVM for C/C++ language support, target independent
- optimizations and also for parts of code generation. It generates new
- LLVM-based code generators "on the fly" for the designed TTA processors and
- loads them in to the compiler backend as runtime libraries to avoid
- per-target recompilation of larger parts of the compiler chain.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>Tart Programming Language</h3>
-
-<div>
-
-<p><a href="http://code.google.com/p/tart/">Tart</a> is a general-purpose,
- strongly typed programming language designed for application
- developers. Strongly inspired by Python and C#, Tart focuses on practical
- solutions for the professional software developer, while avoiding the clutter
- and boilerplate of legacy languages like Java and C++. Although Tart is still
- in development, the current implementation supports many features expected of
- a modern programming language, such as garbage collection, powerful
- bidirectional type inference, a greatly simplified syntax for template
- metaprogramming, closures and function literals, reflection, operator
- overloading, explicit mutability and immutability, and much more. Tart is
- flexible enough to accommodate a broad range of programming styles and
- philosophies, while maintaining a strong commitment to simplicity, minimalism
- and elegance in design.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>ThreadSanitizer</h3>
-
-<div>
+ projects that have already been updated to work with LLVM 3.1.</p>
-<p><a href="http://code.google.com/p/data-race-test/">ThreadSanitizer</a> is a
- data race detector for (mostly) C and C++ code, available for Linux, Mac OS
- and Windows. On different systems, we use binary instrumentation frameworks
- (Valgrind and Pin) as frontends that generate the program events for the race
- detection algorithm. On Linux, there's an option of using LLVM-based
- compile-time instrumentation.</p>
+ ... to be filled in right before the release ...
</div>
-<!--=========================================================================-->
-<h3>The ZooLib C++ Cross-Platform Application Framework</h3>
-
-<div>
-
-<p><a href="http://www.zoolib.org/">ZooLib</a> is Open Source under the MIT
- License. It provides GUI, filesystem access, TCP networking, thread-safe
- memory management, threading and locking for Mac OS X, Classic Mac OS,
- Microsoft Windows, POSIX operating systems with X11, BeOS, Haiku, Apple's iOS
- and Research in Motion's BlackBerry.</p>
-
-<p>My current work is to use CLang's static analyzer to improve ZooLib's code
- quality. I also plan to set up LLVM compiles of the demo programs and test
- programs using CLang and LLVM on all the platforms that CLang, LLVM and
- ZooLib all support.</p>
-
-</div>
-
-</div>
-
<!-- *********************************************************************** -->
<h2>
- <a name="whatsnew">What's New in LLVM 3.0?</a>
+ <a name="whatsnew">What's New in LLVM 3.1?</a>
</h2>
<!-- *********************************************************************** -->
@@ -708,48 +243,36 @@ be used to verify some algorithms.
<div>
-<p><b>llvm-gcc is gone</b></p>
+ <!-- Features that need text if they're finished for 3.1:
+ ARM EHABI
+ combiner-aa?
+ strong phi elim
+ loop dependence analysis
+ CorrelatedValuePropagation
+ lib/Transforms/IPO/MergeFunctions.cpp => consider for 3.1.
+ Integrated assembler on by default for arm/thumb?
+
+ -->
-<p>LLVM 3.0 includes several major new capabilities:</p>
-
-<!-- Near dead:
+ <!-- Near dead:
Analysis/RegionInfo.h + Dom Frontiers
SparseBitVector: used in LiveVar.
-
- -->
-
-<!--
- Type system rewrite.
- Better performance for Neon code in clang due to SRoA improvements.
- New regalloc on by default. Lin scan going away in 3.1
- PGO / builtin_expect improvements (summary needed)
- Big EH rewrite.
- AVX support, assembler, compiler and disassembler.
- IndVar improvements: andy
- PTX backend improvements: Justin
- llvm-rtdyld & MC JIT: JimG
- InstAliases now automatically used in the asmprinter where they are shorter.
- Integrated assembler on by default for arm/thumb?
- PostOrder Dominator frontiers were removed.
- Line Profiling / gcov support
- EH and debug information produced with CFI directives, yielding smaller executables: http://blog.mozilla.com/respindola/2011/05/12/cfi-directives/
- X86-64 generates smaller and faster code at -O0 (fast isel improvements)
- Better code generation for Cortex-A9
- Many APIs take ArrayRef's now.
- Pass manager extension API.
-
- -->
-
-<ul>
+ llvm/lib/Archive - replace with lib object?
+ -->
-<!--
-<li></li>
--->
-
+<p>LLVM 3.1 includes several major changes and big features:</p>
+
+<ul>
+ <li><a href="../tools/clang/docs/AddressSanitizer.html">AddressSanitizer</a>,
+ a fast memory error detector.</li>
+  <li><a href="CodeGenerator.html#machineinstrbundle">MachineInstr Bundles</a>,
+    support for modeling instruction bundling / packing.</li>
+ <li>....</li>
</ul>
-
+
</div>
+
<!--=========================================================================-->
<h3>
<a name="coreimprovements">LLVM IR and Core Improvements</a>
@@ -760,140 +283,9 @@ be used to verify some algorithms.
<p>LLVM IR has several new features for better support of new targets and that
expose new optimization opportunities:</p>
-<p>One of the biggest changes is that 3.0 has a new exception handling
- system. The old system used LLVM intrinsics to convey the exception handling
- information to the code generator. It worked in most cases, but not
- all. Inlining was especially difficult to get right. Also, the intrinsics
- could be moved away from the <code>invoke</code> instruction, making it hard
- to recover that information.</p>
-
-<p>The new EH system makes exception handling a first-class member of the IR. It
- adds two new instructions:</p>
-
-<ul>
- <li><a href="LangRef.html#i_landingpad"><code>landingpad</code></a> &mdash;
- this instruction defines a landing pad basic block. It contains all of the
- information that's needed by the code generator. It's also required to be
- the first non-PHI instruction in the landing pad. In addition, a landing
- pad may be jumped to only by the unwind edge of an <code>invoke</code>
- instruction.</li>
-
- <li><a href="LangRef.html#i_resume"><code>resume</code></a> &mdash; this
- instruction causes the current exception to resume traveling up the
- stack. It replaces the <code>@llvm.eh.resume</code> intrinsic.</li>
-</ul>
-
-<p>Converting from the old EH API to the new EH API is rather simple, because a
- lot of complexity has been removed. The two intrinsics,
- <code>@llvm.eh.exception</code> and <code>@llvm.eh.selector</code> have been
- superceded by the <code>landingpad</code> instruction. Instead of generating
- a call to <code>@llvm.eh.exception</code> and <code>@llvm.eh.selector</code>:
-
-<div class="doc_code">
-<pre>
-Function *ExcIntr = Intrinsic::getDeclaration(TheModule,
- Intrinsic::eh_exception);
-Function *SlctrIntr = Intrinsic::getDeclaration(TheModule,
- Intrinsic::eh_selector);
-
-// The exception pointer.
-Value *ExnPtr = Builder.CreateCall(ExcIntr, "exc_ptr");
-
-std::vector&lt;Value*&gt; Args;
-Args.push_back(ExnPtr);
-Args.push_back(Builder.CreateBitCast(Personality,
- Type::getInt8PtrTy(Context)));
-
-<i>// Add selector clauses to Args.</i>
-
-// The selector call.
-Builder.CreateCall(SlctrIntr, Args, "exc_sel");
-</pre>
-</div>
-
-<p>You should instead generate a <code>landingpad</code> instruction, that
- returns an exception object and selector value:</p>
-
-<div class="doc_code">
-<pre>
-LandingPadInst *LPadInst =
- Builder.CreateLandingPad(StructType::get(Int8PtrTy, Int32Ty, NULL),
- Personality, 0);
-
-Value *LPadExn = Builder.CreateExtractValue(LPadInst, 0);
-Builder.CreateStore(LPadExn, getExceptionSlot());
-
-Value *LPadSel = Builder.CreateExtractValue(LPadInst, 1);
-Builder.CreateStore(LPadSel, getEHSelectorSlot());
-</pre>
-</div>
-
-<p>It's now trivial to add the individual clauses to the <code>landingpad</code>
- instruction.</p>
-
-<div class="doc_code">
-<pre>
-<i><b>// Adding a catch clause</b></i>
-Constant *TypeInfo = getTypeInfo();
-LPadInst-&gt;addClause(TypeInfo);
-
-<i><b>// Adding a C++ catch-all</b></i>
-LPadInst-&gt;addClause(Constant::getNullValue(Builder.getInt8PtrTy()));
-
-<i><b>// Adding a cleanup</b></i>
-LPadInst-&gt;setCleanup(true);
-
-<i><b>// Adding a filter clause</b></i>
-std::vector&lt;Constant*&gt; TypeInfos;
-Constant *TypeInfo = getFilterTypeInfo();
-TypeInfos.push_back(Builder.CreateBitCast(TypeInfo, Builder.getInt8PtrTy()));
-
-ArrayType *FilterTy = ArrayType::get(Int8PtrTy, TypeInfos.size());
-LPadInst-&gt;addClause(ConstantArray::get(FilterTy, TypeInfos));
-</pre>
-</div>
-
-<p>Converting from using the <code>@llvm.eh.resume</code> intrinsic to
- the <code>resume</code> instruction is trivial. It takes the exception
- pointer and exception selector values returned by
- the <code>landingpad</code> instruction:</p>
-
-<div class="doc_code">
-<pre>
-Type *UnwindDataTy = StructType::get(Builder.getInt8PtrTy(),
- Builder.getInt32Ty(), NULL);
-Value *UnwindData = UndefValue::get(UnwindDataTy);
-Value *ExcPtr = Builder.CreateLoad(getExceptionObjSlot());
-Value *ExcSel = Builder.CreateLoad(getExceptionSelSlot());
-UnwindData = Builder.CreateInsertValue(UnwindData, ExcPtr, 0, "exc_ptr");
-UnwindData = Builder.CreateInsertValue(UnwindData, ExcSel, 1, "exc_sel");
-Builder.CreateResume(UnwindData);
-</pre>
-</div>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="loopoptimization">Loop Optimization Improvements</a>
-</h3>
-
-<div>
-<p>The induction variable simplification pass in 3.0 only modifies
- induction variables when profitable. Sign and zero extension
- elimination, linear function test replacement, loop unrolling, and
- other simplifications that require induction variable analysis have
- been generalized so they no longer require loops to be rewritten in a
- typically suboptimal form prior to optimization. This new design
- preserves more IR level information, avoids undoing earlier loop
- optimizations (particularly hand-optimized loops), and no longer
- strongly depends on the code generator rewriting loops a second time
- in a now optimal form--an intractable problem.</p>
-
-<p>The original behavior can be restored with -mllvm -enable-iv-rewrite;
- however, support for this mode will be short lived. As such, bug
- reports should be filed for any significant performance regressions
- when moving from -mllvm -enable-iv-rewrite to the 3.0 default mode.</p>
+ <ul>
+ <li>....</li>
+ </ul>
</div>
<!--=========================================================================-->
@@ -903,16 +295,12 @@ Builder.CreateResume(UnwindData);
<div>
-<p>In addition to a large array of minor performance tweaks and bug fixes, this
+<p>In addition to many minor performance tweaks and bug fixes, this
release includes a few major enhancements and additions to the
optimizers:</p>
<ul>
-<!--
-<li></li>
--->
-</li>
-
+ <li>....</li>
</ul>
</div>
@@ -927,24 +315,14 @@ Builder.CreateResume(UnwindData);
<p>The LLVM Machine Code (aka MC) subsystem was created to solve a number of
problems in the realm of assembly, disassembly, object file format handling,
and a number of other related areas that CPU instruction-set level tools work
- in.</p>
+ in. For more information, please see
+ the <a href="http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html">Intro
+ to the LLVM MC Project Blog Post</a>.</p>
<ul>
- <li>The ELF object streamers are much more full featured.</li>
- <li>Target dependent relocation handling has been refactored into the Targets.</li>
- <li>Early stage MC-JIT infrastructure has been implemented.</li>
+ <li>....</li>
</ul>
-<p>The MC-JIT is a major new feature for MC, and will eventually grow to replace
-the current JIT implementation. It emits object files direct to memory and
-uses a runtime dynamic linker to resolve references and drive lazy compilation.
-The MC-JIT enables much greater code reuse between the JIT and the static
-compiler and provides better integration with the platform ABI as a result.</p>
-
-<p>For more information, please see
- the <a href="http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html">Intro
- to the LLVM MC Project Blog Post</a>.</p>
-
</div>
<!--=========================================================================-->
@@ -959,9 +337,7 @@ compiler and provides better integration with the platform ABI as a result.</p>
make it run faster:</p>
<ul>
-<!--
-<li></li>
--->
+ <li>....</li>
</ul>
</div>
@@ -975,17 +351,7 @@ compiler and provides better integration with the platform ABI as a result.</p>
<p>New features and major changes in the X86 target include:</p>
<ul>
-
- <li>The X86 backend now supports
- all <a href="http://llvm.org/PR879">inline assembly that uses the X86
- floating point stack</a>.</li>
-
- <li>The CRC32 intrinsics have been renamed. The intrinsics were previously
- <code>@llvm.x86.sse42.crc32.[8|16|32]</code>
- and <code>@llvm.x86.sse42.crc64.[8|64]</code>. They have been renamed to
- <code>@llvm.x86.sse42.crc32.32.[8|16|32]</code> and
- <code>@llvm.x86.sse42.crc32.64.[8|64]</code>.</li>
-
+ <li>....</li>
</ul>
</div>
@@ -1000,13 +366,11 @@ compiler and provides better integration with the platform ABI as a result.</p>
<p>New features of the ARM target include:</p>
<ul>
- <li>Reworked Set Jump Long Jump EH Lowering,</li>
- <li>improved support for Cortex-M series processors, and</li>
- <li>beta quality integrated assembler support.</li>
+ <li>....</li>
</ul>
</div>
-
+
<!--=========================================================================-->
<h3>
<a name="MIPS">MIPS Target Improvements</a>
@@ -1014,62 +378,25 @@ compiler and provides better integration with the platform ABI as a result.</p>
<div>
-<p>New features and major changes in the MIPS target include:</p>
+<p>This release has seen major new work on just about every aspect of the MIPS
+ backend. Some of the major new features include:</p>
<ul>
- <li>Most MIPS32r1 and r2 instructions are now supported.</li>
- <li>LE/BE MIPS32r1/r2 has been tested extensively.</li>
- <li>O32 ABI has been fully tested.</li>
- <li>MIPS backend has migrated to using the MC infrastructure for assembly printing. Initial support for direct object code emission has been implemented too.</li>
- <li>Delay slot filler has been updated. Now it tries to fill delay slots with useful instructions instead of always filling them with NOPs.</li>
- <li>Support for old-style JIT is complete.</li>
- <li>Support for old architectures (MIPS1 and MIPS2) has been removed.</li>
- <li>Initial support for MIPS64 has been added.</li>
+ <li>....</li>
</ul>
</div>
<!--=========================================================================-->
<h3>
- <a name="PTX">PTX Target Improvements</a>
-</h3>
-
-<div>
-
- <p>
- The PTX back-end is still experimental, but is fairly usable for compute kernels
- in LLVM 3.0. Most scalar arithmetic is implemented, as well as intrinsics to
- access the special PTX registers and sync instructions. The major missing
- pieces are texture/sampler support and some vector operations.</p>
-
- <p>That said, the backend is already being used for domain-specific languages
- and works well with the <a href="http://www.pcc.me.uk/~peter/libclc/">libclc
- library</a> to supply OpenCL built-ins. With it, you can use Clang to compile
- OpenCL code into PTX and execute it by loading the resulting PTX as a binary
- blob using the nVidia OpenCL library. It has been tested with several OpenCL
- programs, including some from the nVidia GPU Computing SDK, and the performance
- is on par with the nVidia compiler.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
<a name="OtherTS">Other Target Specific Improvements</a>
</h3>
<div>
- <p>PPC32/ELF va_arg was implemented.</p>
- <p>PPC32 initial support for .o file writing was implemented.</p>
- <p>MicroBlaze scheduling itineraries were added that model the
- 3-stage and the 5-stage pipeline architectures. The 3-stage
- pipeline model can be selected with <code>-mcpu=mblaze3</code>
- and the 5-stage pipeline model can be selected with
- <code>-mcpu=mblaze5</code>.</p>
-
<ul>
-<!--
-<li></li>
--->
+ <li>....</li>
+
+
</ul>
</div>
@@ -1082,37 +409,18 @@ compiler and provides better integration with the platform ABI as a result.</p>
<div>
<p>If you're already an LLVM user or developer with out-of-tree changes based on
- LLVM 2.9, this section lists some "gotchas" that you may run into upgrading
+   LLVM 3.0, this section lists some "gotchas" that you may run into upgrading
from the previous release.</p>
<ul>
- <li>The <code>LLVMC</code> front end code was removed while separating
- out language independence.</li>
- <li>The <code>LowerSetJmp</code> pass wasn't used effectively by any
- target and has been removed.</li>
- <li>The old <code>TailDup</code> pass was not used in the standard pipeline
- and was unable to update ssa form, so it has been removed.
- <li>The syntax of volatile loads and stores in IR has been changed to
- "<code>load volatile</code>"/"<code>store volatile</code>". The old
- syntax ("<code>volatile load</code>"/"<code>volatile store</code>")
- is still accepted, but is now considered deprecated.</li>
- <li>The old atomic intrinscs (<code>llvm.memory.barrier</code> and
- <code>llvm.atomic.*</code>) are now gone. Please use the new atomic
- instructions, described in the <a href="Atomics.html">atomics guide</a>.
-</ul>
-
-<h4>Windows (32-bit)</h4>
-<div>
-
-<ul>
- <li>On Win32(MinGW32 and MSVC), Windows 2000 will not be supported.
- Windows XP or higher is required.</li>
+<li>LLVM 3.1 removes support for reading LLVM 2.9 bitcode files. Going forward,
+ we aim for all future versions of LLVM to read bitcode files and .ll files
+ produced by LLVM 3.0 and later.</li>
+ <li>....</li>
</ul>
</div>
-</div>
-
<!--=========================================================================-->
<h3>
<a name="api_changes">Internal API Changes</a>
@@ -1124,104 +432,7 @@ compiler and provides better integration with the platform ABI as a result.</p>
LLVM API changes are:</p>
<ul>
- <li>The biggest and most pervasive change is that llvm::Type's are no longer
- returned or accepted as 'const' values. Instead, just pass around
- non-const Type's.</li>
-
- <li><code>PHINode::reserveOperandSpace</code> has been removed. Instead, you
- must specify how many operands to reserve space for when you create the
- PHINode, by passing an extra argument
- into <code>PHINode::Create</code>.</li>
-
- <li>PHINodes no longer store their incoming BasicBlocks as operands. Instead,
- the list of incoming BasicBlocks is stored separately, and can be accessed
- with new functions <code>PHINode::block_begin</code>
- and <code>PHINode::block_end</code>.</li>
-
- <li>Various functions now take an <code>ArrayRef</code> instead of either a
- pair of pointers (or iterators) to the beginning and end of a range, or a
- pointer and a length. Others now return an <code>ArrayRef</code> instead
- of a reference to a <code>SmallVector</code>
- or <code>std::vector</code>. These include:
-<ul>
-<!-- Please keep this list sorted. -->
-<li><code>CallInst::Create</code></li>
-<li><code>ComputeLinearIndex</code> (in <code>llvm/CodeGen/Analysis.h</code>)</li>
-<li><code>ConstantArray::get</code></li>
-<li><code>ConstantExpr::getExtractElement</code></li>
-<li><code>ConstantExpr::getGetElementPtr</code></li>
-<li><code>ConstantExpr::getInBoundsGetElementPtr</code></li>
-<li><code>ConstantExpr::getIndices</code></li>
-<li><code>ConstantExpr::getInsertElement</code></li>
-<li><code>ConstantExpr::getWithOperands</code></li>
-<li><code>ConstantFoldCall</code> (in <code>llvm/Analysis/ConstantFolding.h</code>)</li>
-<li><code>ConstantFoldInstOperands</code> (in <code>llvm/Analysis/ConstantFolding.h</code>)</li>
-<li><code>ConstantVector::get</code></li>
-<li><code>DIBuilder::createComplexVariable</code></li>
-<li><code>DIBuilder::getOrCreateArray</code></li>
-<li><code>ExtractValueInst::Create</code></li>
-<li><code>ExtractValueInst::getIndexedType</code></li>
-<li><code>ExtractValueInst::getIndices</code></li>
-<li><code>FindInsertedValue</code> (in <code>llvm/Analysis/ValueTracking.h</code>)</li>
-<li><code>gep_type_begin</code> (in <code>llvm/Support/GetElementPtrTypeIterator.h</code>)</li>
-<li><code>gep_type_end</code> (in <code>llvm/Support/GetElementPtrTypeIterator.h</code>)</li>
-<li><code>GetElementPtrInst::Create</code></li>
-<li><code>GetElementPtrInst::CreateInBounds</code></li>
-<li><code>GetElementPtrInst::getIndexedType</code></li>
-<li><code>InsertValueInst::Create</code></li>
-<li><code>InsertValueInst::getIndices</code></li>
-<li><code>InvokeInst::Create</code></li>
-<li><code>IRBuilder::CreateCall</code></li>
-<li><code>IRBuilder::CreateExtractValue</code></li>
-<li><code>IRBuilder::CreateGEP</code></li>
-<li><code>IRBuilder::CreateInBoundsGEP</code></li>
-<li><code>IRBuilder::CreateInsertValue</code></li>
-<li><code>IRBuilder::CreateInvoke</code></li>
-<li><code>MDNode::get</code></li>
-<li><code>MDNode::getIfExists</code></li>
-<li><code>MDNode::getTemporary</code></li>
-<li><code>MDNode::getWhenValsUnresolved</code></li>
-<li><code>SimplifyGEPInst</code> (in <code>llvm/Analysis/InstructionSimplify.h</code>)</li>
-<li><code>TargetData::getIndexedOffset</code></li>
-</ul></li>
-
- <li>All forms of <code>StringMap::getOrCreateValue</code> have been remove
- except for the one which takes a <code>StringRef</code>.</li>
-
- <li>The <code>LLVMBuildUnwind</code> function from the C API was removed. The
- LLVM <code>unwind</code> instruction has been deprecated for a long time
- and isn't used by the current front-ends. So this was removed during the
- exception handling rewrite.</li>
-
- <li>The <code>LLVMAddLowerSetJmpPass</code> function from the C API was
- removed because the <code>LowerSetJmp</code> pass was removed.</li>
-
- <li>The <code>DIBuilder</code> interface used by front ends to encode
- debugging information in the LLVM IR now expects clients to
- use <code>DIBuilder::finalize()</code> at the end of translation unit to
- complete debugging information encoding.</li>
-
- <li>The way the type system works has been
- rewritten: <code>PATypeHolder</code> and <code>OpaqueType</code> are gone,
- and all APIs deal with <code>Type*</code> instead of <code>const
- Type*</code>. If you need to create recursive structures, then create a
- named structure, and use <code>setBody()</code> when all its elements are
- built. Type merging and refining is gone too: named structures are not
- merged with other structures, even if their layout is identical. (of
- course anonymous structures are still uniqued by layout).</li>
-
- <li>TargetSelect.h moved to Support/ from Target/</li>
-
- <li>UpgradeIntrinsicCall no longer upgrades pre-2.9 intrinsic calls (for
- example <code>llvm.memset.i32</code>).</li>
-
- <li>It is mandatory to initialize all out-of-tree passes too and their dependencies now with
- <code>INITIALIZE_PASS{BEGIN,END,}</code>
- and <code>INITIALIZE_{PASS,AG}_DEPENDENCY</code>.</li>
-
- <li>The interface for MemDepResult in MemoryDependenceAnalysis has been
- enhanced with new return types Unknown and NonFuncLocal, in addition to
- the existing types Clobber, Def, and NonLocal.</li>
+ <li>....</li>
</ul>
</div>
@@ -1236,150 +447,34 @@ compiler and provides better integration with the platform ABI as a result.</p>
<div>
-<p>This section contains significant known problems with the LLVM system, listed
- by component. If you run into a problem, please check
- the <a href="http://llvm.org/bugs/">LLVM bug database</a> and submit a bug if
- there isn't already one.</p>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="experimental">Experimental features included with this release</a>
-</h3>
-
-<div>
-
-<p>The following components of this LLVM release are either untested, known to
- be broken or unreliable, or are in early development. These components
- should not be relied on, and bugs should not be filed against them, but they
- may be useful to some people. In particular, if you would like to work on
- one of these components, please contact us on
- the <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVMdev
- list</a>.</p>
-
-<ul>
- <li>The Alpha, Blackfin, CellSPU, MicroBlaze, MSP430, MIPS, PTX, SystemZ and
- XCore backends are experimental.</li>
-
- <li><tt>llc</tt> "<tt>-filetype=obj</tt>" is experimental on all targets other
- than darwin and ELF X86 systems.</li>
-</ul>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="x86-be">Known problems with the X86 back-end</a>
-</h3>
-
-<div>
-
-<ul>
- <li>The X86-64 backend <a href="http://llvm.org/PR1740">does not yet support
- the <tt>va_arg</tt> LLVM IR instruction</a>. Currently, front-ends support
- variadic argument constructs on X86-64 by lowering them manually.</li>
-</ul>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ppc-be">Known problems with the PowerPC back-end</a>
-</h3>
-
-<div>
-
-<ul>
- <li>The PPC32/ELF support lacks PIC support.</li>
-</ul>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="arm-be">Known problems with the ARM back-end</a>
-</h3>
-
-<div>
-
-<ul>
- <li>Thumb mode works only on ARMv6 or higher processors. On sub-ARMv6
- processors, thumb programs can crash or produce wrong results
- (<a href="http://llvm.org/PR1388">PR1388</a>).</li>
-
- <li>Compilation for ARM Linux OABI (old ABI) is supported but not fully
- tested.</li>
-</ul>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="sparc-be">Known problems with the SPARC back-end</a>
-</h3>
-
-<div>
-
-<ul>
- <li>The SPARC backend only supports the 32-bit SPARC ABI (-m32); it does not
- support the 64-bit SPARC ABI (-m64).</li>
-</ul>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="mips-be">Known problems with the MIPS back-end</a>
-</h3>
-
-<div>
-
-<ul>
- <li>64-bit MIPS targets are not supported yet.</li>
-</ul>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="alpha-be">Known problems with the Alpha back-end</a>
-</h3>
-
-<div>
-
-<ul>
- <li>On 21164s, some rare FP arithmetic sequences which may trap do not have
- the appropriate nops inserted to ensure restartability.</li>
-</ul>
+<p>LLVM is generally a production-quality compiler, and is used by a broad range
+   of applications and ships in many products.  That said, not every
+   subsystem is as mature as the aggregate, particularly the more obscure
+   targets.  If you run into a problem, please check the <a
+   href="http://llvm.org/bugs/">LLVM bug database</a> and submit a bug if
+   there isn't already one, or ask on the <a
+   href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVMdev
+   list</a>.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="c-be">Known problems with the C back-end</a>
-</h3>
-
-<div>
-
-<p>The C backend has numerous problems and is not being actively maintained.
- Depending on it for anything serious is not advised.</p>
+ <p>Known problem areas include:</p>
<ul>
- <li><a href="http://llvm.org/PR802">The C backend has only basic support for
- inline assembly code</a>.</li>
-
- <li><a href="http://llvm.org/PR1658">The C backend violates the ABI of common
- C++ programs</a>, preventing intermixing between C++ compiled by the CBE
- and C++ code compiled with <tt>llc</tt> or native compilers.</li>
-
- <li>The C backend does not support all exception handling constructs.</li>
-
- <li>The C backend does not support arbitrary precision integers.</li>
+ <li>The Alpha, Blackfin, CellSPU, MSP430, PTX, SystemZ and
+ XCore backends are experimental, and the Alpha, Blackfin and SystemZ
+ targets have already been removed from mainline.</li>
+
+  <li>The integrated assembler, disassembler, and JIT are not supported by
+ several targets. If an integrated assembler is not supported, then a
+ system assembler is required. For more details, see the <a
+ href="CodeGenerator.html#targetfeatures">Target Features Matrix</a>.
+ </li>
+
+ <li>The C backend has numerous problems and is not being actively maintained.
+ Depending on it for anything serious is not advised.</li>
</ul>
</div>
-</div>
-
<!-- *********************************************************************** -->
<h2>
<a name="additionalinfo">Additional Information</a>
diff --git a/docs/SegmentedStacks.html b/docs/SegmentedStacks.html
index a91b109..16f5507 100644
--- a/docs/SegmentedStacks.html
+++ b/docs/SegmentedStacks.html
@@ -20,18 +20,12 @@
<li><a href="#alloca">Variable Sized Allocas</a></li>
</ol>
</li>
- <li><a href="#results">Results</a>
- <ol>
- <li><a href="#go">Go on LLVM</a></li>
- <li><a href="#abi">Runtime ABI</a></li>
- </ol>
- </li>
</ol>
<h2><a name="intro">Introduction</a></h2>
<div>
<p>
- Segmented stack allows stack space to be allocated incrementally than as a monolithic chunk (of some worst case size) at thread initialization. This is done by allocating stack blocks (henceforth called <em>stacklets</em>) and linking them into a doubly linked list. The function prologue is responsible for checking if the current stacklet has enough space for the function to execute; and if not, call into the libgcc runtime to allocate more stack space. Support for segmented stacks on x86 / Linux is currently being worked on.
+    Segmented stacks allow stack space to be allocated incrementally rather than as a monolithic chunk (of some worst-case size) at thread initialization. This is done by allocating stack blocks (henceforth called <em>stacklets</em>) and linking them into a doubly linked list. The function prologue is responsible for checking whether the current stacklet has enough space for the function to execute, and, if not, for calling into the libgcc runtime to allocate more stack space. When using <tt>llc</tt>, segmented stacks can be enabled by adding <tt>-segmented-stacks</tt> to the command line.
</p>
<p>
The runtime functionality is <a href="http://gcc.gnu.org/wiki/SplitStacks">already there in libgcc</a>.
diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html
index 3309ac3..bb47cb1 100644
--- a/docs/TestingGuide.html
+++ b/docs/TestingGuide.html
@@ -253,40 +253,35 @@ programs), first checkout and setup the <tt>test-suite</tt> module:</p>
<div class="doc_code">
<pre>
-% cd llvm/projects
+% cd ~/llvm/projects
% svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
% cd ..
-% ./configure --with-llvmgccdir=$LLVM_GCC_DIR
</pre>
</div>
-<p>where <tt>$LLVM_GCC_DIR</tt> is the directory where
-you <em>installed</em> llvm-gcc, not its src or obj
-dir. The <tt>--with-llvmgccdir</tt> option assumes that
-the <tt>llvm-gcc-4.2</tt> module was configured with
-<tt>--program-prefix=llvm-</tt>, and therefore that the C and C++
-compiler drivers are called <tt>llvm-gcc</tt> and <tt>llvm-g++</tt>
-respectively. If this is not the case,
-use <tt>--with-llvmgcc</tt>/<tt>--with-llvmgxx</tt> to specify each
-executable's location.</p>
+<p>and then configure and build normally as described in the
+<a href="http://llvm.org/docs/GettingStarted.html#quickstart">Getting Started
+Guide</a>. This will first autodetect the just-built <tt>clang</tt> if you are
+building clang, then look for <tt>clang</tt> in your path, and finally fall
+back to <tt>llvm-gcc</tt> in your path.</p>
<p>Then, run the entire test suite by running make in the <tt>test-suite</tt>
-directory:</p>
+subdirectory of your build directory:</p>
<div class="doc_code">
<pre>
-% cd projects/test-suite
+% cd <i>where-you-built-llvm</i>/projects/test-suite
% gmake
</pre>
</div>
-<p>Usually, running the "nightly" set of tests is a good idea, and you can also
+<p>Usually, running the "simple" set of tests is a good idea, and you can also
let it generate a report by running:</p>
<div class="doc_code">
<pre>
-% cd projects/test-suite
-% gmake TEST=nightly report report.html
+% cd <i>where-you-built-llvm</i>/projects/test-suite
+% gmake TEST=simple report report.html
</pre>
</div>
diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp
index 20516a7..41f41e8 100644
--- a/examples/ExceptionDemo/ExceptionDemo.cpp
+++ b/examples/ExceptionDemo/ExceptionDemo.cpp
@@ -2005,7 +2005,8 @@ int main(int argc, char *argv[]) {
}
// If not set, exception handling will not be turned on
- llvm::JITExceptionHandling = true;
+ llvm::TargetOptions Opts;
+ Opts.JITExceptionHandling = true;
llvm::InitializeNativeTarget();
llvm::LLVMContext &context = llvm::getGlobalContext();
@@ -2018,6 +2019,7 @@ int main(int argc, char *argv[]) {
llvm::EngineBuilder factory(module);
factory.setEngineKind(llvm::EngineKind::JIT);
factory.setAllocateGVsWithCode(false);
+ factory.setTargetOptions(Opts);
llvm::ExecutionEngine *executionEngine = factory.create();
{
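Combined across the two hunks above, the migration pattern for JIT clients looks roughly like this; a sketch assuming LLVM 3.0-era headers, where makeEngine is my own wrapper, not part of the demo:

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/ExecutionEngine/JIT.h"
    #include "llvm/Support/TargetSelect.h"
    #include "llvm/Target/TargetOptions.h"

    // Sketch: the JITExceptionHandling toggle now travels through
    // TargetOptions and EngineBuilder instead of a global flag.
    static llvm::ExecutionEngine *makeEngine(llvm::Module *module) {
      llvm::TargetOptions Opts;
      Opts.JITExceptionHandling = true;  // formerly llvm::JITExceptionHandling

      llvm::InitializeNativeTarget();
      llvm::EngineBuilder factory(module);
      factory.setEngineKind(llvm::EngineKind::JIT);
      factory.setTargetOptions(Opts);    // new hook added in this change
      return factory.create();
    }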
diff --git a/examples/LLVMBuild.txt b/examples/LLVMBuild.txt
index 3014ae6..4d06ffb 100644
--- a/examples/LLVMBuild.txt
+++ b/examples/LLVMBuild.txt
@@ -19,4 +19,3 @@
type = Group
name = Examples
parent = $ROOT
-
diff --git a/include/llvm-c/Object.h b/include/llvm-c/Object.h
index 92bb03b..6b465b3 100644
--- a/include/llvm-c/Object.h
+++ b/include/llvm-c/Object.h
@@ -73,11 +73,12 @@ void LLVMMoveToNextRelocation(LLVMRelocationIteratorRef RI);
// SymbolRef accessors
const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI);
uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI);
-uint64_t LLVMGetSymbolOffset(LLVMSymbolIteratorRef SI);
+uint64_t LLVMGetSymbolFileOffset(LLVMSymbolIteratorRef SI);
uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI);
// RelocationRef accessors
uint64_t LLVMGetRelocationAddress(LLVMRelocationIteratorRef RI);
+uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI);
LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI);
uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI);
// NOTE: Caller takes ownership of returned string of the two
diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h
index 7afaef1..e4f177b 100644
--- a/include/llvm-c/Target.h
+++ b/include/llvm-c/Target.h
@@ -47,6 +47,21 @@ typedef struct LLVMStructLayout *LLVMStructLayoutRef;
#include "llvm/Config/Targets.def"
#undef LLVM_TARGET /* Explicit undef to make SWIG happier */
+/* Declare all of the available assembly printer initialization functions. */
+#define LLVM_ASM_PRINTER(TargetName) \
+ void LLVMInitialize##TargetName##AsmPrinter();
+#include "llvm/Config/AsmPrinters.def"
+
+/* Declare all of the available assembly parser initialization functions. */
+#define LLVM_ASM_PARSER(TargetName) \
+ void LLVMInitialize##TargetName##AsmParser();
+#include "llvm/Config/AsmParsers.def"
+
+/* Declare all of the available disassembler initialization functions. */
+#define LLVM_DISASSEMBLER(TargetName) \
+ void LLVMInitialize##TargetName##Disassembler();
+#include "llvm/Config/Disassemblers.def"
+
/** LLVMInitializeAllTargetInfos - The main program should call this function if
it wants access to all available targets that LLVM is configured to
support. */
@@ -64,6 +79,43 @@ static inline void LLVMInitializeAllTargets(void) {
#include "llvm/Config/Targets.def"
#undef LLVM_TARGET /* Explicit undef to make SWIG happier */
}
+
+/** LLVMInitializeAllTargetMCs - The main program should call this function if
+ it wants access to all available target MC that LLVM is configured to
+ support. */
+static inline void LLVMInitializeAllTargetMCs(void) {
+#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetMC();
+#include "llvm/Config/Targets.def"
+#undef LLVM_TARGET /* Explicit undef to make SWIG happier */
+}
+
+/** LLVMInitializeAllAsmPrinters - The main program should call this function if
+ it wants all asm printers that LLVM is configured to support, to make them
+ available via the TargetRegistry. */
+static inline void LLVMInitializeAllAsmPrinters() {
+#define LLVM_ASM_PRINTER(TargetName) LLVMInitialize##TargetName##AsmPrinter();
+#include "llvm/Config/AsmPrinters.def"
+#undef LLVM_ASM_PRINTER /* Explicit undef to make SWIG happier */
+}
+
+/** LLVMInitializeAllAsmParsers - The main program should call this function if
+ it wants all asm parsers that LLVM is configured to support, to make them
+ available via the TargetRegistry. */
+static inline void LLVMInitializeAllAsmParsers() {
+#define LLVM_ASM_PARSER(TargetName) LLVMInitialize##TargetName##AsmParser();
+#include "llvm/Config/AsmParsers.def"
+#undef LLVM_ASM_PARSER /* Explicit undef to make SWIG happier */
+}
+
+/** LLVMInitializeAllDisassemblers - The main program should call this function
+ if it wants all disassemblers that LLVM is configured to support, to make
+ them available via the TargetRegistry. */
+static inline void LLVMInitializeAllDisassemblers() {
+#define LLVM_DISASSEMBLER(TargetName) \
+ LLVMInitialize##TargetName##Disassembler();
+#include "llvm/Config/Disassemblers.def"
+#undef LLVM_DISASSEMBLER /* Explicit undef to make SWIG happier */
+}
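These per-component helpers mirror the existing LLVMInitializeAllTargets pattern. A minimal sketch of a host program that makes every configured backend fully usable (codegen, assembly printing, parsing, and disassembly); it assumes the corresponding components were enabled at configure time:

    #include "llvm-c/Target.h"

    int main(void) {
      /* Register every configured target, plus the MC-layer
         components whose initializers are added by this change. */
      LLVMInitializeAllTargetInfos();
      LLVMInitializeAllTargets();
      LLVMInitializeAllTargetMCs();
      LLVMInitializeAllAsmPrinters();
      LLVMInitializeAllAsmParsers();
      LLVMInitializeAllDisassemblers();
      /* ... create modules, target machines, etc. ... */
      return 0;
    }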
/** LLVMInitializeNativeTarget - The main program should call this function to
initialize the native target corresponding to the host. This is useful
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index 707e0db..78119bc 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -497,8 +497,8 @@ public:
if (loBitsSet == APINT_BITS_PER_WORD)
return APInt(numBits, -1ULL);
// For small values, return quickly.
- if (numBits < APINT_BITS_PER_WORD)
- return APInt(numBits, (1ULL << loBitsSet) - 1);
+ if (loBitsSet <= APINT_BITS_PER_WORD)
+ return APInt(numBits, -1ULL >> (APINT_BITS_PER_WORD - loBitsSet));
return getAllOnesValue(numBits).lshr(numBits - loBitsSet);
}
diff --git a/include/llvm/ADT/DAGDeltaAlgorithm.h b/include/llvm/ADT/DAGDeltaAlgorithm.h
index 99ed15c..e502ac4 100644
--- a/include/llvm/ADT/DAGDeltaAlgorithm.h
+++ b/include/llvm/ADT/DAGDeltaAlgorithm.h
@@ -36,6 +36,7 @@ namespace llvm {
/// for more information on the properties which the predicate function itself
/// should satisfy.
class DAGDeltaAlgorithm {
+ virtual void anchor();
public:
typedef unsigned change_ty;
typedef std::pair<change_ty, change_ty> edge_ty;
diff --git a/include/llvm/ADT/GraphTraits.h b/include/llvm/ADT/GraphTraits.h
index 0fd1f50..823caef 100644
--- a/include/llvm/ADT/GraphTraits.h
+++ b/include/llvm/ADT/GraphTraits.h
@@ -43,9 +43,12 @@ struct GraphTraits {
// typedef ...iterator nodes_iterator;
// static nodes_iterator nodes_begin(GraphType *G)
// static nodes_iterator nodes_end (GraphType *G)
- //
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
+ // static unsigned size (GraphType *G)
+ // Return total number of nodes in the graph
+ //
+
// If anyone tries to use this class without having an appropriate
// specialization, make an error. If you get this error, it's because you
diff --git a/include/llvm/ADT/IntrusiveRefCntPtr.h b/include/llvm/ADT/IntrusiveRefCntPtr.h
index 8757f00..106daf4 100644
--- a/include/llvm/ADT/IntrusiveRefCntPtr.h
+++ b/include/llvm/ADT/IntrusiveRefCntPtr.h
@@ -156,7 +156,12 @@ namespace llvm {
other.Obj = Obj;
Obj = tmp;
}
-
+
+ void reset() {
+ release();
+ Obj = 0;
+ }
+
void resetWithoutRelease() {
Obj = 0;
}
diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h
index b8a1a2f..b54d10b 100644
--- a/include/llvm/ADT/Statistic.h
+++ b/include/llvm/ADT/Statistic.h
@@ -27,6 +27,7 @@
#define LLVM_ADT_STATISTIC_H
#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Valgrind.h"
namespace llvm {
class raw_ostream;
@@ -110,6 +111,7 @@ protected:
bool tmp = Initialized;
sys::MemoryFence();
if (!tmp) RegisterStatistic();
+ TsanHappensAfter(this);
return *this;
}
void RegisterStatistic();
diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h
index ee86d8b..e27dd4b 100644
--- a/include/llvm/ADT/TinyPtrVector.h
+++ b/include/llvm/ADT/TinyPtrVector.h
@@ -37,6 +37,15 @@ public:
delete V;
}
+ // implicit conversion operator to ArrayRef.
+ operator ArrayRef<EltTy>() const {
+ if (Val.isNull())
+ return ArrayRef<EltTy>();
+ if (Val.template is<EltTy>())
+ return *Val.template getAddrOf<EltTy>();
+ return *Val.template get<VecTy*>();
+ }
+
bool empty() const {
// This vector can be empty if it contains no element, or if it
// contains a pointer to an empty vector.
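The new conversion operator lets a TinyPtrVector be handed directly to the growing set of APIs that take an ArrayRef, in either its inline-pointer or heap-vector state. A hypothetical usage sketch (sum is my own helper, not an LLVM API):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/TinyPtrVector.h"
    using namespace llvm;

    // Hypothetical consumer taking an ArrayRef of pointers.
    static int sum(ArrayRef<int*> Vals) {
      int Total = 0;
      for (unsigned i = 0, e = Vals.size(); i != e; ++i)
        Total += *Vals[i];
      return Total;
    }

    int main() {
      int a = 1, b = 2;
      TinyPtrVector<int*> V;
      V.push_back(&a);
      V.push_back(&b);             // grows from inline pointer to vector
      return sum(V) == 3 ? 0 : 1;  // implicit TinyPtrVector -> ArrayRef
    }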
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index 8a16018..4739fb5 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -45,6 +45,7 @@ public:
arm, // ARM; arm, armv.*, xscale
cellspu, // CellSPU: spu, cellspu
+ hexagon, // Hexagon: hexagon
mips, // MIPS: mips, mipsallegrex
mipsel, // MIPSEL: mipsel, mipsallegrexel, psp
mips64, // MIPS64: mips64
diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h
index 05018fa..67bc2b3 100644
--- a/include/llvm/Analysis/ConstantFolding.h
+++ b/include/llvm/Analysis/ConstantFolding.h
@@ -25,6 +25,7 @@ namespace llvm {
class ConstantExpr;
class Instruction;
class TargetData;
+ class TargetLibraryInfo;
class Function;
class Type;
template<typename T>
@@ -35,13 +36,15 @@ namespace llvm {
/// Note that this fails if not all of the operands are constant. Otherwise,
/// this function can only fail when attempting to fold instructions like loads
/// and stores, which have no constant expression form.
-Constant *ConstantFoldInstruction(Instruction *I, const TargetData *TD = 0);
+Constant *ConstantFoldInstruction(Instruction *I, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0);
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
/// using the specified TargetData.  If successful, the constant result is
/// returned; if not, null is returned.
Constant *ConstantFoldConstantExpression(const ConstantExpr *CE,
- const TargetData *TD = 0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0);
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
/// specified operands. If successful, the constant result is returned, if not,
@@ -51,7 +54,8 @@ Constant *ConstantFoldConstantExpression(const ConstantExpr *CE,
///
Constant *ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
ArrayRef<Constant *> Ops,
- const TargetData *TD = 0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0);
/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare
/// instruction (icmp/fcmp) with the specified operands. If it fails, it
@@ -59,7 +63,8 @@ Constant *ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
///
Constant *ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *LHS, Constant *RHS,
- const TargetData *TD = 0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0);
/// ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue
/// instruction with the specified operands and indices. The constant result is
@@ -83,8 +88,8 @@ bool canConstantFoldCallTo(const Function *F);
/// ConstantFoldCall - Attempt to constant fold a call to the specified function
/// with the specified arguments, returning null if unsuccessful.
-Constant *
-ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands);
+Constant *ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI = 0);
}
#endif
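With these signature changes, folding clients thread a TargetLibraryInfo through alongside TargetData. A minimal sketch of the updated call pattern (tryFold is my own wrapper; TD and TLI are obtained elsewhere and may be null):

    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/Instruction.h"
    using namespace llvm;

    // Sketch: try to fold a single instruction, now passing TLI so calls
    // to recognized library functions can participate in folding too.
    static Constant *tryFold(Instruction *I, const TargetData *TD,
                             const TargetLibraryInfo *TLI) {
      return ConstantFoldInstruction(I, TD, TLI);  // both may be null
    }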
diff --git a/include/llvm/Analysis/DominatorInternals.h b/include/llvm/Analysis/DominatorInternals.h
index ae552b0..0c29236 100644
--- a/include/llvm/Analysis/DominatorInternals.h
+++ b/include/llvm/Analysis/DominatorInternals.h
@@ -171,7 +171,7 @@ void Calculate(DominatorTreeBase<typename GraphTraits<NodeT>::NodeType>& DT,
// it might be that some blocks did not get a DFS number (e.g., blocks of
// infinite loops). In these cases an artificial exit node is required.
- MultipleRoots |= (DT.isPostDominator() && N != F.size());
+ MultipleRoots |= (DT.isPostDominator() && N != GraphTraits<FuncT*>::size(&F));
// When naively implemented, the Lengauer-Tarjan algorithm requires a separate
// bucket for each vertex. However, this is unnecessary, because each vertex
diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h
index 230e83d..15db2d1 100644
--- a/include/llvm/Analysis/Dominators.h
+++ b/include/llvm/Analysis/Dominators.h
@@ -653,21 +653,24 @@ public:
/// recalculate - compute a dominator tree for the given function
template<class FT>
void recalculate(FT& F) {
+ typedef GraphTraits<FT*> TraitsTy;
reset();
this->Vertex.push_back(0);
if (!this->IsPostDominators) {
// Initialize root
- this->Roots.push_back(&F.front());
- this->IDoms[&F.front()] = 0;
- this->DomTreeNodes[&F.front()] = 0;
+ NodeT *entry = TraitsTy::getEntryNode(&F);
+ this->Roots.push_back(entry);
+ this->IDoms[entry] = 0;
+ this->DomTreeNodes[entry] = 0;
Calculate<FT, NodeT*>(*this, F);
} else {
// Initialize the roots list
- for (typename FT::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- if (std::distance(GraphTraits<FT*>::child_begin(I),
- GraphTraits<FT*>::child_end(I)) == 0)
+ for (typename TraitsTy::nodes_iterator I = TraitsTy::nodes_begin(&F),
+ E = TraitsTy::nodes_end(&F); I != E; ++I) {
+ if (std::distance(TraitsTy::child_begin(I),
+ TraitsTy::child_end(I)) == 0)
addRoot(I);
// Prepopulate maps so that we don't get iterator invalidation issues later.
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index c1d87d3..3dd194c 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -24,95 +24,117 @@ namespace llvm {
class Instruction;
class Value;
class TargetData;
+ class TargetLibraryInfo;
template<typename T>
class ArrayRef;
/// SimplifyAddInst - Given operands for an Add, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
- const TargetData *TD = 0, const DominatorTree *DT = 0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
/// SimplifySubInst - Given operands for a Sub, see if we can
/// fold the result. If not, this returns null.
Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
- const TargetData *TD = 0, const DominatorTree *DT = 0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
/// SimplifyMulInst - Given operands for a Mul, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyMulInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifySDivInst - Given operands for an SDiv, see if we can
/// fold the result. If not, this returns null.
Value *SimplifySDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyUDivInst - Given operands for a UDiv, see if we can
/// fold the result. If not, this returns null.
- Value *SimplifyUDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ Value *SimplifyUDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyFDivInst - Given operands for an FDiv, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyFDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifySRemInst - Given operands for an SRem, see if we can
/// fold the result. If not, this returns null.
- Value *SimplifySRemInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ Value *SimplifySRemInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyURemInst - Given operands for a URem, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyURemInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyFRemInst - Given operands for an FRem, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyFRemInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyShlInst - Given operands for a Shl, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD = 0, const DominatorTree *DT = 0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
/// SimplifyLShrInst - Given operands for a LShr, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD = 0, const DominatorTree *DT=0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
/// SimplifyAShrInst - Given operands for a AShr, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyAndInst - Given operands for an And, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyAndInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyOrInst - Given operands for an Or, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyOrInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyXorInst - Given operands for a Xor, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyXorInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD = 0,
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD = 0,
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
@@ -123,8 +145,8 @@ namespace llvm {
/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
/// fold the result. If not, this returns null.
- Value *SimplifyGEPInst(ArrayRef<Value *> Ops,
- const TargetData *TD = 0, const DominatorTree *DT = 0);
+ Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD = 0,
+ const DominatorTree *DT = 0);
/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
/// can fold the result. If not, this returns null.
@@ -139,16 +161,21 @@ namespace llvm {
/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD = 0, const DominatorTree *DT = 0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD = 0, const DominatorTree *DT = 0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
/// SimplifyInstruction - See if we can compute a simplified version of this
/// instruction. If not, this returns null.
Value *SimplifyInstruction(Instruction *I, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
@@ -160,6 +187,7 @@ namespace llvm {
///
void ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
} // end namespace llvm
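As with constant folding, every simplification entry point now takes an optional TargetLibraryInfo between the TargetData and DominatorTree parameters. A sketch of the common simplify-and-replace idiom with the new argument order (simplifyBlock is my illustration, not an LLVM API):

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/BasicBlock.h"
    #include "llvm/Instruction.h"
    using namespace llvm;

    // Sketch: simplify each instruction in a block; note the new TLI slot
    // sitting between TD and DT in the argument list.
    static void simplifyBlock(BasicBlock *BB, const TargetData *TD,
                              const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
        if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
          Instruction *Inst = I++;       // advance before erasing
          Inst->replaceAllUsesWith(V);
          Inst->eraseFromParent();
        } else {
          ++I;
        }
      }
    }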
diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h
index fc4d0af..065c230 100644
--- a/include/llvm/Analysis/LazyValueInfo.h
+++ b/include/llvm/Analysis/LazyValueInfo.h
@@ -20,12 +20,14 @@
namespace llvm {
class Constant;
class TargetData;
+ class TargetLibraryInfo;
class Value;
/// LazyValueInfo - This pass computes, caches, and vends lazy value constraint
/// information.
class LazyValueInfo : public FunctionPass {
class TargetData *TD;
+ class TargetLibraryInfo *TLI;
void *PImpl;
LazyValueInfo(const LazyValueInfo&); // DO NOT IMPLEMENT.
void operator=(const LazyValueInfo&); // DO NOT IMPLEMENT.
@@ -68,9 +70,7 @@ public:
// Implementation boilerplate.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
virtual void releaseMemory();
virtual bool runOnFunction(Function &F);
};
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 535d205..8717352 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -23,7 +23,6 @@
// * whether or not a particular block branches out of the loop
// * the successor blocks of the loop
// * the loop depth
-// * the trip count
// * etc...
//
//===----------------------------------------------------------------------===//
@@ -587,37 +586,6 @@ public:
///
PHINode *getCanonicalInductionVariable() const;
- /// getTripCount - Return a loop-invariant LLVM value indicating the number of
- /// times the loop will be executed. Note that this means that the backedge
- /// of the loop executes N-1 times. If the trip-count cannot be determined,
- /// this returns null.
- ///
- /// The IndVarSimplify pass transforms loops to have a form that this
- /// function easily understands.
- ///
- Value *getTripCount() const;
-
- /// getSmallConstantTripCount - Returns the trip count of this loop as a
- /// normal unsigned value, if possible. Returns 0 if the trip count is unknown
- /// of not constant. Will also return 0 if the trip count is very large
- /// (>= 2^32)
- ///
- /// The IndVarSimplify pass transforms loops to have a form that this
- /// function easily understands.
- ///
- unsigned getSmallConstantTripCount() const;
-
- /// getSmallConstantTripMultiple - Returns the largest constant divisor of the
- /// trip count of this loop as a normal unsigned value, if possible. This
- /// means that the actual trip count is always a multiple of the returned
- /// value (don't forget the trip count could very well be zero as well!).
- ///
- /// Returns 1 if the trip count is unknown or not guaranteed to be the
- /// multiple of a constant (which is also the case if the trip count is simply
- /// constant, use getSmallConstantTripCount for that case), Will also return 1
- /// if the trip count is very large (>= 2^32).
- unsigned getSmallConstantTripMultiple() const;
-
/// isLCSSAForm - Return true if the Loop is in LCSSA form
bool isLCSSAForm(DominatorTree &DT) const;
diff --git a/include/llvm/Analysis/PHITransAddr.h b/include/llvm/Analysis/PHITransAddr.h
index 033efba..ff9a247 100644
--- a/include/llvm/Analysis/PHITransAddr.h
+++ b/include/llvm/Analysis/PHITransAddr.h
@@ -20,7 +20,8 @@
namespace llvm {
class DominatorTree;
class TargetData;
-
+ class TargetLibraryInfo;
+
/// PHITransAddr - An address value which tracks and handles phi translation.
/// As we walk "up" the CFG through predecessors, we need to ensure that the
/// address we're tracking is kept up to date. For example, if we're analyzing
@@ -37,11 +38,14 @@ class PHITransAddr {
/// TD - The target data we are playing with if known, otherwise null.
const TargetData *TD;
+
+ /// TLI - The target library info if known, otherwise null.
+ const TargetLibraryInfo *TLI;
/// InstInputs - The inputs for our symbolic address.
SmallVector<Instruction*, 4> InstInputs;
public:
- PHITransAddr(Value *addr, const TargetData *td) : Addr(addr), TD(td) {
+ PHITransAddr(Value *addr, const TargetData *td) : Addr(addr), TD(td), TLI(0) {
// If the address is an instruction, the whole thing is considered an input.
if (Instruction *I = dyn_cast<Instruction>(Addr))
InstInputs.push_back(I);
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 10d933e..8661787 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -41,6 +41,7 @@ namespace llvm {
class Type;
class ScalarEvolution;
class TargetData;
+ class TargetLibraryInfo;
class LLVMContext;
class Loop;
class LoopInfo;
@@ -224,6 +225,10 @@ namespace llvm {
///
TargetData *TD;
+ /// TLI - The target library information for the target we are targeting.
+ ///
+ TargetLibraryInfo *TLI;
+
/// DT - The dominator tree.
///
DominatorTree *DT;
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index a4ad145..cd7e7f1 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -244,6 +244,8 @@ namespace llvm {
const Loop *L,
Type *ExpandTy,
Type *IntTy);
+ Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
+ Type *ExpandTy, Type *IntTy, bool useSubtract);
};
}
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index 6f82b2c..85c659c 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -156,6 +156,27 @@ namespace llvm {
/// are lifetime markers.
bool onlyUsedByLifetimeMarkers(const Value *V);
+ /// isSafeToSpeculativelyExecute - Return true if the instruction does not
+ /// have any effects besides calculating the result and does not have
+ /// undefined behavior.
+ ///
+ /// This method never returns true for an instruction that returns true for
+ /// mayHaveSideEffects; however, this method also does some other checks in
+ /// addition. It checks for undefined behavior, like dividing by zero or
+ /// loading from an invalid pointer (but not for undefined results, like a
+ /// shift with a shift amount larger than the width of the result). It checks
+ /// for malloc and alloca because speculatively executing them might cause a
+ /// memory leak. It also returns false for instructions related to control
+ /// flow, specifically terminators and PHI nodes.
+ ///
+ /// This method only looks at the instruction itself and its operands, so if
+ /// this method returns true, it is safe to move the instruction as long as
+ /// the correct dominance relationships for the operands and users hold.
+ /// However, this method can return true for instructions that read memory;
+ /// for such instructions, moving them may change the resulting value.
+ bool isSafeToSpeculativelyExecute(const Instruction *Inst,
+ const TargetData *TD = 0);
+
} // end namespace llvm
#endif
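The documented contract above is what speculation and hoisting transforms key off. A minimal sketch of guarding a hoist with it (hoistIfSafe is my illustration; dominance bookkeeping for the new insertion point is the caller's job):

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/Instruction.h"
    using namespace llvm;

    // Sketch: only move 'I' above its guarding branch when executing it
    // unconditionally cannot trap, leak, or otherwise misbehave.
    static bool hoistIfSafe(Instruction *I, Instruction *InsertPt,
                            const TargetData *TD) {
      if (!isSafeToSpeculativelyExecute(I, TD))
        return false;
      I->moveBefore(InsertPt);  // operands must still dominate the new spot
      return true;
    }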
diff --git a/include/llvm/AutoUpgrade.h b/include/llvm/AutoUpgrade.h
index 8ca3548..e13c4c1 100644
--- a/include/llvm/AutoUpgrade.h
+++ b/include/llvm/AutoUpgrade.h
@@ -39,14 +39,6 @@ namespace llvm {
/// This checks for global variables which should be upgraded. It returns true
/// if it requires upgrading.
bool UpgradeGlobalVariable(GlobalVariable *GV);
-
- /// This function checks debug info intrinsics. If an intrinsic is invalid
- /// then this function simply removes the intrinsic.
- void CheckDebugInfoIntrinsics(Module *M);
-
- /// This function upgrades the old pre-3.0 exception handling system to the
- /// new one. N.B. This will be removed in 3.1.
- void UpgradeExceptionHandling(Module *M);
} // End llvm namespace
#endif
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index 214d1d7..48d1695 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -29,23 +29,21 @@ namespace bitc {
// Module sub-block id's.
PARAMATTR_BLOCK_ID,
-
- /// TYPE_BLOCK_ID_OLD - This is the type descriptor block in LLVM 2.9 and
- /// earlier, replaced with TYPE_BLOCK_ID2. FIXME: Remove in LLVM 3.1.
- TYPE_BLOCK_ID_OLD,
+
+ UNUSED_ID1,
CONSTANTS_BLOCK_ID,
FUNCTION_BLOCK_ID,
- /// TYPE_SYMTAB_BLOCK_ID_OLD - This type descriptor is from LLVM 2.9 and
- /// earlier bitcode files. FIXME: Remove in LLVM 3.1
- TYPE_SYMTAB_BLOCK_ID_OLD,
+ UNUSED_ID2,
VALUE_SYMTAB_BLOCK_ID,
METADATA_BLOCK_ID,
METADATA_ATTACHMENT_ID,
- TYPE_BLOCK_ID_NEW
+ TYPE_BLOCK_ID_NEW,
+
+ USELIST_BLOCK_ID
};
@@ -63,10 +61,10 @@ namespace bitc {
MODULE_CODE_GLOBALVAR = 7,
// FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment,
- // section, visibility]
+ // section, visibility, gc, unnamed_addr]
MODULE_CODE_FUNCTION = 8,
- // ALIAS: [alias type, aliasee val#, linkage]
+ // ALIAS: [alias type, aliasee val#, linkage, visibility]
MODULE_CODE_ALIAS = 9,
/// MODULE_CODE_PURGEVALS: [numvals]
@@ -92,11 +90,12 @@ namespace bitc {
TYPE_CODE_OPAQUE = 6, // OPAQUE
TYPE_CODE_INTEGER = 7, // INTEGER: [width]
TYPE_CODE_POINTER = 8, // POINTER: [pointee type]
- TYPE_CODE_FUNCTION_OLD = 9, // FUNCTION: [vararg, attrid, retty, paramty x N]
+
+ TYPE_CODE_FUNCTION_OLD = 9, // FUNCTION: [vararg, attrid, retty,
+ // paramty x N]
+
+ // Code #10 is unused.
- // FIXME: This is the encoding used for structs in LLVM 2.9 and earlier.
- // REMOVE this in LLVM 3.1
- TYPE_CODE_STRUCT_OLD = 10, // STRUCT: [ispacked, eltty x N]
TYPE_CODE_ARRAY = 11, // ARRAY: [numelts, eltty]
TYPE_CODE_VECTOR = 12, // VECTOR: [numelts, eltty]
@@ -316,6 +315,10 @@ namespace bitc {
FUNC_CODE_INST_STOREATOMIC = 42 // STORE: [ptrty,ptr,val, align, vol
// ordering, synchscope]
};
+
+ enum UseListCodes {
+ USELIST_CODE_ENTRY = 1 // USELIST_CODE_ENTRY: TBD.
+ };
} // End bitc namespace
} // End llvm namespace
diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h
index d8e6407..fda801c 100644
--- a/include/llvm/CodeGen/Analysis.h
+++ b/include/llvm/CodeGen/Analysis.h
@@ -70,6 +70,10 @@ bool hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
///
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred);
+/// getFCmpCodeWithoutNaN - Given an ISD condition code comparing floats,
+/// return the equivalent code if we're allowed to assume that NaNs won't occur.
+ISD::CondCode getFCmpCodeWithoutNaN(ISD::CondCode CC);
+
/// getICmpCondCode - Return the ISD condition code corresponding to
/// the given LLVM IR integer condition code.
///
diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h
new file mode 100644
index 0000000..0a11e0c
--- /dev/null
+++ b/include/llvm/CodeGen/DFAPacketizer.h
@@ -0,0 +1,78 @@
+//=- llvm/CodeGen/DFAPacketizer.h - DFA Packetizer for VLIW ---*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This class implements a deterministic finite automaton (DFA) based
+// packetizing mechanism for VLIW architectures. It provides APIs to
+// determine whether there exists a legal mapping of instructions to
+// functional unit assignments in a packet. The DFA is auto-generated from
+// the target's Schedule.td file.
+//
+// A DFA consists of 3 major elements: states, inputs, and transitions. For
+// the packetizing mechanism, the input is the set of instruction classes for
+// a target. The state models all possible combinations of functional unit
+// consumption for a given set of instructions in a packet. A transition
+// models the addition of an instruction to a packet. In the DFA constructed
+// by this class, if an instruction can be added to a packet, then a valid
+// transition exists from the corresponding state. Invalid transitions
+// indicate that the instruction cannot be added to the current packet.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_DFAPACKETIZER_H
+#define LLVM_CODEGEN_DFAPACKETIZER_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+class MCInstrDesc;
+class MachineInstr;
+class InstrItineraryData;
+
+class DFAPacketizer {
+private:
+ typedef std::pair<unsigned, unsigned> UnsignPair;
+ const InstrItineraryData *InstrItins;
+ int CurrentState;
+ const int (*DFAStateInputTable)[2];
+ const unsigned *DFAStateEntryTable;
+
+ // CachedTable is a map from <FromState, Input> to ToState.
+ DenseMap<UnsignPair, unsigned> CachedTable;
+
+ // ReadTable - Read the DFA transition table and update CachedTable.
+ void ReadTable(unsigned int state);
+
+public:
+ DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
+ const unsigned *SET);
+
+ // Reset the current state to make all resources available.
+ void clearResources() {
+ CurrentState = 0;
+ }
+
+ // canReserveResources - Check if the resources occupied by a MCInstrDesc
+ // are available in the current state.
+ bool canReserveResources(const llvm::MCInstrDesc *MID);
+
+ // reserveResources - Reserve the resources occupied by a MCInstrDesc and
+ // change the current state to reflect that change.
+ void reserveResources(const llvm::MCInstrDesc *MID);
+
+ // canReserveResources - Check if the resources occupied by a machine
+ // instruction are available in the current state.
+ bool canReserveResources(llvm::MachineInstr *MI);
+
+ // reserveResources - Reserve the resources occupied by a machine
+ // instruction and change the current state to reflect that change.
+ void reserveResources(llvm::MachineInstr *MI);
+};
+}
+
+#endif
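As far as the interface suggests, the intended usage is a greedy loop: try to add each instruction to the current packet and start a new packet when the DFA rejects the transition. A hedged sketch (packetize is my illustration; DFAPacketizer construction and packet emission are target-specific and omitted):

    #include "llvm/CodeGen/DFAPacketizer.h"
    #include <vector>
    using namespace llvm;

    // Sketch: greedily pack a straight-line run of instructions into VLIW
    // packets using the auto-generated DFA.
    static void packetize(DFAPacketizer &DFA,
                          const std::vector<MachineInstr*> &MIs) {
      DFA.clearResources();              // empty packet, all units free
      for (unsigned i = 0, e = MIs.size(); i != e; ++i) {
        if (!DFA.canReserveResources(MIs[i])) {
          // emitPacket(...);            // target-specific, omitted
          DFA.clearResources();          // start a fresh packet
        }
        DFA.reserveResources(MIs[i]);    // add MI to the current packet
      }
      // emitPacket(...);                // flush the final packet
    }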
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 5cb7d8a..e57c8b1 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -361,6 +361,8 @@ private:
bool SelectExtractValue(const User *I);
+ bool SelectInsertValue(const User *I);
+
/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
/// Emit code to ensure constants are copied into registers when needed.
/// Remember the virtual registers that need to be added to the Machine PHI
@@ -381,6 +383,10 @@ private:
/// hasTrivialKill - Test whether the given value has exactly one use.
bool hasTrivialKill(const Value *V) const;
+
+  /// removeDeadCode - Remove all dead instructions between I and E.
+ void removeDeadCode(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E);
};
}
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index 184e96d..cb990ed 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -107,13 +107,6 @@ namespace ISD {
// and returns an outchain.
EH_SJLJ_LONGJMP,
- // OUTCHAIN = EH_SJLJ_DISPATCHSETUP(INCHAIN, setjmpval)
- // This corresponds to the eh.sjlj.dispatchsetup intrinsic. It takes an
- // input chain and the value returning from setjmp as inputs and returns an
- // outchain. By default, this does nothing. Targets can lower this to unwind
- // setup code if needed.
- EH_SJLJ_DISPATCHSETUP,
-
// TargetConstant* - Like Constant*, but the DAG does not do any folding,
// simplification, or lowering of the constant. They are used for constants
// which are known to fit in the immediate fields of their users, or for
@@ -319,6 +312,9 @@ namespace ISD {
/// Byte Swap and Counting operators.
BSWAP, CTTZ, CTLZ, CTPOP,
+ /// Bit counting operators with an undefined result for zero inputs.
+ CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF,
+
// Select(COND, TRUEVAL, FALSEVAL). If the type of the boolean COND is not
// i1 then the high bits must conform to getBooleanContents.
SELECT,
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 5a20e95..10a32d3 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -84,8 +84,9 @@ class MachineBasicBlock : public ilist_node<MachineBasicBlock> {
/// Alignment - Alignment of the basic block. Zero if the basic block does
/// not need to be aligned.
+ /// The alignment is specified as log2(bytes).
unsigned Alignment;
-
+
/// IsLandingPad - Indicate that this basic block is entered via an
/// exception handler.
bool IsLandingPad;
@@ -128,10 +129,89 @@ public:
const MachineFunction *getParent() const { return xParent; }
MachineFunction *getParent() { return xParent; }
- typedef Instructions::iterator iterator;
- typedef Instructions::const_iterator const_iterator;
- typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
- typedef std::reverse_iterator<iterator> reverse_iterator;
+
+ /// bundle_iterator - MachineBasicBlock iterator that automatically skips over
+ /// MIs that are inside bundles (i.e. walk top level MIs only).
+ template<typename Ty, typename IterTy>
+ class bundle_iterator
+ : public std::iterator<std::bidirectional_iterator_tag, Ty, ptrdiff_t> {
+ IterTy MII;
+
+ public:
+ bundle_iterator(IterTy mii) : MII(mii) {
+ assert(!MII->isInsideBundle() &&
+ "It's not legal to initialize bundle_iterator with a bundled MI");
+ }
+
+ bundle_iterator(Ty &mi) : MII(mi) {
+ assert(!mi.isInsideBundle() &&
+ "It's not legal to initialize bundle_iterator with a bundled MI");
+ }
+ bundle_iterator(Ty *mi) : MII(mi) {
+ assert((!mi || !mi->isInsideBundle()) &&
+ "It's not legal to initialize bundle_iterator with a bundled MI");
+ }
+ bundle_iterator(const bundle_iterator &I) : MII(I.MII) {}
+ bundle_iterator() : MII(0) {}
+
+ Ty &operator*() const { return *MII; }
+ Ty *operator->() const { return &operator*(); }
+
+ operator Ty*() const { return MII; }
+
+ bool operator==(const bundle_iterator &x) const {
+ return MII == x.MII;
+ }
+ bool operator!=(const bundle_iterator &x) const {
+ return !operator==(x);
+ }
+
+ // Increment and decrement operators...
+ bundle_iterator &operator--() { // predecrement - Back up
+ do {
+ --MII;
+ } while (MII->isInsideBundle());
+ return *this;
+ }
+ bundle_iterator &operator++() { // preincrement - Advance
+ do {
+ ++MII;
+ } while (MII->isInsideBundle());
+ return *this;
+ }
+ bundle_iterator operator--(int) { // postdecrement operators...
+ bundle_iterator tmp = *this;
+ do {
+ --MII;
+ } while (MII->isInsideBundle());
+ return tmp;
+ }
+ bundle_iterator operator++(int) { // postincrement operators...
+ bundle_iterator tmp = *this;
+ do {
+ ++MII;
+ } while (MII->isInsideBundle());
+ return tmp;
+ }
+
+ IterTy getInstrIterator() const {
+ return MII;
+ }
+ };
+
+ typedef Instructions::iterator instr_iterator;
+ typedef Instructions::const_iterator const_instr_iterator;
+ typedef std::reverse_iterator<instr_iterator> reverse_instr_iterator;
+ typedef
+ std::reverse_iterator<const_instr_iterator> const_reverse_instr_iterator;
+
+ typedef
+ bundle_iterator<MachineInstr,instr_iterator> iterator;
+ typedef
+ bundle_iterator<const MachineInstr,const_instr_iterator> const_iterator;
+ typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+ typedef std::reverse_iterator<iterator> reverse_iterator;
+
unsigned size() const { return (unsigned)Insts.size(); }
bool empty() const { return Insts.empty(); }
@@ -141,15 +221,53 @@ public:
const MachineInstr& front() const { return Insts.front(); }
const MachineInstr& back() const { return Insts.back(); }
+ instr_iterator instr_begin() { return Insts.begin(); }
+ const_instr_iterator instr_begin() const { return Insts.begin(); }
+ instr_iterator instr_end() { return Insts.end(); }
+ const_instr_iterator instr_end() const { return Insts.end(); }
+ reverse_instr_iterator instr_rbegin() { return Insts.rbegin(); }
+ const_reverse_instr_iterator instr_rbegin() const { return Insts.rbegin(); }
+ reverse_instr_iterator instr_rend () { return Insts.rend(); }
+ const_reverse_instr_iterator instr_rend () const { return Insts.rend(); }
+
iterator begin() { return Insts.begin(); }
const_iterator begin() const { return Insts.begin(); }
- iterator end() { return Insts.end(); }
- const_iterator end() const { return Insts.end(); }
- reverse_iterator rbegin() { return Insts.rbegin(); }
- const_reverse_iterator rbegin() const { return Insts.rbegin(); }
+ iterator end() {
+ instr_iterator II = instr_end();
+ if (II != instr_begin()) {
+ while (II->isInsideBundle())
+ --II;
+ }
+ return II;
+ }
+ const_iterator end() const {
+ const_instr_iterator II = instr_end();
+ if (II != instr_begin()) {
+ while (II->isInsideBundle())
+ --II;
+ }
+ return II;
+ }
+ reverse_iterator rbegin() {
+ reverse_instr_iterator II = instr_rbegin();
+ if (II != instr_rend()) {
+ while (II->isInsideBundle())
+ ++II;
+ }
+ return II;
+ }
+ const_reverse_iterator rbegin() const {
+ const_reverse_instr_iterator II = instr_rbegin();
+ if (II != instr_rend()) {
+ while (II->isInsideBundle())
+ ++II;
+ }
+ return II;
+ }
reverse_iterator rend () { return Insts.rend(); }
const_reverse_iterator rend () const { return Insts.rend(); }
+
// Machine-CFG iterators
typedef std::vector<MachineBasicBlock *>::iterator pred_iterator;
typedef std::vector<MachineBasicBlock *>::const_iterator const_pred_iterator;
@@ -219,10 +337,12 @@ public:
bool livein_empty() const { return LiveIns.empty(); }
/// getAlignment - Return alignment of the basic block.
+ /// The alignment is specified as log2(bytes).
///
unsigned getAlignment() const { return Alignment; }
/// setAlignment - Set alignment of the basic block.
+ /// The alignment is specified as log2(bytes).
///
void setAlignment(unsigned Align) { Alignment = Align; }
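Since the alignment field stores log2(bytes), a value of 4 requests 16-byte alignment. A minimal sketch, assuming an existing MachineBasicBlock &MBB:

    MBB.setAlignment(4);                        // request 1 << 4 == 16 bytes
    unsigned Bytes = 1u << MBB.getAlignment();  // Bytes == 16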
@@ -320,18 +440,16 @@ public:
/// instruction of this basic block. If a terminator does not exist,
/// it returns end()
iterator getFirstTerminator();
+ const_iterator getFirstTerminator() const;
- const_iterator getFirstTerminator() const {
- return const_cast<MachineBasicBlock*>(this)->getFirstTerminator();
- }
+ /// getFirstInstrTerminator - Same as getFirstTerminator, but ignores bundles
+ /// and returns an instr_iterator instead.
+ instr_iterator getFirstInstrTerminator();
/// getLastNonDebugInstr - returns an iterator to the last non-debug
/// instruction in the basic block, or end()
iterator getLastNonDebugInstr();
-
- const_iterator getLastNonDebugInstr() const {
- return const_cast<MachineBasicBlock*>(this)->getLastNonDebugInstr();
- }
+ const_iterator getLastNonDebugInstr() const;
/// SplitCriticalEdge - Split the critical edge from this block to the
/// given successor block, and return the newly created block, or null
@@ -344,32 +462,82 @@ public:
void pop_front() { Insts.pop_front(); }
void pop_back() { Insts.pop_back(); }
void push_back(MachineInstr *MI) { Insts.push_back(MI); }
+
template<typename IT>
- void insert(iterator I, IT S, IT E) { Insts.insert(I, S, E); }
- iterator insert(iterator I, MachineInstr *M) { return Insts.insert(I, M); }
- iterator insertAfter(iterator I, MachineInstr *M) {
+ void insert(instr_iterator I, IT S, IT E) {
+ Insts.insert(I, S, E);
+ }
+ instr_iterator insert(instr_iterator I, MachineInstr *M) {
+ return Insts.insert(I, M);
+ }
+ instr_iterator insertAfter(instr_iterator I, MachineInstr *M) {
return Insts.insertAfter(I, M);
}
- // erase - Remove the specified element or range from the instruction list.
- // These functions delete any instructions removed.
- //
- iterator erase(iterator I) { return Insts.erase(I); }
- iterator erase(iterator I, iterator E) { return Insts.erase(I, E); }
- MachineInstr *remove(MachineInstr *I) { return Insts.remove(I); }
- void clear() { Insts.clear(); }
+ template<typename IT>
+ void insert(iterator I, IT S, IT E) {
+ Insts.insert(I.getInstrIterator(), S, E);
+ }
+ iterator insert(iterator I, MachineInstr *M) {
+ return Insts.insert(I.getInstrIterator(), M);
+ }
+ iterator insertAfter(iterator I, MachineInstr *M) {
+ return Insts.insertAfter(I.getInstrIterator(), M);
+ }
+
+ /// erase - Remove the specified element or range from the instruction list.
+ /// These functions delete any instructions removed.
+ ///
+ instr_iterator erase(instr_iterator I) {
+ return Insts.erase(I);
+ }
+ instr_iterator erase(instr_iterator I, instr_iterator E) {
+ return Insts.erase(I, E);
+ }
+ instr_iterator erase_instr(MachineInstr *I) {
+ instr_iterator MII(I);
+ return erase(MII);
+ }
+
+ iterator erase(iterator I);
+ iterator erase(iterator I, iterator E) {
+ return Insts.erase(I.getInstrIterator(), E.getInstrIterator());
+ }
+ iterator erase(MachineInstr *I) {
+ iterator MII(I);
+ return erase(MII);
+ }
+
+ /// remove - Remove the instruction from the instruction list. This function
+ /// does not delete the instruction. WARNING: If the specified
+ /// instruction is a bundle this function will remove all the bundled
+ /// instructions as well. It is up to the caller to keep a list of the
+ /// bundled instructions and re-insert them if desired. This function is
+ /// *not recommended* for manipulating bundled instructions. Use
+ /// splice instead.
+ MachineInstr *remove(MachineInstr *I);
+ void clear() {
+ Insts.clear();
+ }
/// splice - Take an instruction from MBB 'Other' at the position From,
/// and insert it into this MBB right before 'where'.
- void splice(iterator where, MachineBasicBlock *Other, iterator From) {
+ void splice(instr_iterator where, MachineBasicBlock *Other,
+ instr_iterator From) {
Insts.splice(where, Other->Insts, From);
}
+ void splice(iterator where, MachineBasicBlock *Other, iterator From);
/// splice - Take a block of instructions from MBB 'Other' in the range [From,
/// To), and insert them into this MBB right before 'where'.
+ void splice(instr_iterator where, MachineBasicBlock *Other, instr_iterator From,
+ instr_iterator To) {
+ Insts.splice(where, Other->Insts, From, To);
+ }
void splice(iterator where, MachineBasicBlock *Other, iterator From,
iterator To) {
- Insts.splice(where, Other->Insts, From, To);
+ Insts.splice(where.getInstrIterator(), Other->Insts,
+ From.getInstrIterator(), To.getInstrIterator());
}
/// removeFromParent - This method unlinks 'this' from the containing
@@ -396,7 +564,10 @@ public:
/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
/// any DBG_VALUE instructions. Return UnknownLoc if there is none.
- DebugLoc findDebugLoc(MachineBasicBlock::iterator &MBBI);
+ DebugLoc findDebugLoc(instr_iterator MBBI);
+ DebugLoc findDebugLoc(iterator MBBI) {
+ return findDebugLoc(MBBI.getInstrIterator());
+ }
// Debugging methods.
void dump() const;
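A minimal sketch of the two iteration levels added above; countTopLevel and countAll are illustrative names, not part of this patch:

    #include "llvm/CodeGen/MachineBasicBlock.h"
    using namespace llvm;

    // Walks top-level MIs only: a finalized bundle counts as one instruction.
    unsigned countTopLevel(MachineBasicBlock &MBB) {
      unsigned N = 0;
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E; ++I)
        ++N;
      return N;
    }

    // Walks every MI, including those inside bundles.
    unsigned countAll(MachineBasicBlock &MBB) {
      unsigned N = 0;
      for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
           E = MBB.instr_end(); I != E; ++I)
        ++N;
      return N;
    }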
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index b347ca8..44402a9 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -465,7 +465,7 @@ public:
bool isSpillSlotObjectIndex(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
- return Objects[ObjectIdx+NumFixedObjects].isSpillSlot;;
+ return Objects[ObjectIdx+NumFixedObjects].isSpillSlot;
}
/// isDeadObjectIndex - Returns true if the specified index corresponds to
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index 6e08f7b..3a4568a 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -187,7 +187,7 @@ public:
///
void setAlignment(unsigned A) { Alignment = A; }
- /// EnsureAlignment - Make sure the function is at least 'A' bits aligned.
+ /// EnsureAlignment - Make sure the function is at least 1 << A bytes aligned.
void EnsureAlignment(unsigned A) {
if (Alignment < A) Alignment = A;
}
@@ -437,6 +437,7 @@ template <> struct GraphTraits<MachineFunction*> :
typedef MachineFunction::iterator nodes_iterator;
static nodes_iterator nodes_begin(MachineFunction *F) { return F->begin(); }
static nodes_iterator nodes_end (MachineFunction *F) { return F->end(); }
+ static unsigned size (MachineFunction *F) { return F->size(); }
};
template <> struct GraphTraits<const MachineFunction*> :
public GraphTraits<const MachineBasicBlock*> {
@@ -452,6 +453,9 @@ template <> struct GraphTraits<const MachineFunction*> :
static nodes_iterator nodes_end (const MachineFunction *F) {
return F->end();
}
+ static unsigned size (const MachineFunction *F) {
+ return F->size();
+ }
};
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index cae38f3..904f1a6 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -53,9 +53,11 @@ public:
};
enum MIFlag {
- NoFlags = 0,
- FrameSetup = 1 << 0 // Instruction is used as a part of
+ NoFlags = 0,
+ FrameSetup = 1 << 0, // Instruction is used as a part of
// function frame setup code.
+ InsideBundle = 1 << 1 // Instruction is inside a bundle (not
+ // the first MI in a bundle)
};
private:
const MCInstrDesc *MCID; // Instruction descriptor.
@@ -148,6 +150,12 @@ public:
AsmPrinterFlags |= (uint8_t)Flag;
}
+ /// clearAsmPrinterFlag - clear specific AsmPrinter flags
+ ///
+ void clearAsmPrinterFlag(CommentFlag Flag) {
+ AsmPrinterFlags &= ~Flag;
+ }
+
/// getFlags - Return the MI flags bitvector.
uint8_t getFlags() const {
return Flags;
@@ -167,10 +175,58 @@ public:
Flags = flags;
}
- /// clearAsmPrinterFlag - clear specific AsmPrinter flags
+ /// clearFlag - Clear a MI flag.
+ void clearFlag(MIFlag Flag) {
+ Flags &= ~((uint8_t)Flag);
+ }
+
+ /// isInsideBundle - Return true if MI is in a bundle (but not the first MI
+ /// in a bundle).
///
- void clearAsmPrinterFlag(CommentFlag Flag) {
- AsmPrinterFlags &= ~Flag;
+ /// A bundle looks like this before it's finalized:
+ /// ----------------
+ /// | MI |
+ /// ----------------
+ /// |
+ /// ----------------
+ /// | MI * |
+ /// ----------------
+ /// |
+ /// ----------------
+ /// | MI * |
+ /// ----------------
+ /// In this case, the first MI starts a bundle but is not inside a bundle; the
+ /// next two MIs are considered "inside" the bundle.
+ ///
+ /// After a bundle is finalized, it looks like this:
+ /// ----------------
+ /// | Bundle |
+ /// ----------------
+ /// |
+ /// ----------------
+ /// | MI * |
+ /// ----------------
+ /// |
+ /// ----------------
+ /// | MI * |
+ /// ----------------
+ /// |
+ /// ----------------
+ /// | MI * |
+ /// ----------------
+ /// The first instruction has the special opcode "BUNDLE". It's not "inside"
+ /// a bundle, but the next three MIs are.
+ bool isInsideBundle() const {
+ return getFlag(InsideBundle);
+ }
+
+ /// setIsInsideBundle - Set InsideBundle bit.
+ ///
+ void setIsInsideBundle(bool Val = true) {
+ if (Val)
+ setFlag(InsideBundle);
+ else
+ clearFlag(InsideBundle);
}
/// getDebugLoc - Returns the debug location id of this MachineInstr.
@@ -232,6 +288,272 @@ public:
return MemRefsEnd - MemRefs == 1;
}
+ /// API for querying MachineInstr properties. These are the same as the
+ /// MCInstrDesc queries, but they are bundle-aware.
+
+ enum QueryType {
+ IgnoreBundle, // Ignore bundles
+ AnyInBundle, // Return true if any instruction in bundle has property
+ AllInBundle // Return true if all instructions in bundle have property
+ };
+
+ /// hasProperty - Return true if the instruction (or in the case of a bundle,
+ /// the instructions inside the bundle) has the specified property.
+ /// The first argument is the property being queried. The second argument
+ /// controls how the query treats bundles: IgnoreBundle queries only this
+ /// instruction, AnyInBundle returns true when *any* bundled instruction has
+ /// the property, and AllInBundle returns true only if *all* of them do.
+ bool hasProperty(unsigned Flag, QueryType Type = AnyInBundle) const;
+
+ /// isVariadic - Return true if this instruction can have a variable number of
+ /// operands. In this case, the variable operands will be after the normal
+ /// operands but before the implicit definitions and uses (if any are
+ /// present).
+ bool isVariadic(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::Variadic, Type);
+ }
+
+ /// hasOptionalDef - Set if this instruction has an optional definition, e.g.
+ /// ARM instructions which can set condition code if 's' bit is set.
+ bool hasOptionalDef(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::HasOptionalDef, Type);
+ }
+
+ /// isPseudo - Return true if this is a pseudo instruction that doesn't
+ /// correspond to a real machine instruction.
+ ///
+ bool isPseudo(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::Pseudo, Type);
+ }
+
+ bool isReturn(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::Return, Type);
+ }
+
+ bool isCall(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::Call, Type);
+ }
+
+ /// isBarrier - Returns true if the specified instruction stops control flow
+ /// from executing the instruction immediately following it. Examples include
+ /// unconditional branches and return instructions.
+ bool isBarrier(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::Barrier, Type);
+ }
+
+ /// isTerminator - Returns true if this instruction is part of the terminator
+ /// for a basic block. Typically these are return and branch
+ /// instructions.
+ ///
+ /// Various passes use this to insert code into the bottom of a basic block,
+ /// but before control flow occurs.
+ bool isTerminator(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::Terminator, Type);
+ }
+
+ /// isBranch - Returns true if this is a conditional, unconditional, or
+ /// indirect branch. Predicates below can be used to discriminate between
+ /// these cases, and the TargetInstrInfo::AnalyzeBranch method can be used to
+ /// get more information.
+ bool isBranch(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::Branch, Type);
+ }
+
+ /// isIndirectBranch - Return true if this is an indirect branch, such as a
+ /// branch through a register.
+ bool isIndirectBranch(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::IndirectBranch, Type);
+ }
+
+ /// isConditionalBranch - Return true if this is a branch which may fall
+ /// through to the next instruction or may transfer control flow to some other
+ /// block. The TargetInstrInfo::AnalyzeBranch method can be used to get more
+ /// information about this branch.
+ bool isConditionalBranch(QueryType Type = AnyInBundle) const {
+ return isBranch(Type) & !isBarrier(Type) & !isIndirectBranch(Type);
+ }
+
+ /// isUnconditionalBranch - Return true if this is a branch which always
+ /// transfers control flow to some other block. The
+ /// TargetInstrInfo::AnalyzeBranch method can be used to get more information
+ /// about this branch.
+ bool isUnconditionalBranch(QueryType Type = AnyInBundle) const {
+ return isBranch(Type) & isBarrier(Type) & !isIndirectBranch(Type);
+ }
+
+ /// isPredicable - Return true if this instruction has a predicate operand
+ /// that controls execution. It may be set to 'always', or may be set to other
+ /// values. There are various methods in TargetInstrInfo that can be used to
+ /// control and modify the predicate in this instruction.
+ bool isPredicable(QueryType Type = AllInBundle) const {
+ // If it's a bundle, then all bundled instructions must be predicable for this
+ // to return true.
+ return hasProperty(MCID::Predicable, Type);
+ }
+
+ /// isCompare - Return true if this instruction is a comparison.
+ bool isCompare(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::Compare, Type);
+ }
+
+ /// isMoveImmediate - Return true if this instruction is a move immediate
+ /// (including conditional moves) instruction.
+ bool isMoveImmediate(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::MoveImm, Type);
+ }
+
+ /// isBitcast - Return true if this instruction is a bitcast instruction.
+ ///
+ bool isBitcast(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::Bitcast, Type);
+ }
+
+ /// isNotDuplicable - Return true if this instruction cannot be safely
+ /// duplicated. For example, if the instruction has a unique label attached
+ /// to it, duplicating it would cause multiple definition errors.
+ bool isNotDuplicable(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::NotDuplicable, Type);
+ }
+
+ /// hasDelaySlot - Returns true if the specified instruction has a delay slot
+ /// which must be filled by the code generator.
+ bool hasDelaySlot(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::DelaySlot, Type);
+ }
+
+ /// canFoldAsLoad - Return true for instructions that can be folded as
+ /// memory operands in other instructions. The most common use for this
+ /// is instructions that are simple loads from memory that don't modify
+ /// the loaded value in any way, but it can also be used for instructions
+ /// that can be expressed as constant-pool loads, such as V_SETALLONES
+ /// on x86, to allow them to be folded when it is beneficial.
+ /// This should only be set on instructions that return a value in their
+ /// only virtual register definition.
+ bool canFoldAsLoad(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::FoldableAsLoad, Type);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Side Effect Analysis
+ //===--------------------------------------------------------------------===//
+
+ /// mayLoad - Return true if this instruction could possibly read memory.
+ /// Instructions with this flag set are not necessarily simple load
+ /// instructions, they may load a value and modify it, for example.
+ bool mayLoad(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::MayLoad, Type);
+ }
+
+ /// mayStore - Return true if this instruction could possibly modify memory.
+ /// Instructions with this flag set are not necessarily simple store
+ /// instructions, they may store a modified value based on their operands, or
+ /// may not actually modify anything, for example.
+ bool mayStore(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::MayStore, Type);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Flags that indicate whether an instruction can be modified by a method.
+ //===--------------------------------------------------------------------===//
+
+ /// isCommutable - Return true if this may be a 2- or 3-address
+ /// instruction (of the form "X = op Y, Z, ..."), which produces the same
+ /// result if Y and Z are exchanged. If this flag is set, then the
+ /// TargetInstrInfo::commuteInstruction method may be used to hack on the
+ /// instruction.
+ ///
+ /// Note that this flag may be set on instructions that are only commutable
+ /// sometimes. In these cases, the call to commuteInstruction will fail.
+ /// Also note that some instructions require non-trivial modification to
+ /// commute them.
+ bool isCommutable(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::Commutable, Type);
+ }
+
+ /// isConvertibleTo3Addr - Return true if this is a 2-address instruction
+ /// which can be changed into a 3-address instruction if needed. Doing this
+ /// transformation can be profitable in the register allocator, because it
+ /// means that the instruction can use a 2-address form if possible, but
+ /// degrade into a less efficient form if the source and dest register cannot
+ /// be assigned to the same register. For example, this allows the x86
+ /// backend to turn a "shl reg, 3" instruction into an LEA instruction, which
+ /// is the same speed as the shift but has bigger code size.
+ ///
+ /// If this returns true, then the target must implement the
+ /// TargetInstrInfo::convertToThreeAddress method for this instruction, which
+ /// is allowed to fail if the transformation isn't valid for this specific
+ /// instruction (e.g. shl reg, 4 on x86).
+ ///
+ bool isConvertibleTo3Addr(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::ConvertibleTo3Addr, Type);
+ }
+
+ /// usesCustomInsertionHook - Return true if this instruction requires
+ /// custom insertion support when the DAG scheduler is inserting it into a
+ /// machine basic block. If this is true for the instruction, it basically
+ /// means that it is a pseudo instruction used at SelectionDAG time that is
+ /// expanded out into magic code by the target when MachineInstrs are formed.
+ ///
+ /// If this is true, the TargetLoweringInfo::InsertAtEndOfBasicBlock method
+ /// is used to insert this into the MachineBasicBlock.
+ bool usesCustomInsertionHook(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::UsesCustomInserter, Type);
+ }
+
+ /// hasPostISelHook - Return true if this instruction requires *adjustment*
+ /// after instruction selection by calling a target hook. For example, this
+ /// can be used to fill in ARM 's' optional operand depending on whether
+ /// the conditional flag register is used.
+ bool hasPostISelHook(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::HasPostISelHook, Type);
+ }
+
+ /// isRematerializable - Returns true if this instruction is a candidate for
+ /// remat. This flag is deprecated, please don't use it anymore. If this
+ /// flag is set, the isReallyTriviallyReMaterializable() method is called to
+ /// verify the instruction is really rematable.
+ bool isRematerializable(QueryType Type = AllInBundle) const {
+ // It's only possible to re-mat a bundle if all bundled instructions are
+ // re-materializable.
+ return hasProperty(MCID::Rematerializable, Type);
+ }
+
+ /// isAsCheapAsAMove - Returns true if this instruction costs the same as (or
+ /// less than) a move instruction. This is useful during certain types of
+ /// optimizations (e.g., remat during two-address conversion or machine licm)
+ /// where we would like to remat or hoist the instruction, but not if it costs
+ /// more than moving the instruction into the appropriate register. Note, we
+ /// are not marking copies from and to the same register class with this flag.
+ bool isAsCheapAsAMove(QueryType Type = AllInBundle) const {
+ // Only returns true for a bundle if all bundled instructions are cheap.
+ // FIXME: This probably requires a target hook.
+ return hasProperty(MCID::CheapAsAMove, Type);
+ }
+
+ /// hasExtraSrcRegAllocReq - Returns true if this instruction's source
+ /// operands have special register allocation requirements not captured by the
+ /// operand register classes. e.g. ARM::STRD's two source registers must be an
+ /// even / odd pair, ARM::STM registers have to be in ascending order.
+ /// Post-register allocation passes should not attempt to change allocations
+ /// for sources of instructions with this flag.
+ bool hasExtraSrcRegAllocReq(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::ExtraSrcRegAllocReq, Type);
+ }
+
+ /// hasExtraDefRegAllocReq - Returns true if this instruction's def
+ /// operands have special register allocation requirements not captured by the
+ /// operand register classes. e.g. ARM::LDRD's two def registers must be an
+ /// even / odd pair, ARM::LDM registers have to be in ascending order.
+ /// Post-register allocation passes should not attempt to change allocations
+ /// for definitions of instructions with this flag.
+ bool hasExtraDefRegAllocReq(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::ExtraDefRegAllocReq, Type);
+ }
+
enum MICheckType {
CheckDefs, // Check all operands for equality
CheckKillDead, // Check all operands including kill / dead markers
@@ -281,6 +603,9 @@ public:
bool isRegSequence() const {
return getOpcode() == TargetOpcode::REG_SEQUENCE;
}
+ bool isBundle() const {
+ return getOpcode() == TargetOpcode::BUNDLE;
+ }
bool isCopy() const {
return getOpcode() == TargetOpcode::COPY;
}
@@ -300,6 +625,9 @@ public:
getOperand(0).getSubReg() == getOperand(1).getSubReg();
}
+ /// getBundleSize - Return the number of instructions inside the MI bundle.
+ unsigned getBundleSize() const;
+
/// readsRegister - Return true if the MachineInstr reads the specified
/// register. If TargetRegisterInfo is passed, then it also checks if there
/// is a read of a super-register.
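A sketch of how the bundle-aware queries compose, assuming a pass that already has a MachineInstr *MI (which may be a finalized BUNDLE); canHoist is an illustrative name:

    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    bool canHoist(const MachineInstr *MI) {
      // AnyInBundle: true if *any* instruction in the bundle touches memory.
      if (MI->mayLoad(MachineInstr::AnyInBundle) ||
          MI->mayStore(MachineInstr::AnyInBundle))
        return false;
      // AllInBundle: a bundle is rematerializable only if every MI in it is.
      return MI->isRematerializable(MachineInstr::AllInBundle);
    }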
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index b989027..8025642 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -209,6 +209,30 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
return MachineInstrBuilder(MI).addReg(DestReg, RegState::Define);
}
+inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
+ MachineBasicBlock::instr_iterator I,
+ DebugLoc DL,
+ const MCInstrDesc &MCID,
+ unsigned DestReg) {
+ MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL);
+ BB.insert(I, MI);
+ return MachineInstrBuilder(MI).addReg(DestReg, RegState::Define);
+}
+
+inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
+ MachineInstr *I,
+ DebugLoc DL,
+ const MCInstrDesc &MCID,
+ unsigned DestReg) {
+ if (I->isInsideBundle()) {
+ MachineBasicBlock::instr_iterator MII = I;
+ return BuildMI(BB, MII, DL, MCID, DestReg);
+ }
+
+ MachineBasicBlock::iterator MII = I;
+ return BuildMI(BB, MII, DL, MCID, DestReg);
+}
+
/// BuildMI - This version of the builder inserts the newly-built
/// instruction before the given position in the given MachineBasicBlock, and
/// does NOT take a destination register.
@@ -222,6 +246,28 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
return MachineInstrBuilder(MI);
}
+inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
+ MachineBasicBlock::instr_iterator I,
+ DebugLoc DL,
+ const MCInstrDesc &MCID) {
+ MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL);
+ BB.insert(I, MI);
+ return MachineInstrBuilder(MI);
+}
+
+inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
+ MachineInstr *I,
+ DebugLoc DL,
+ const MCInstrDesc &MCID) {
+ if (I->isInsideBundle()) {
+ MachineBasicBlock::instr_iterator MII = I;
+ return BuildMI(BB, MII, DL, MCID);
+ }
+
+ MachineBasicBlock::iterator MII = I;
+ return BuildMI(BB, MII, DL, MCID);
+}
+
/// BuildMI - This version of the builder inserts the newly-built
/// instruction at the end of the given MachineBasicBlock, and does NOT take a
/// destination register.
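A sketch of the MachineInstr*-based overload above, which dispatches on isInsideBundle() to choose the iterator level; the surrounding pass is assumed to supply DL, MCID, and DestReg:

    #include "llvm/CodeGen/MachineInstrBuilder.h"
    using namespace llvm;

    // Inserts before MI whether or not MI sits inside a finalized bundle.
    MachineInstrBuilder insertBefore(MachineBasicBlock &MBB, MachineInstr *MI,
                                     DebugLoc DL, const MCInstrDesc &MCID,
                                     unsigned DestReg) {
      return BuildMI(MBB, MI, DL, MCID, DestReg);
    }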
diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h
new file mode 100644
index 0000000..8c150e6
--- /dev/null
+++ b/include/llvm/CodeGen/MachineInstrBundle.h
@@ -0,0 +1,34 @@
+//===-- CodeGen/MachineInstrBundle.h - MI bundle utilities ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utility functions to manipulate machine instruction
+// bundles.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEINSTRBUNDLE_H
+#define LLVM_CODEGEN_MACHINEINSTRBUNDLE_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+namespace llvm {
+
+/// FinalizeBundle - Finalize a machine instruction bundle which includes
+/// a sequence of instructions starting from FirstMI to LastMI (inclusive).
+/// This routine adds a BUNDLE instruction to represent the bundle, it adds
+/// IsInternalRead markers to MachineOperands which are defined inside the
+/// bundle, and it copies externally visible defs and uses to the BUNDLE
+/// instruction.
+void FinalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI,
+ MachineBasicBlock::instr_iterator LastMI);
+
+} // End llvm namespace
+
+#endif
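A minimal sketch of calling the new entry point; bundleRange is an illustrative wrapper, and the range is inclusive per the comment above:

    #include "llvm/CodeGen/MachineInstrBundle.h"
    using namespace llvm;

    void bundleRange(MachineBasicBlock &MBB,
                     MachineBasicBlock::instr_iterator FirstMI,
                     MachineBasicBlock::instr_iterator LastMI) {
      // Emits the BUNDLE header and copies externally visible defs/uses.
      FinalizeBundle(MBB, FirstMI, LastMI);
    }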
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index 5440a63..2a977de 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -102,6 +102,17 @@ private:
///
bool IsUndef : 1;
+ /// IsInternalRead - True if this operand reads a value that was defined
+ /// inside the same instruction or bundle. This flag can be set on both use
+ /// and def operands. On a sub-register def operand, it refers to the part
+ /// of the register that isn't written. On a full-register def operand, it
+ /// is a noop.
+ ///
+ /// When this flag is set, the instruction bundle must contain at least one
+ /// other def of the register. If multiple instructions in the bundle define
+ /// the register, the meaning is target-defined.
+ bool IsInternalRead : 1;
+
/// IsEarlyClobber - True if this MO_Register 'def' operand is written to
/// by the MachineInstr before all input registers are read. This is used to
/// model the GCC inline asm '&' constraint modifier.
@@ -258,6 +269,11 @@ public:
return IsUndef;
}
+ bool isInternalRead() const {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ return IsInternalRead;
+ }
+
bool isEarlyClobber() const {
assert(isReg() && "Wrong MachineOperand accessor");
return IsEarlyClobber;
@@ -272,9 +288,12 @@ public:
/// register. A use operand with the <undef> flag set doesn't read its
/// register. A sub-register def implicitly reads the other parts of the
/// register being redefined unless the <undef> flag is set.
+ ///
+ /// This refers to reading the register value from before the current
+ /// instruction or bundle. Internal bundle reads are not included.
bool readsReg() const {
assert(isReg() && "Wrong MachineOperand accessor");
- return !isUndef() && (isUse() || getSubReg());
+ return !isUndef() && !isInternalRead() && (isUse() || getSubReg());
}
/// getNextOperandForReg - Return the next MachineOperand in the function that
@@ -343,6 +362,11 @@ public:
IsUndef = Val;
}
+ void setIsInternalRead(bool Val = true) {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ IsInternalRead = Val;
+ }
+
void setIsEarlyClobber(bool Val = true) {
assert(isReg() && IsDef && "Wrong MachineOperand accessor");
IsEarlyClobber = Val;
@@ -498,6 +522,7 @@ public:
Op.IsKill = isKill;
Op.IsDead = isDead;
Op.IsUndef = isUndef;
+ Op.IsInternalRead = false;
Op.IsEarlyClobber = isEarlyClobber;
Op.IsDebug = isDebug;
Op.SmallContents.RegNo = Reg;
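A sketch of the new flag in use, assuming a MachineOperand MO that is known to be satisfied by a def earlier in the same bundle:

    #include "llvm/CodeGen/MachineOperand.h"
    using namespace llvm;

    void markInternalUse(MachineOperand &MO) {
      if (MO.isReg() && MO.isUse())
        MO.setIsInternalRead();  // readsReg() now excludes this operand
    }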
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 45b3353..b4f6cac 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -238,6 +238,10 @@ namespace llvm {
///
FunctionPass *createExecutionDependencyFixPass(const TargetRegisterClass *RC);
+ /// createUnpackMachineBundles - This pass unpacks machine instruction bundles.
+ ///
+ FunctionPass *createUnpackMachineBundlesPass();
+
} // End llvm namespace
#endif
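A sketch of scheduling the new pass; where it belongs in a real pipeline is target-dependent, so the placement suggested here is purely illustrative:

    #include "llvm/CodeGen/Passes.h"
    #include "llvm/PassManager.h"

    void addUnbundling(llvm::PassManagerBase &PM) {
      // Undo bundling before emission on targets that don't emit bundles.
      PM.add(llvm::createUnpackMachineBundlesPass());
    }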
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index ccd35c4..f637ea2 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -139,6 +139,7 @@ class SelectionDAG {
const TargetSelectionDAGInfo &TSI;
MachineFunction *MF;
LLVMContext *Context;
+ CodeGenOpt::Level OptLevel;
/// EntryNode - The starting token.
SDNode EntryNode;
@@ -187,7 +188,7 @@ class SelectionDAG {
SelectionDAG(const SelectionDAG&); // Do not implement.
public:
- explicit SelectionDAG(const TargetMachine &TM);
+ explicit SelectionDAG(const TargetMachine &TM, llvm::CodeGenOpt::Level);
~SelectionDAG();
/// init - Prepare this SelectionDAG to process code in the given
@@ -1034,6 +1035,7 @@ private:
void *&InsertPos);
SDNode *FindModifiedNodeSlot(SDNode *N, const SDValue *Ops, unsigned NumOps,
void *&InsertPos);
+ SDNode *UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc loc);
void DeleteNodeNotInCSEMaps(SDNode *N);
void DeallocateNode(SDNode *N);
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index 09be4fb..3c5c5df 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -29,6 +29,7 @@ namespace llvm {
class MachineFunction;
class MachineInstr;
class TargetLowering;
+ class TargetLibraryInfo;
class TargetInstrInfo;
class FunctionLoweringInfo;
class ScheduleHazardRecognizer;
@@ -42,6 +43,7 @@ class SelectionDAGISel : public MachineFunctionPass {
public:
const TargetMachine &TM;
const TargetLowering &TLI;
+ const TargetLibraryInfo *LibInfo;
FunctionLoweringInfo *FuncInfo;
MachineFunction *MF;
MachineRegisterInfo *RegInfo;
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index 11e5b5a..b4356b1 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -32,9 +32,6 @@
/* Define if position independent code is enabled */
#cmakedefine ENABLE_PIC
-/* Define if threads enabled */
-#cmakedefine ENABLE_THREADS ${ENABLE_THREADS}
-
/* Define if timestamp information (e.g., __DATE___) is allowed */
#cmakedefine ENABLE_TIMESTAMPS ${ENABLE_TIMESTAMPS}
@@ -548,18 +545,21 @@
/* Installation directory for data files */
#cmakedefine LLVM_DATADIR "${LLVM_DATADIR}"
+/* Target triple LLVM will generate code for by default */
+#cmakedefine LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}"
+
/* Installation directory for documentation */
#cmakedefine LLVM_DOCSDIR "${LLVM_DOCSDIR}"
+/* Define if threads enabled */
+#cmakedefine01 LLVM_ENABLE_THREADS
+
/* Installation directory for config files */
#cmakedefine LLVM_ETCDIR "${LLVM_ETCDIR}"
/* Has gcc/MSVC atomic intrinsics */
#cmakedefine01 LLVM_HAS_ATOMICS
-/* Host triple we were built on */
-#cmakedefine LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}"
-
/* Installation directory for include files */
#cmakedefine LLVM_INCLUDEDIR "${LLVM_INCLUDEDIR}"
@@ -671,6 +671,9 @@
/* Define to 1 if the `S_IS*' macros in <sys/stat.h> do not work properly. */
#undef STAT_MACROS_BROKEN
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
#undef TIME_WITH_SYS_TIME
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index b3a9590..30afa58 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -30,9 +30,6 @@
/* Define if position independent code is enabled */
#undef ENABLE_PIC
-/* Define if threads enabled */
-#undef ENABLE_THREADS
-
/* Define if timestamp information (e.g., __DATE___) is allowed */
#undef ENABLE_TIMESTAMPS
@@ -552,6 +549,9 @@
/* Installation directory for documentation */
#undef LLVM_DOCSDIR
+/* Define if threads enabled */
+#undef LLVM_ENABLE_THREADS
+
/* Installation directory for config files */
#undef LLVM_ETCDIR
diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake
index 8383886..0c5b542 100644
--- a/include/llvm/Config/llvm-config.h.cmake
+++ b/include/llvm/Config/llvm-config.h.cmake
@@ -25,18 +25,21 @@
/* Installation directory for data files */
#cmakedefine LLVM_DATADIR "${LLVM_DATADIR}"
+/* Target triple LLVM will generate code for by default */
+#cmakedefine LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}"
+
/* Installation directory for documentation */
#cmakedefine LLVM_DOCSDIR "${LLVM_DOCSDIR}"
+/* Define if threads enabled */
+#cmakedefine01 LLVM_ENABLE_THREADS
+
/* Installation directory for config files */
#cmakedefine LLVM_ETCDIR "${LLVM_ETCDIR}"
/* Has gcc/MSVC atomic intrinsics */
#cmakedefine01 LLVM_HAS_ATOMICS
-/* Host triple we were built on */
-#cmakedefine LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}"
-
/* Installation directory for include files */
#cmakedefine LLVM_INCLUDEDIR "${LLVM_INCLUDEDIR}"
diff --git a/include/llvm/Config/llvm-config.h.in b/include/llvm/Config/llvm-config.h.in
index 1dd7ed6..30a935b 100644
--- a/include/llvm/Config/llvm-config.h.in
+++ b/include/llvm/Config/llvm-config.h.in
@@ -25,18 +25,21 @@
/* Installation directory for data files */
#undef LLVM_DATADIR
+/* Target triple LLVM will generate code for by default */
+#undef LLVM_DEFAULT_TARGET_TRIPLE
+
/* Installation directory for documentation */
#undef LLVM_DOCSDIR
+/* Define if threads enabled */
+#undef LLVM_ENABLE_THREADS
+
/* Installation directory for config files */
#undef LLVM_ETCDIR
/* Has gcc/MSVC atomic intrinsics */
#undef LLVM_HAS_ATOMICS
-/* Host triple we were built on */
-#undef LLVM_DEFAULT_TARGET_TRIPLE
-
/* Installation directory for include files */
#undef LLVM_INCLUDEDIR
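Because #cmakedefine01 always defines the macro to 0 or 1, client code now tests its value with #if rather than its presence with #ifdef. A minimal sketch:

    #include "llvm/Config/llvm-config.h"

    #if LLVM_ENABLE_THREADS
    // Thread-aware path: locking and worker threads are available.
    #else
    // Single-threaded fallback.
    #endif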
diff --git a/include/llvm/DerivedTypes.h b/include/llvm/DerivedTypes.h
index 445c3de..b9ade51 100644
--- a/include/llvm/DerivedTypes.h
+++ b/include/llvm/DerivedTypes.h
@@ -374,6 +374,7 @@ public:
///
static VectorType *getInteger(VectorType *VTy) {
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
+ assert(EltBits && "Element size must be of a non-zero size");
Type *EltTy = IntegerType::get(VTy->getContext(), EltBits);
return VectorType::get(EltTy, VTy->getNumElements());
}
@@ -408,6 +409,7 @@ public:
unsigned getNumElements() const { return NumElements; }
/// @brief Return the number of bits in the Vector type.
+ /// Returns zero when the vector is a vector of pointers.
unsigned getBitWidth() const {
return NumElements * getElementType()->getPrimitiveSizeInBits();
}
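A sketch of getInteger with the new assert in mind; toIntVector is an illustrative wrapper:

    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    // Maps e.g. <4 x float> to <4 x i32>: same element count, integer
    // elements of the same bit width. The assert rejects element types whose
    // primitive size is zero, such as pointers.
    VectorType *toIntVector(VectorType *VTy) {
      return VectorType::getInteger(VTy);
    }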
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index cf85671..e89fd2e 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -26,6 +26,7 @@
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
namespace llvm {
@@ -41,6 +42,7 @@ class MachineCodeInfo;
class Module;
class MutexGuard;
class TargetData;
+class Triple;
class Type;
/// \brief Helper class for helping synchronize access to the global address map
@@ -132,14 +134,12 @@ protected:
Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM);
static ExecutionEngine *(*MCJITCtor)(
Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM);
static ExecutionEngine *(*InterpCtor)(Module *M, std::string *ErrorStr);
@@ -462,6 +462,7 @@ private:
CodeGenOpt::Level OptLevel;
JITMemoryManager *JMM;
bool AllocateGVsWithCode;
+ TargetOptions Options;
Reloc::Model RelocModel;
CodeModel::Model CMModel;
std::string MArch;
@@ -475,6 +476,7 @@ private:
ErrorStr = NULL;
OptLevel = CodeGenOpt::Default;
JMM = NULL;
+ Options = TargetOptions();
AllocateGVsWithCode = false;
RelocModel = Reloc::Default;
CMModel = CodeModel::JITDefault;
@@ -518,6 +520,13 @@ public:
return *this;
}
+ /// setTargetOptions - Set the target options that the ExecutionEngine
+ /// target is using. Defaults to TargetOptions().
+ EngineBuilder &setTargetOptions(const TargetOptions &Opts) {
+ Options = Opts;
+ return *this;
+ }
+
/// setRelocationModel - Set the relocation model that the ExecutionEngine
/// target is using. Defaults to target specific default "Reloc::Default".
EngineBuilder &setRelocationModel(Reloc::Model RM) {
@@ -574,12 +583,14 @@ public:
/// selectTarget - Pick a target either via -march or by guessing the native
/// arch. Add any CPU features specified via -mcpu or -mattr.
- static TargetMachine *selectTarget(Module *M,
+ static TargetMachine *selectTarget(const Triple &TargetTriple,
StringRef MArch,
StringRef MCPU,
const SmallVectorImpl<std::string>& MAttrs,
+ const TargetOptions &Options,
Reloc::Model RM,
CodeModel::Model CM,
+ CodeGenOpt::Level OL,
std::string *Err);
ExecutionEngine *create();
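A sketch of the new setTargetOptions() hook on EngineBuilder; makeEngine is an illustrative helper:

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/Target/TargetOptions.h"
    using namespace llvm;

    ExecutionEngine *makeEngine(Module *M, std::string *Err) {
      TargetOptions Opts;          // adjust codegen options here if desired
      return EngineBuilder(M)
          .setErrorStr(Err)
          .setTargetOptions(Opts)  // added by this merge
          .create();
    }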
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index ceb1771..ce87f16 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -135,7 +135,6 @@ void initializeLoopInfoPass(PassRegistry&);
void initializeLoopInstSimplifyPass(PassRegistry&);
void initializeLoopRotatePass(PassRegistry&);
void initializeLoopSimplifyPass(PassRegistry&);
-void initializeLoopSplitterPass(PassRegistry&);
void initializeLoopStrengthReducePass(PassRegistry&);
void initializeGlobalMergePass(PassRegistry&);
void initializeLoopUnrollPass(PassRegistry&);
@@ -232,6 +231,7 @@ void initializeUnreachableMachineBlockElimPass(PassRegistry&);
void initializeVerifierPass(PassRegistry&);
void initializeVirtRegMapPass(PassRegistry&);
void initializeInstSimplifierPass(PassRegistry&);
+void initializeUnpackMachineBundlesPass(PassRegistry&);
}
diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h
index a1492f3..2529f24 100644
--- a/include/llvm/InstrTypes.h
+++ b/include/llvm/InstrTypes.h
@@ -388,6 +388,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value)
/// if (isa<CastInst>(Instr)) { ... }
/// @brief Base class of casting instructions.
class CastInst : public UnaryInstruction {
+ virtual void anchor();
protected:
/// @brief Constructor with insert-before-instruction semantics for subclasses
CastInst(Type *Ty, unsigned iType, Value *S,
diff --git a/include/llvm/Instruction.h b/include/llvm/Instruction.h
index 934e890..9c5ac44 100644
--- a/include/llvm/Instruction.h
+++ b/include/llvm/Instruction.h
@@ -143,7 +143,7 @@ public:
/// getMetadata - Get the metadata of given kind attached to this Instruction.
/// If the metadata is not found then return null.
- MDNode *getMetadata(const char *Kind) const {
+ MDNode *getMetadata(StringRef Kind) const {
if (!hasMetadata()) return 0;
return getMetadataImpl(Kind);
}
@@ -168,7 +168,7 @@ public:
/// node. This updates/replaces metadata if already present, or removes it if
/// Node is null.
void setMetadata(unsigned KindID, MDNode *Node);
- void setMetadata(const char *Kind, MDNode *Node);
+ void setMetadata(StringRef Kind, MDNode *Node);
/// setDebugLoc - Set the debug location information for this instruction.
void setDebugLoc(const DebugLoc &Loc) { DbgLoc = Loc; }
@@ -185,7 +185,7 @@ private:
// These are all implemented in Metadata.cpp.
MDNode *getMetadataImpl(unsigned KindID) const;
- MDNode *getMetadataImpl(const char *Kind) const;
+ MDNode *getMetadataImpl(StringRef Kind) const;
void getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,MDNode*> > &)const;
void getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
MDNode*> > &) const;
@@ -244,26 +244,6 @@ public:
return mayWriteToMemory() || mayThrow();
}
- /// isSafeToSpeculativelyExecute - Return true if the instruction does not
- /// have any effects besides calculating the result and does not have
- /// undefined behavior.
- ///
- /// This method never returns true for an instruction that returns true for
- /// mayHaveSideEffects; however, this method also does some other checks in
- /// addition. It checks for undefined behavior, like dividing by zero or
- /// loading from an invalid pointer (but not for undefined results, like a
- /// shift with a shift amount larger than the width of the result). It checks
- /// for malloc and alloca because speculatively executing them might cause a
- /// memory leak. It also returns false for instructions related to control
- /// flow, specifically terminators and PHI nodes.
- ///
- /// This method only looks at the instruction itself and its operands, so if
- /// this method returns true, it is safe to move the instruction as long as
- /// the correct dominance relationships for the operands and users hold.
- /// However, this method can return true for instructions that read memory;
- /// for such instructions, moving them may change the resulting value.
- bool isSafeToSpeculativelyExecute() const;
-
/// clone() - Create a copy of 'this' instruction that is identical in all
/// ways except the following:
/// * The instruction has no parent
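With the StringRef overloads above, the metadata kind no longer has to be a C string literal. A minimal sketch; copyKind is an illustrative name:

    #include "llvm/Instruction.h"
    #include "llvm/Metadata.h"
    using namespace llvm;

    void copyKind(Instruction *From, Instruction *To, StringRef Kind) {
      if (MDNode *MD = From->getMetadata(Kind))
        To->setMetadata(Kind, MD);
    }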
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
index 3faab35..e87950e 100644
--- a/include/llvm/Instructions.h
+++ b/include/llvm/Instructions.h
@@ -776,6 +776,10 @@ public:
static Type *getIndexedType(Type *Ptr, ArrayRef<Constant *> IdxList);
static Type *getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList);
+ /// getAddressSpace - Returns the address space used by the GEP pointer.
+ ///
+ static unsigned getAddressSpace(Value *Ptr);
+
inline op_iterator idx_begin() { return op_begin()+1; }
inline const_op_iterator idx_begin() const { return op_begin()+1; }
inline op_iterator idx_end() { return op_end(); }
@@ -788,7 +792,7 @@ public:
return getOperand(0);
}
static unsigned getPointerOperandIndex() {
- return 0U; // get index for modifying correct operand
+ return 0U; // get index for modifying correct operand.
}
unsigned getPointerAddressSpace() const {
@@ -797,10 +801,25 @@ public:
/// getPointerOperandType - Method to return the pointer operand as a
/// PointerType.
- PointerType *getPointerOperandType() const {
- return reinterpret_cast<PointerType*>(getPointerOperand()->getType());
+ Type *getPointerOperandType() const {
+ return getPointerOperand()->getType();
}
+ /// getGEPReturnType - Returns the pointer type returned by the GEP
+ /// instruction, which may be a vector of pointers.
+ static Type *getGEPReturnType(Value *Ptr, ArrayRef<Value *> IdxList) {
+ Type *PtrTy = PointerType::get(checkGEPType(
+ getIndexedType(Ptr->getType(), IdxList)),
+ getAddressSpace(Ptr));
+ // Vector GEP
+ if (Ptr->getType()->isVectorTy()) {
+ unsigned NumElem = cast<VectorType>(Ptr->getType())->getNumElements();
+ return VectorType::get(PtrTy, NumElem);
+ }
+
+ // Scalar GEP
+ return PtrTy;
+ }
unsigned getNumIndices() const { // Note: always non-negative
return getNumOperands() - 1;
@@ -847,10 +866,7 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr,
unsigned Values,
const Twine &NameStr,
Instruction *InsertBefore)
- : Instruction(PointerType::get(checkGEPType(
- getIndexedType(Ptr->getType(), IdxList)),
- cast<PointerType>(Ptr->getType())
- ->getAddressSpace()),
+ : Instruction(getGEPReturnType(Ptr, IdxList),
GetElementPtr,
OperandTraits<GetElementPtrInst>::op_end(this) - Values,
Values, InsertBefore) {
@@ -861,10 +877,7 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr,
unsigned Values,
const Twine &NameStr,
BasicBlock *InsertAtEnd)
- : Instruction(PointerType::get(checkGEPType(
- getIndexedType(Ptr->getType(), IdxList)),
- cast<PointerType>(Ptr->getType())
- ->getAddressSpace()),
+ : Instruction(getGEPReturnType(Ptr, IdxList),
GetElementPtr,
OperandTraits<GetElementPtrInst>::op_end(this) - Values,
Values, InsertAtEnd) {
@@ -905,7 +918,7 @@ public:
"Both operands to ICmp instruction are not of the same type!");
// Check that the operands are the right type
assert((getOperand(0)->getType()->isIntOrIntVectorTy() ||
- getOperand(0)->getType()->isPointerTy()) &&
+ getOperand(0)->getType()->getScalarType()->isPointerTy()) &&
"Invalid operand types for ICmp instruction");
}
@@ -945,7 +958,7 @@ public:
"Both operands to ICmp instruction are not of the same type!");
// Check that the operands are the right type
assert((getOperand(0)->getType()->isIntOrIntVectorTy() ||
- getOperand(0)->getType()->isPointerTy()) &&
+ getOperand(0)->getType()->getScalarType()->isPointerTy()) &&
"Invalid operand types for ICmp instruction");
}
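A sketch of the new vector-aware GEP return type; gepResultType is an illustrative wrapper:

    #include "llvm/Instructions.h"
    #include "llvm/ADT/ArrayRef.h"
    using namespace llvm;

    // For a scalar pointer Ptr this is a plain pointer type; for a
    // vector-of-pointers Ptr it is a vector with one result pointer per lane.
    Type *gepResultType(Value *Ptr, ArrayRef<Value*> IdxList) {
      return GetElementPtrInst::getGEPReturnType(Ptr, IdxList);
    }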
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index 5e5e150..5f31862 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -284,8 +284,8 @@ def int_expect : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
let Properties = [IntrNoMem] in {
def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
- def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
- def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
+ def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
+ def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
}
//===------------------------ Debugger Intrinsics -------------------------===//
@@ -326,7 +326,6 @@ let Properties = [IntrNoMem] in {
def int_eh_sjlj_callsite : Intrinsic<[], [llvm_i32_ty]>;
}
def int_eh_sjlj_functioncontext : Intrinsic<[], [llvm_ptr_ty]>;
-def int_eh_sjlj_dispatch_setup : Intrinsic<[], [llvm_i32_ty]>;
def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty]>;
@@ -445,3 +444,4 @@ include "llvm/IntrinsicsARM.td"
include "llvm/IntrinsicsCellSPU.td"
include "llvm/IntrinsicsXCore.td"
include "llvm/IntrinsicsPTX.td"
+include "llvm/IntrinsicsHexagon.td"
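A sketch of emitting llvm.ctlz with its new second operand, commonly described as the "is zero undef" flag; emitCtlz and the header paths reflect the tree layout of this era and are assumptions:

    #include "llvm/Intrinsics.h"
    #include "llvm/Module.h"
    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    Value *emitCtlz(IRBuilder<> &B, Module *M, Value *X, bool ZeroIsUndef) {
      Type *Tys[1] = { X->getType() };
      Function *Ctlz = Intrinsic::getDeclaration(M, Intrinsic::ctlz, Tys);
      // The i1 operand states whether a zero input yields an undefined result.
      return B.CreateCall2(Ctlz, X, B.getInt1(ZeroIsUndef));
    }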
diff --git a/include/llvm/IntrinsicsHexagon.td b/include/llvm/IntrinsicsHexagon.td
new file mode 100644
index 0000000..eb5dc8f
--- /dev/null
+++ b/include/llvm/IntrinsicsHexagon.td
@@ -0,0 +1,3671 @@
+//===- IntrinsicsHexagon.td - Defines Hexagon intrinsics ---*- tablegen -*-===//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the Hexagon-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Definitions for all Hexagon intrinsics.
+//
+// All Hexagon intrinsics start with "llvm.hexagon.".
+let TargetPrefix = "hexagon" in {
+ /// Hexagon_Intrinsic - Base class for all Hexagon intrinsics.
+ class Hexagon_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
+ list<LLVMType> param_types,
+ list<IntrinsicProperty> properties>
+ : GCCBuiltin<!strconcat("__builtin_", GCCIntSuffix)>,
+ Intrinsic<ret_types, param_types, properties>;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// DEF_FUNCTION_TYPE_1(QI_ftype_MEM,BT_BOOL,BT_PTR) ->
+// Hexagon_qi_mem_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_qi_mem_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i1_ty], [llvm_ptr_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(void_ftype_SI,BT_VOID,BT_INT) ->
+// Hexagon_void_si_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_void_si_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_void_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(HI_ftype_SI,BT_I16,BT_INT) ->
+// Hexagon_hi_si_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_hi_si_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i16_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(SI_ftype_SI,BT_INT,BT_INT) ->
+// Hexagon_si_si_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_si_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(DI_ftype_SI,BT_LONGLONG,BT_INT) ->
+// Hexagon_di_si_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_si_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(SI_ftype_DI,BT_INT,BT_LONGLONG) ->
+// Hexagon_si_di_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_di_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(DI_ftype_DI,BT_LONGLONG,BT_LONGLONG) ->
+// Hexagon_di_di_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_di_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(QI_ftype_QI,BT_BOOL,BT_BOOL) ->
+// Hexagon_qi_qi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_qi_qi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i1_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(QI_ftype_SI,BT_BOOL,BT_INT) ->
+// Hexagon_qi_si_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_qi_si_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i1_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(DI_ftype_QI,BT_LONGLONG,BT_BOOL) ->
+// Hexagon_di_qi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_qi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(SI_ftype_QI,BT_INT,BT_BOOL) ->
+// Hexagon_si_qi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_qi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(QI_ftype_SISI,BT_BOOL,BT_INT,BT_INT) ->
+// Hexagon_qi_sisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_qi_sisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(void_ftype_SISI,BT_VOID,BT_INT,BT_INT) ->
+// Hexagon_void_sisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_void_sisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_void_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(SI_ftype_SISI,BT_INT,BT_INT,BT_INT) ->
+// Hexagon_si_sisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_sisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(USI_ftype_SISI,BT_UINT,BT_INT,BT_INT) ->
+// Hexagon_usi_sisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_usi_sisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(DI_ftype_SISI,BT_LONGLONG,BT_INT,BT_INT) ->
+// Hexagon_di_sisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_sisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(UDI_ftype_SISI,BT_ULONGLONG,BT_INT,BT_INT) ->
+// Hexagon_udi_sisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_udi_sisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(DI_ftype_SIDI,BT_LONGLONG,BT_INT,BT_LONGLONG) ->
+// Hexagon_di_sidi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_sidi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(DI_ftype_DISI,BT_LONGLONG,BT_LONGLONG,BT_INT) ->
+// Hexagon_di_disi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_disi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(SI_ftype_SIDI,BT_INT,BT_INT,BT_LONGLONG) ->
+// Hexagon_si_sidi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_sidi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(SI_ftype_DIDI,BT_INT,BT_LONGLONG,BT_LONGLONG) ->
+// Hexagon_si_didi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_didi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(DI_ftype_DIDI,BT_LONGLONG,BT_LONGLONG,BT_LONGLONG) ->
+// Hexagon_di_didi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_didi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(UDI_ftype_DIDI,BT_ULONGLONG,BT_LONGLONG,BT_LONGLONG) ->
+// Hexagon_udi_didi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_udi_didi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(SI_ftype_DISI,BT_INT,BT_LONGLONG,BT_INT) ->
+// Hexagon_si_disi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_disi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(QI_ftype_DIDI,BT_BOOL,BT_LONGLONG,BT_LONGLONG) ->
+// Hexagon_qi_didi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_qi_didi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i1_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(QI_ftype_QIQI,BT_BOOL,BT_BOOL,BT_BOOL) ->
+// Hexagon_qi_qiqi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_qi_qiqi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(QI_ftype_QIQIQI,BT_BOOL,BT_BOOL,BT_BOOL,BT_BOOL) ->
+// Hexagon_qi_qiqiqi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_qi_qiqiqi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(SI_ftype_QIQI,BT_INT,BT_BOOL,BT_BOOL) ->
+// Hexagon_si_qiqi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_qiqi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(SI_ftype_QISI,BT_INT,BT_BOOL,BT_INT) ->
+// Hexagon_si_qisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_qisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i1_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(void_ftype_SISISI,BT_VOID,BT_INT,BT_INT,BT_INT) ->
+// Hexagon_void_sisisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_void_sisisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_void_ty], [llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(SI_ftype_SISISI,BT_INT,BT_INT,BT_INT,BT_INT) ->
+// Hexagon_si_sisisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_sisisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(DI_ftype_SISISI,BT_LONGLONG,BT_INT,BT_INT,BT_INT) ->
+// Hexagon_di_sisisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_sisisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(SI_ftype_DISISI,BT_INT,BT_LONGLONG,BT_INT,BT_INT) ->
+// Hexagon_si_disisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_disisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(DI_ftype_DISISI,BT_LONGLONG,BT_LONGLONG,BT_INT,BT_INT) ->
+// Hexagon_di_disisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_disisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(SI_ftype_SIDISI,BT_INT,BT_INT,BT_LONGLONG,BT_INT) ->
+// Hexagon_si_sidisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_sidisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(DI_ftype_DIDISI,BT_LONGLONG,BT_LONGLONG,
+// BT_LONGLONG,BT_INT) ->
+// Hexagon_di_didisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_didisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(SI_ftype_SIDIDI,BT_INT,BT_INT,BT_LONGLONG,BT_LONGLONG) ->
+// Hexagon_si_sididi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_sididi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty,
+ llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(DI_ftype_DIDIDI,BT_LONGLONG,BT_LONGLONG,BT_LONGLONG,
+// BT_LONGLONG) ->
+// Hexagon_di_dididi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_dididi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
+ llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(SI_ftype_SISIDI,BT_INT,BT_INT,BT_INT,BT_LONGLONG) ->
+// Hexagon_si_sisidi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_sisidi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
+ llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(SI_ftype_QISISI,BT_INT,BT_BOOL,BT_INT,BT_INT) ->
+// Hexagon_si_qisisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_qisisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(DI_ftype_QISISI,BT_LONGLONG,BT_BOOL,BT_INT,BT_INT) ->
+// Hexagon_di_qisisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_qisisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i1_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(DI_ftype_QIDIDI,BT_LONGLONG,BT_BOOL,BT_LONGLONG,
+// BT_LONGLONG) ->
+// Hexagon_di_qididi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_qididi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty,
+ llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(DI_ftype_DIDIQI,BT_LONGLONG,BT_LONGLONG,BT_LONGLONG,
+// BT_BOOL) ->
+// Hexagon_di_didiqi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_didiqi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_4(SI_ftype_SISISISI,BT_INT,BT_INT,BT_INT,BT_INT,BT_INT) ->
+// Hexagon_si_sisisisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_sisisisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_4(DI_ftype_DIDISISI,BT_LONGLONG,BT_LONGLONG,
+// BT_LONGLONG,BT_INT,BT_INT) ->
+// Hexagon_di_didisisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_didisisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
+ llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
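+//
+// Each "def int_hexagon_*" below instantiates one of the signature classes
+// above. TableGen derives the LLVM intrinsic name from the def name, with
+// "int_" becoming "llvm." and underscores becoming dots, while the string
+// argument (GCCIntSuffix) is forwarded to the Hexagon_Intrinsic base class
+// for the GCC builtin mapping. As a sketch, assuming Hexagon_qi_sisi maps QI
+// to llvm_i1_ty like the qi_didi class above, the first def below would be
+// reached from IR as:
+//
+//   %p = call i1 @llvm.hexagon.C2.cmpeq(i32 %a, i32 %b)
+//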
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpeq,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpeq : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpeq">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpgt,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpgt : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgt">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpgtu,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpgtu : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgtu">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpeqp,QI_ftype_DIDI,2)
+//
+def int_hexagon_C2_cmpeqp : Hexagon_qi_didi_Intrinsic<"HEXAGON.C2.cmpeqp">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpgtp,QI_ftype_DIDI,2)
+//
+def int_hexagon_C2_cmpgtp : Hexagon_qi_didi_Intrinsic<"HEXAGON.C2.cmpgtp">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpgtup,QI_ftype_DIDI,2)
+//
+def int_hexagon_C2_cmpgtup : Hexagon_qi_didi_Intrinsic<"HEXAGON.C2.cmpgtup">;
+//
+// BUILTIN_INFO(HEXAGON.C2_bitsset,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_bitsset : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.bitsset">;
+//
+// BUILTIN_INFO(HEXAGON.C2_bitsclr,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_bitsclr : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.bitsclr">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpeqi,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpeqi : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpeqi">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpgti,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpgti : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgti">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpgtui,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpgtui : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgtui">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpgei,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpgei : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgei">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpgeui,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpgeui : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgeui">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmplt,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmplt : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmplt">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpltu,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpltu : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpltu">;
+//
+// BUILTIN_INFO(HEXAGON.C2_bitsclri,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_bitsclri : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.bitsclri">;
+//
+// BUILTIN_INFO(HEXAGON.C2_and,QI_ftype_QIQI,2)
+//
+def int_hexagon_C2_and : Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.and">;
+//
+// BUILTIN_INFO(HEXAGON.C2_or,QI_ftype_QIQI,2)
+//
+def int_hexagon_C2_or : Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.or">;
+//
+// BUILTIN_INFO(HEXAGON.C2_xor,QI_ftype_QIQI,2)
+//
+def int_hexagon_C2_xor : Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.xor">;
+//
+// BUILTIN_INFO(HEXAGON.C2_andn,QI_ftype_QIQI,2)
+//
+def int_hexagon_C2_andn : Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.andn">;
+//
+// BUILTIN_INFO(HEXAGON.C2_not,QI_ftype_QI,1)
+//
+def int_hexagon_C2_not : Hexagon_qi_qi_Intrinsic<"HEXAGON.C2.not">;
+//
+// BUILTIN_INFO(HEXAGON.C2_orn,QI_ftype_QIQI,2)
+//
+def int_hexagon_C2_orn : Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.orn">;
+//
+// BUILTIN_INFO(HEXAGON.C2_pxfer_map,QI_ftype_QI,1)
+//
+def int_hexagon_C2_pxfer_map : Hexagon_qi_qi_Intrinsic<"HEXAGON.C2.pxfer.map">;
+//
+// BUILTIN_INFO(HEXAGON.C2_any8,QI_ftype_QI,1)
+//
+def int_hexagon_C2_any8 : Hexagon_qi_qi_Intrinsic<"HEXAGON.C2.any8">;
+//
+// BUILTIN_INFO(HEXAGON.C2_all8,QI_ftype_QI,1)
+//
+def int_hexagon_C2_all8 : Hexagon_qi_qi_Intrinsic<"HEXAGON.C2.all8">;
+//
+// BUILTIN_INFO(HEXAGON.C2_vitpack,SI_ftype_QIQI,2)
+//
+def int_hexagon_C2_vitpack : Hexagon_si_qiqi_Intrinsic<"HEXAGON.C2.vitpack">;
+//
+// BUILTIN_INFO(HEXAGON.C2_mux,SI_ftype_QISISI,3)
+//
+def int_hexagon_C2_mux : Hexagon_si_qisisi_Intrinsic<"HEXAGON.C2.mux">;
+//
+// BUILTIN_INFO(HEXAGON.C2_muxii,SI_ftype_QISISI,3)
+//
+def int_hexagon_C2_muxii : Hexagon_si_qisisi_Intrinsic<"HEXAGON.C2.muxii">;
+//
+// BUILTIN_INFO(HEXAGON.C2_muxir,SI_ftype_QISISI,3)
+//
+def int_hexagon_C2_muxir : Hexagon_si_qisisi_Intrinsic<"HEXAGON.C2.muxir">;
+//
+// BUILTIN_INFO(HEXAGON.C2_muxri,SI_ftype_QISISI,3)
+//
+def int_hexagon_C2_muxri : Hexagon_si_qisisi_Intrinsic<"HEXAGON.C2.muxri">;
+//
+// BUILTIN_INFO(HEXAGON.C2_vmux,DI_ftype_QIDIDI,3)
+//
+def int_hexagon_C2_vmux : Hexagon_di_qididi_Intrinsic<"HEXAGON.C2.vmux">;
+//
+// BUILTIN_INFO(HEXAGON.C2_mask,DI_ftype_QI,1)
+//
+def int_hexagon_C2_mask : Hexagon_di_qi_Intrinsic<"HEXAGON.C2.mask">;
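+//
+// The A2_vcmp* intrinsics below are SIMD compares on 64-bit register pairs:
+// vcmpb/vcmph/vcmpw compare byte, halfword, or word lanes and deposit the
+// per-lane results in the predicate register (hence QI_ftype_DIDI).
+//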
+//
+// BUILTIN_INFO(HEXAGON.A2_vcmpbeq,QI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vcmpbeq : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpbeq">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vcmpbgtu,QI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vcmpbgtu : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpbgtu">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vcmpheq,QI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vcmpheq : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpheq">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vcmphgt,QI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vcmphgt : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmphgt">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vcmphgtu,QI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vcmphgtu : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmphgtu">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vcmpweq,QI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vcmpweq : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpweq">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vcmpwgt,QI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vcmpwgt : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpwgt">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vcmpwgtu,QI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vcmpwgtu : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpwgtu">;
+//
+// BUILTIN_INFO(HEXAGON.C2_tfrpr,SI_ftype_QI,1)
+//
+def int_hexagon_C2_tfrpr : Hexagon_si_qi_Intrinsic<"HEXAGON.C2.tfrpr">;
+//
+// BUILTIN_INFO(HEXAGON.C2_tfrrp,QI_ftype_SI,1)
+//
+def int_hexagon_C2_tfrrp : Hexagon_qi_si_Intrinsic<"HEXAGON.C2.tfrrp">;
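+//
+// Suffix scheme for the M2 multiply intrinsics that follow (per the Hexagon
+// ISA naming, not restated for each def): hh/hl/lh/ll select the high or low
+// 16-bit half of each source operand, s0/s1 select a result shift of 0 or 1
+// bit, acc/nac add the product to or subtract it from the accumulator
+// operand, sat saturates, and rnd rounds; mpyd produces a 64-bit product and
+// mpyu/mpyud are the unsigned forms.
+//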
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_hh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_hh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hl_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_hl_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hl_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_hl_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_lh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_lh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_lh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_lh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_ll_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_ll_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_ll_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_ll_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_hh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_hh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hl_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_hl_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hl_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_hl_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_lh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_lh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_lh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_lh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_ll_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_ll_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_ll_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_ll_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_sat_hh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_sat_hh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hl_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_sat_hl_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hl_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_sat_hl_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_lh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_sat_lh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_lh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_sat_lh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_ll_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_sat_ll_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_ll_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_sat_ll_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_sat_hh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_sat_hh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hl_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_sat_hl_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hl_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_sat_hl_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_lh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_sat_lh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_lh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_sat_lh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_ll_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_sat_ll_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_ll_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_sat_ll_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_hh_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_hh_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_hh_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_hh_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_hl_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_hl_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_hl_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_hl_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_lh_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_lh_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_lh_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_lh_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_ll_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_ll_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_ll_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_ll_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hh_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_hh_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hh_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_hh_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hl_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_hl_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hl_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_hl_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_lh_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_lh_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_lh_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_lh_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_ll_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_ll_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_ll_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_ll_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hh_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_rnd_hh_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hh_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_rnd_hh_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hl_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_rnd_hl_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hl_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_rnd_hl_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_lh_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_rnd_lh_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_lh_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_rnd_lh_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_ll_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_rnd_ll_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_ll_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_rnd_ll_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hh_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_rnd_hh_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hh_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_rnd_hh_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hl_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_rnd_hl_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hl_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_rnd_hl_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_lh_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_rnd_lh_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_lh_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_rnd_lh_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_ll_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_rnd_ll_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_ll_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_rnd_ll_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hh_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_acc_hh_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hh_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_acc_hh_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hl_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_acc_hl_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hl_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_acc_hl_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_lh_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_acc_lh_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_lh_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_acc_lh_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_ll_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_acc_ll_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_ll_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_acc_ll_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hh_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_nac_hh_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hh_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_nac_hh_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hl_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_nac_hl_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hl_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_nac_hl_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_lh_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_nac_lh_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_lh_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_nac_lh_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_ll_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_nac_ll_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_ll_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_nac_ll_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_hh_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_hh_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_hh_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_hh_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_hl_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_hl_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_hl_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_hl_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_lh_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_lh_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_lh_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_lh_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_ll_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_ll_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_ll_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_ll_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hh_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_rnd_hh_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hh_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_rnd_hh_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hl_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_rnd_hl_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hl_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_rnd_hl_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_lh_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_rnd_lh_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_lh_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_rnd_lh_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_ll_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_rnd_ll_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_ll_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_rnd_ll_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_acc_hh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_acc_hh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hl_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_acc_hl_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hl_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_acc_hl_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_lh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_acc_lh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_lh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_acc_lh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_ll_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_acc_ll_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_ll_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_acc_ll_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_nac_hh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_nac_hh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hl_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_nac_hl_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hl_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_nac_hl_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_lh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_nac_lh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_lh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_nac_lh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_ll_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_nac_ll_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_ll_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_nac_ll_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_hh_s0,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_hh_s0 :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_hh_s1,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_hh_s1 :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_hl_s0,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_hl_s0 :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_hl_s1,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_hl_s1 :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_lh_s0,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_lh_s0 :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_lh_s1,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_lh_s1 :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_ll_s0,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_ll_s0 :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_ll_s1,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_ll_s1 :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hh_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_acc_hh_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hh_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_acc_hh_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hl_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_acc_hl_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hl_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_acc_hl_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_lh_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_acc_lh_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_lh_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_acc_lh_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_ll_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_acc_ll_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_ll_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_acc_ll_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hh_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_nac_hh_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hh_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_nac_hh_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hl_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_nac_hl_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hl_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_nac_hl_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_lh_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_nac_lh_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_lh_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_nac_lh_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_ll_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_nac_ll_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_ll_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_nac_ll_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_hh_s0,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyud_hh_s0 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_hh_s1,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyud_hh_s1 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_hl_s0,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyud_hl_s0 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_hl_s1,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyud_hl_s1 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_lh_s0,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyud_lh_s0 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_lh_s1,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyud_lh_s1 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_ll_s0,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyud_ll_s0 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_ll_s1,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyud_ll_s1 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpysmi,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpysmi :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpysmi">;
+//
+// BUILTIN_INFO(HEXAGON.M2_macsip,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_macsip :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.macsip">;
+//
+// BUILTIN_INFO(HEXAGON.M2_macsin,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_macsin :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.macsin">;
+//
+// BUILTIN_INFO(HEXAGON.M2_dpmpyss_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_dpmpyss_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.dpmpyss.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_dpmpyss_acc_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_dpmpyss_acc_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.dpmpyss.acc.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_dpmpyss_nac_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_dpmpyss_nac_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.dpmpyss.nac.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_dpmpyuu_s0,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_dpmpyuu_s0 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.dpmpyuu.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_dpmpyuu_acc_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_dpmpyuu_acc_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.dpmpyuu.acc.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_dpmpyuu_nac_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_dpmpyuu_nac_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.dpmpyuu.nac.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_up,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_up :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.up">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_up,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_up :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.up">;
+//
+// BUILTIN_INFO(HEXAGON.M2_dpmpyss_rnd_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_dpmpyss_rnd_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.dpmpyss.rnd.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyi,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyi :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyi">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyui,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyui :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyui">;
+//
+// BUILTIN_INFO(HEXAGON.M2_maci,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_maci :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.maci">;
+//
+// BUILTIN_INFO(HEXAGON.M2_acci,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_acci :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.acci">;
+//
+// BUILTIN_INFO(HEXAGON.M2_accii,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_accii :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.accii">;
+//
+// BUILTIN_INFO(HEXAGON.M2_nacci,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_nacci :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.nacci">;
+//
+// BUILTIN_INFO(HEXAGON.M2_naccii,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_naccii :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.naccii">;
+//
+// BUILTIN_INFO(HEXAGON.M2_subacc,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_subacc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.subacc">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_vmpy2s_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.vmpy2s.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_vmpy2s_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.vmpy2s.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmac2s_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_vmac2s_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.vmac2s.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmac2s_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_vmac2s_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.vmac2s.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s0pack,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_vmpy2s_s0pack :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.vmpy2s.s0pack">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s1pack,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_vmpy2s_s1pack :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.vmpy2s.s1pack">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmac2,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_vmac2 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.vmac2">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmpy2es_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vmpy2es_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vmpy2es.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmpy2es_s1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vmpy2es_s1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vmpy2es.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmac2es_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vmac2es_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vmac2es.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmac2es_s1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vmac2es_s1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vmac2es.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmac2es,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vmac2es :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vmac2es">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrmac_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vrmac_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrmac.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrmpy_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vrmpy_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrmpy.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vdmpyrs_s0,SI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vdmpyrs_s0 :
+Hexagon_si_didi_Intrinsic<"HEXAGON.M2.vdmpyrs.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vdmpyrs_s1,SI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vdmpyrs_s1 :
+Hexagon_si_didi_Intrinsic<"HEXAGON.M2.vdmpyrs.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vdmacs_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vdmacs_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vdmacs.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vdmacs_s1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vdmacs_s1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vdmacs.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vdmpys_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vdmpys_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vdmpys.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vdmpys_s1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vdmpys_s1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vdmpys.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpyrs_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpyrs_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.cmpyrs.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpyrs_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpyrs_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.cmpyrs.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpyrsc_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpyrsc_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.cmpyrsc.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpyrsc_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpyrsc_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.cmpyrsc.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmacs_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cmacs_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacs.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmacs_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cmacs_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacs.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmacsc_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cmacsc_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacsc.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmacsc_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cmacsc_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacsc.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpys_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpys_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpys.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpys_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpys_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpys.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpysc_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpysc_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpysc.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpysc_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpysc_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpysc.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cnacs_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cnacs_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cnacs.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cnacs_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cnacs_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cnacs.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cnacsc_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cnacsc_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cnacsc.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cnacsc_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cnacsc_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cnacsc.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmpys_s1,DI_ftype_DISI,2)
+//
+def int_hexagon_M2_vrcmpys_s1 :
+Hexagon_di_disi_Intrinsic<"HEXAGON.M2.vrcmpys.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmpys_acc_s1,DI_ftype_DIDISI,3)
+//
+def int_hexagon_M2_vrcmpys_acc_s1 :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.M2.vrcmpys.acc.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmpys_s1rp,SI_ftype_DISI,2)
+//
+def int_hexagon_M2_vrcmpys_s1rp :
+Hexagon_si_disi_Intrinsic<"HEXAGON.M2.vrcmpys.s1rp">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmacls_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmacls_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacls.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmacls_s1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmacls_s1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacls.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmachs_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmachs_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmachs.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmachs_s1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmachs_s1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmachs.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyl_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyl_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyl_s1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyl_s1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyh_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyh_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyh_s1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyh_s1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmacls_rs0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmacls_rs0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacls.rs0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmacls_rs1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmacls_rs1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacls.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmachs_rs0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmachs_rs0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmachs.rs0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmachs_rs1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmachs_rs1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmachs.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyl_rs0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyl_rs0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyl.rs0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyl_rs1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyl_rs1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyl.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyh_rs0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyh_rs0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyh.rs0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyh_rs1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyh_rs1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyh.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_hmmpyl_rs1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_hmmpyl_rs1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.hmmpyl.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_hmmpyh_rs1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_hmmpyh_rs1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.hmmpyh.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmaculs_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmaculs_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmaculs.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmaculs_s1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmaculs_s1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmaculs.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmacuhs_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmacuhs_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacuhs.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmacuhs_s1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmacuhs_s1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacuhs.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyul_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyul_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyul.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyul_s1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyul_s1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyul.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyuh_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyuh_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyuh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyuh_s1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyuh_s1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyuh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmaculs_rs0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmaculs_rs0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmaculs.rs0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmaculs_rs1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmaculs_rs1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmaculs.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmacuhs_rs0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmacuhs_rs0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacuhs.rs0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmacuhs_rs1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmacuhs_rs1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacuhs.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyul_rs0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyul_rs0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyul.rs0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyul_rs1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyul_rs1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyul.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyuh_rs0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyuh_rs0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyuh.rs0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyuh_rs1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyuh_rs1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyuh.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmaci_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vrcmaci_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrcmaci.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmacr_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vrcmacr_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrcmacr.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmaci_s0c,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vrcmaci_s0c :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrcmaci.s0c">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmacr_s0c,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vrcmacr_s0c :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrcmacr.s0c">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmaci_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cmaci_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmaci.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmacr_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cmacr_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacr.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmpyi_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vrcmpyi_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrcmpyi.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmpyr_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vrcmpyr_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrcmpyr.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmpyi_s0c,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vrcmpyi_s0c :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrcmpyi.s0c">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmpyr_s0c,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vrcmpyr_s0c :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrcmpyr.s0c">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpyi_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpyi_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpyi.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpyr_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpyr_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpyr.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vcmpy_s0_sat_i,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vcmpy_s0_sat_i :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vcmpy.s0.sat.i">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vcmpy_s0_sat_r,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vcmpy_s0_sat_r :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vcmpy.s0.sat.r">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vcmpy_s1_sat_i,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vcmpy_s1_sat_i :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vcmpy.s1.sat.i">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vcmpy_s1_sat_r,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vcmpy_s1_sat_r :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vcmpy.s1.sat.r">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vcmac_s0_sat_i,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vcmac_s0_sat_i :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vcmac.s0.sat.i">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vcmac_s0_sat_r,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vcmac_s0_sat_r :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vcmac.s0.sat.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vcrotate,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_vcrotate :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.vcrotate">;
+//
+// BUILTIN_INFO(HEXAGON.A2_add,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_add :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.add">;
+//
+// BUILTIN_INFO(HEXAGON.A2_sub,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_sub :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.sub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addsat,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addsat :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addsat">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subsat,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subsat :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subsat">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addi,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addi :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addi">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_l16_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_l16_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_l16_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_l16_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.hl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_l16_lh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_l16_lh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.lh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_l16_hh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_l16_hh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.hh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_l16_sat_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_l16_sat_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.sat.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_l16_sat_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_l16_sat_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.sat.hl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_l16_sat_lh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_l16_sat_lh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.sat.lh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_l16_sat_hh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_l16_sat_hh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.sat.hh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_l16_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_l16_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.l16.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_l16_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_l16_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.l16.hl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_l16_sat_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_l16_sat_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.l16.sat.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_l16_sat_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_l16_sat_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.l16.sat.hl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_h16_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_h16_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_h16_lh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_h16_lh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.lh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_h16_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_h16_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.hl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_h16_hh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_h16_hh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.hh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_h16_sat_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.sat.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_lh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_h16_sat_lh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.sat.lh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_h16_sat_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.sat.hl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_hh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_h16_sat_hh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.sat.hh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_h16_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_h16_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_h16_lh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_h16_lh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.lh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_h16_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_h16_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.hl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_h16_hh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_h16_hh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.hh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_h16_sat_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.sat.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_lh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_h16_sat_lh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.sat.lh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_h16_sat_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.sat.hl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_hh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_h16_sat_hh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.sat.hh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_aslh,SI_ftype_SI,1)
+//
+def int_hexagon_A2_aslh :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.aslh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_asrh,SI_ftype_SI,1)
+//
+def int_hexagon_A2_asrh :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.asrh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_addp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.addp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addpsat,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_addpsat :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.addpsat">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addsp,DI_ftype_SIDI,2)
+//
+def int_hexagon_A2_addsp :
+Hexagon_di_sidi_Intrinsic<"HEXAGON.A2.addsp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_subp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.subp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_neg,SI_ftype_SI,1)
+//
+def int_hexagon_A2_neg :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.neg">;
+//
+// BUILTIN_INFO(HEXAGON.A2_negsat,SI_ftype_SI,1)
+//
+def int_hexagon_A2_negsat :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.negsat">;
+//
+// BUILTIN_INFO(HEXAGON.A2_abs,SI_ftype_SI,1)
+//
+def int_hexagon_A2_abs :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.abs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_abssat,SI_ftype_SI,1)
+//
+def int_hexagon_A2_abssat :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.abssat">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vconj,DI_ftype_DI,1)
+//
+def int_hexagon_A2_vconj :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.vconj">;
+//
+// BUILTIN_INFO(HEXAGON.A2_negp,DI_ftype_DI,1)
+//
+def int_hexagon_A2_negp :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.negp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_absp,DI_ftype_DI,1)
+//
+def int_hexagon_A2_absp :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.absp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_max,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_max :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.max">;
+//
+// BUILTIN_INFO(HEXAGON.A2_maxu,USI_ftype_SISI,2)
+//
+def int_hexagon_A2_maxu :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.A2.maxu">;
+//
+// BUILTIN_INFO(HEXAGON.A2_min,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_min :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.min">;
+//
+// BUILTIN_INFO(HEXAGON.A2_minu,USI_ftype_SISI,2)
+//
+def int_hexagon_A2_minu :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.A2.minu">;
+//
+// BUILTIN_INFO(HEXAGON.A2_maxp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_maxp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.maxp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_maxup,UDI_ftype_DIDI,2)
+//
+def int_hexagon_A2_maxup :
+Hexagon_udi_didi_Intrinsic<"HEXAGON.A2.maxup">;
+//
+// BUILTIN_INFO(HEXAGON.A2_minp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_minp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.minp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_minup,UDI_ftype_DIDI,2)
+//
+def int_hexagon_A2_minup :
+Hexagon_udi_didi_Intrinsic<"HEXAGON.A2.minup">;
+//
+// BUILTIN_INFO(HEXAGON.A2_tfr,SI_ftype_SI,1)
+//
+def int_hexagon_A2_tfr :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.tfr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_tfrsi,SI_ftype_SI,1)
+//
+def int_hexagon_A2_tfrsi :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.tfrsi">;
+//
+// BUILTIN_INFO(HEXAGON.A2_tfrp,DI_ftype_DI,1)
+//
+def int_hexagon_A2_tfrp :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.tfrp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_tfrpi,DI_ftype_SI,1)
+//
+def int_hexagon_A2_tfrpi :
+Hexagon_di_si_Intrinsic<"HEXAGON.A2.tfrpi">;
+//
+// BUILTIN_INFO(HEXAGON.A2_zxtb,SI_ftype_SI,1)
+//
+def int_hexagon_A2_zxtb :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.zxtb">;
+//
+// BUILTIN_INFO(HEXAGON.A2_sxtb,SI_ftype_SI,1)
+//
+def int_hexagon_A2_sxtb :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.sxtb">;
+//
+// BUILTIN_INFO(HEXAGON.A2_zxth,SI_ftype_SI,1)
+//
+def int_hexagon_A2_zxth :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.zxth">;
+//
+// BUILTIN_INFO(HEXAGON.A2_sxth,SI_ftype_SI,1)
+//
+def int_hexagon_A2_sxth :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.sxth">;
+//
+// BUILTIN_INFO(HEXAGON.A2_combinew,DI_ftype_SISI,2)
+//
+def int_hexagon_A2_combinew :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.A2.combinew">;
+//
+// BUILTIN_INFO(HEXAGON.A2_combineii,DI_ftype_SISI,2)
+//
+def int_hexagon_A2_combineii :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.A2.combineii">;
+//
+// BUILTIN_INFO(HEXAGON.A2_combine_hh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_combine_hh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.combine.hh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_combine_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_combine_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.combine.hl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_combine_lh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_combine_lh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.combine.lh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_combine_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_combine_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.combine.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_tfril,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_tfril :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.tfril">;
+//
+// BUILTIN_INFO(HEXAGON.A2_tfrih,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_tfrih :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.tfrih">;
+//
+// BUILTIN_INFO(HEXAGON.A2_and,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_and :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.and">;
+//
+// BUILTIN_INFO(HEXAGON.A2_or,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_or :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.or">;
+//
+// BUILTIN_INFO(HEXAGON.A2_xor,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_xor :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.xor">;
+//
+// BUILTIN_INFO(HEXAGON.A2_not,SI_ftype_SI,1)
+//
+def int_hexagon_A2_not :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.not">;
+//
+// BUILTIN_INFO(HEXAGON.M2_xor_xacc,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_xor_xacc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.xor.xacc">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subri,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subri :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subri">;
+//
+// BUILTIN_INFO(HEXAGON.A2_andir,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_andir :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.andir">;
+//
+// BUILTIN_INFO(HEXAGON.A2_orir,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_orir :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.orir">;
+//
+// BUILTIN_INFO(HEXAGON.A2_andp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_andp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.andp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_orp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_orp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.orp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_xorp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_xorp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.xorp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_notp,DI_ftype_DI,1)
+//
+def int_hexagon_A2_notp :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.notp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_sxtw,DI_ftype_SI,1)
+//
+def int_hexagon_A2_sxtw :
+Hexagon_di_si_Intrinsic<"HEXAGON.A2.sxtw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_sat,SI_ftype_DI,1)
+//
+def int_hexagon_A2_sat :
+Hexagon_si_di_Intrinsic<"HEXAGON.A2.sat">;
+//
+// BUILTIN_INFO(HEXAGON.A2_sath,SI_ftype_SI,1)
+//
+def int_hexagon_A2_sath :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.sath">;
+//
+// BUILTIN_INFO(HEXAGON.A2_satuh,SI_ftype_SI,1)
+//
+def int_hexagon_A2_satuh :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.satuh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_satub,SI_ftype_SI,1)
+//
+def int_hexagon_A2_satub :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.satub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_satb,SI_ftype_SI,1)
+//
+def int_hexagon_A2_satb :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.satb">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vaddub,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vaddub :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vaddubs,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vaddubs :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddubs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vaddh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vaddh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vaddhs,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vaddhs :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddhs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vadduhs,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vadduhs :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vadduhs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vaddw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vaddw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vaddws,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vaddws :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddws">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svavgh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svavgh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svavgh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svavghs,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svavghs :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svavghs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svnavgh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svnavgh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svnavgh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svaddh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svaddh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svaddh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svaddhs,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svaddhs :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svaddhs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svadduhs,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svadduhs :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svadduhs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svsubh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svsubh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svsubh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svsubhs,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svsubhs :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svsubhs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svsubuhs,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svsubuhs :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svsubuhs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vraddub,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vraddub :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vraddub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vraddub_acc,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_A2_vraddub_acc :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.A2.vraddub.acc">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vradduh,SI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vradduh :
+Hexagon_si_didi_Intrinsic<"HEXAGON.M2.vradduh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vsubub,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vsubub :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vsububs,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vsububs :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsububs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vsubh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vsubh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vsubhs,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vsubhs :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubhs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vsubuhs,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vsubuhs :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubuhs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vsubw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vsubw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vsubws,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vsubws :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubws">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vabsh,DI_ftype_DI,1)
+//
+def int_hexagon_A2_vabsh :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.vabsh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vabshsat,DI_ftype_DI,1)
+//
+def int_hexagon_A2_vabshsat :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.vabshsat">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vabsw,DI_ftype_DI,1)
+//
+def int_hexagon_A2_vabsw :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.vabsw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vabswsat,DI_ftype_DI,1)
+//
+def int_hexagon_A2_vabswsat :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.vabswsat">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vabsdiffw,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vabsdiffw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vabsdiffw">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vabsdiffh,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vabsdiffh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vabsdiffh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vrsadub,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vrsadub :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vrsadub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vrsadub_acc,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_A2_vrsadub_acc :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.A2.vrsadub.acc">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavgub,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavgub :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavguh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavguh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavguh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavgh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavgh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vnavgh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vnavgh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavgh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavgw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavgw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vnavgw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vnavgw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavgw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavgwr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavgwr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgwr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vnavgwr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vnavgwr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavgwr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavgwcr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavgwcr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgwcr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vnavgwcr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vnavgwcr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavgwcr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavghcr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavghcr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavghcr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vnavghcr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vnavghcr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavghcr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavguw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavguw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavguw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavguwr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavguwr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavguwr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavgubr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavgubr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgubr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavguhr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavguhr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavguhr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavghr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavghr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavghr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vnavghr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vnavghr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavghr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vminh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vminh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vmaxh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vmaxh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vminub,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vminub :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vmaxub,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vmaxub :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vminuh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vminuh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminuh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vmaxuh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vmaxuh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxuh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vminw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vminw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vmaxw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vmaxw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vminuw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vminuw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminuw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vmaxuw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vmaxuw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxuw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asr_r_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.r.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asl_r_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asl.r.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_lsr_r_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.lsr.r.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_lsl_r_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.lsl.r.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_p,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asr_r_p :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.r.p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_p,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asl_r_p :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.r.p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_p,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsr_r_p :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.r.p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_p,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsl_r_p :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsl.r.p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_r_acc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asr_r_r_acc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.r.r.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_r_acc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_r_r_acc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.r.r.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_acc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_r_r_acc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.r.r.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_acc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsl_r_r_acc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsl.r.r.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_p_acc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asr_r_p_acc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_p_acc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_r_p_acc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_acc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_r_p_acc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_acc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsl_r_p_acc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_r_nac,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asr_r_r_nac :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.r.r.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_r_nac,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_r_r_nac :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.r.r.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_nac,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_r_r_nac :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.r.r.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_nac,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsl_r_r_nac :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsl.r.r.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_p_nac,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asr_r_p_nac :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_p_nac,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_r_p_nac :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_nac,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_r_p_nac :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_nac,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsl_r_p_nac :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_r_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asr_r_r_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.r.r.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_r_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_r_r_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.r.r.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_r_r_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.r.r.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsl_r_r_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsl.r.r.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_r_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asr_r_r_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.r.r.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_r_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_r_r_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.r.r.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_r_r_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.r.r.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsl_r_r_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsl.r.r.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_p_and,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asr_r_p_and :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_p_and,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_r_p_and :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_and,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_r_p_and :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_and,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsl_r_p_and :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_p_or,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asr_r_p_or :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_p_or,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_r_p_or :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_or,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_r_p_or :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_or,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsl_r_p_or :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_r_sat,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asr_r_r_sat :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.r.r.sat">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_r_sat,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asl_r_r_sat :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asl.r.r.sat">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asr_i_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.i.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_lsr_i_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.lsr.i.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asl_i_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asl.i.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_p,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asr_i_p :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.i.p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_p,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsr_i_p :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.i.p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_p,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asl_i_p :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.i.p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_r_acc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asr_i_r_acc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.i.r.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_acc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_i_r_acc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_r_acc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_i_r_acc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_p_acc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asr_i_p_acc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.i.p.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_acc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_i_p_acc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_p_acc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_i_p_acc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_r_nac,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asr_i_r_nac :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.i.r.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_nac,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_i_r_nac :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_r_nac,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_i_r_nac :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_p_nac,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asr_i_p_nac :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.i.p.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_nac,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_i_p_nac :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_p_nac,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_i_p_nac :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_xacc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_i_r_xacc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.xacc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_r_xacc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_i_r_xacc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.xacc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_xacc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_i_p_xacc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.xacc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_p_xacc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_i_p_xacc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.xacc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_r_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asr_i_r_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.i.r.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_i_r_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_r_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_i_r_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_r_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asr_i_r_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.i.r.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_i_r_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_r_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_i_r_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_p_and,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asr_i_p_and :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.i.p.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_and,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_i_p_and :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_p_and,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_i_p_and :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_p_or,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asr_i_p_or :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.i.p.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_or,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_i_p_or :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_p_or,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_i_p_or :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_r_sat,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asl_i_r_sat :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asl.i.r.sat">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_r_rnd,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asr_i_r_rnd :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.i.r.rnd">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_r_rnd_goodsyntax,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asr_i_r_rnd_goodsyntax :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.i.r.rnd.goodsyntax">;
+//
+// BUILTIN_INFO(HEXAGON.S2_addasl_rrri,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_addasl_rrri :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.addasl.rrri">;
+//
+// BUILTIN_INFO(HEXAGON.S2_valignib,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_valignib :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.valignib">;
+//
+// BUILTIN_INFO(HEXAGON.S2_valignrb,DI_ftype_DIDIQI,3)
+//
+def int_hexagon_S2_valignrb :
+Hexagon_di_didiqi_Intrinsic<"HEXAGON.S2.valignrb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vspliceib,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_vspliceib :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.vspliceib">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsplicerb,DI_ftype_DIDIQI,3)
+//
+def int_hexagon_S2_vsplicerb :
+Hexagon_di_didiqi_Intrinsic<"HEXAGON.S2.vsplicerb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsplatrh,DI_ftype_SI,1)
+//
+def int_hexagon_S2_vsplatrh :
+Hexagon_di_si_Intrinsic<"HEXAGON.S2.vsplatrh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsplatrb,SI_ftype_SI,1)
+//
+def int_hexagon_S2_vsplatrb :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.vsplatrb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_insert,SI_ftype_SISISISI,4)
+//
+def int_hexagon_S2_insert :
+Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.insert">;
+//
+// BUILTIN_INFO(HEXAGON.S2_tableidxb_goodsyntax,SI_ftype_SISISISI,4)
+//
+def int_hexagon_S2_tableidxb_goodsyntax :
+Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.tableidxb.goodsyntax">;
+//
+// BUILTIN_INFO(HEXAGON.S2_tableidxh_goodsyntax,SI_ftype_SISISISI,4)
+//
+def int_hexagon_S2_tableidxh_goodsyntax :
+Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.tableidxh.goodsyntax">;
+//
+// BUILTIN_INFO(HEXAGON.S2_tableidxw_goodsyntax,SI_ftype_SISISISI,4)
+//
+def int_hexagon_S2_tableidxw_goodsyntax :
+Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.tableidxw.goodsyntax">;
+//
+// BUILTIN_INFO(HEXAGON.S2_tableidxd_goodsyntax,SI_ftype_SISISISI,4)
+//
+def int_hexagon_S2_tableidxd_goodsyntax :
+Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.tableidxd.goodsyntax">;
+//
+// BUILTIN_INFO(HEXAGON.S2_extractu,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_extractu :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.extractu">;
+//
+// BUILTIN_INFO(HEXAGON.S2_insertp,DI_ftype_DIDISISI,4)
+//
+def int_hexagon_S2_insertp :
+Hexagon_di_didisisi_Intrinsic<"HEXAGON.S2.insertp">;
+//
+// BUILTIN_INFO(HEXAGON.S2_extractup,DI_ftype_DISISI,3)
+//
+def int_hexagon_S2_extractup :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.S2.extractup">;
+//
+// BUILTIN_INFO(HEXAGON.S2_insert_rp,SI_ftype_SISIDI,3)
+//
+def int_hexagon_S2_insert_rp :
+Hexagon_si_sisidi_Intrinsic<"HEXAGON.S2.insert.rp">;
+//
+// BUILTIN_INFO(HEXAGON.S2_extractu_rp,SI_ftype_SIDI,2)
+//
+def int_hexagon_S2_extractu_rp :
+Hexagon_si_sidi_Intrinsic<"HEXAGON.S2.extractu.rp">;
+//
+// BUILTIN_INFO(HEXAGON.S2_insertp_rp,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_S2_insertp_rp :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.S2.insertp.rp">;
+//
+// BUILTIN_INFO(HEXAGON.S2_extractup_rp,DI_ftype_DIDI,2)
+//
+def int_hexagon_S2_extractup_rp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S2.extractup.rp">;
+//
+// BUILTIN_INFO(HEXAGON.S2_tstbit_i,QI_ftype_SISI,2)
+//
+def int_hexagon_S2_tstbit_i :
+Hexagon_qi_sisi_Intrinsic<"HEXAGON.S2.tstbit.i">;
+//
+// BUILTIN_INFO(HEXAGON.S2_setbit_i,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_setbit_i :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.setbit.i">;
+//
+// BUILTIN_INFO(HEXAGON.S2_togglebit_i,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_togglebit_i :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.togglebit.i">;
+//
+// BUILTIN_INFO(HEXAGON.S2_clrbit_i,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_clrbit_i :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.clrbit.i">;
+//
+// BUILTIN_INFO(HEXAGON.S2_tstbit_r,QI_ftype_SISI,2)
+//
+def int_hexagon_S2_tstbit_r :
+Hexagon_qi_sisi_Intrinsic<"HEXAGON.S2.tstbit.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_setbit_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_setbit_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.setbit.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_togglebit_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_togglebit_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.togglebit.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_clrbit_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_clrbit_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.clrbit.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_vh,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asr_i_vh :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.i.vh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_vh,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsr_i_vh :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.i.vh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_vh,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asl_i_vh :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.i.vh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_vh,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asr_r_vh :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.r.vh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_vh,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asl_r_vh :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.r.vh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_vh,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsr_r_vh :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.r.vh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_vh,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsl_r_vh :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsl.r.vh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_vw,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asr_i_vw :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.i.vw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_svw_trun,SI_ftype_DISI,2)
+//
+def int_hexagon_S2_asr_i_svw_trun :
+Hexagon_si_disi_Intrinsic<"HEXAGON.S2.asr.i.svw.trun">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_svw_trun,SI_ftype_DISI,2)
+//
+def int_hexagon_S2_asr_r_svw_trun :
+Hexagon_si_disi_Intrinsic<"HEXAGON.S2.asr.r.svw.trun">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_vw,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsr_i_vw :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.i.vw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_vw,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asl_i_vw :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.i.vw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_vw,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asr_r_vw :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.r.vw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_vw,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asl_r_vw :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.r.vw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_vw,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsr_r_vw :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.r.vw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_vw,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsl_r_vw :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsl.r.vw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vrndpackwh,SI_ftype_DI,1)
+//
+def int_hexagon_S2_vrndpackwh :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vrndpackwh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vrndpackwhs,SI_ftype_DI,1)
+//
+def int_hexagon_S2_vrndpackwhs :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vrndpackwhs">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsxtbh,DI_ftype_SI,1)
+//
+def int_hexagon_S2_vsxtbh :
+Hexagon_di_si_Intrinsic<"HEXAGON.S2.vsxtbh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vzxtbh,DI_ftype_SI,1)
+//
+def int_hexagon_S2_vzxtbh :
+Hexagon_di_si_Intrinsic<"HEXAGON.S2.vzxtbh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsathub,SI_ftype_DI,1)
+//
+def int_hexagon_S2_vsathub :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vsathub">;
+//
+// BUILTIN_INFO(HEXAGON.S2_svsathub,SI_ftype_SI,1)
+//
+def int_hexagon_S2_svsathub :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.svsathub">;
+//
+// BUILTIN_INFO(HEXAGON.S2_svsathb,SI_ftype_SI,1)
+//
+def int_hexagon_S2_svsathb :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.svsathb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsathb,SI_ftype_DI,1)
+//
+def int_hexagon_S2_vsathb :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vsathb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vtrunohb,SI_ftype_DI,1)
+//
+def int_hexagon_S2_vtrunohb :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vtrunohb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vtrunewh,DI_ftype_DIDI,2)
+//
+def int_hexagon_S2_vtrunewh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S2.vtrunewh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vtrunowh,DI_ftype_DIDI,2)
+//
+def int_hexagon_S2_vtrunowh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S2.vtrunowh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vtrunehb,SI_ftype_DI,1)
+//
+def int_hexagon_S2_vtrunehb :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vtrunehb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsxthw,DI_ftype_SI,1)
+//
+def int_hexagon_S2_vsxthw :
+Hexagon_di_si_Intrinsic<"HEXAGON.S2.vsxthw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vzxthw,DI_ftype_SI,1)
+//
+def int_hexagon_S2_vzxthw :
+Hexagon_di_si_Intrinsic<"HEXAGON.S2.vzxthw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsatwh,SI_ftype_DI,1)
+//
+def int_hexagon_S2_vsatwh :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vsatwh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsatwuh,SI_ftype_DI,1)
+//
+def int_hexagon_S2_vsatwuh :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vsatwuh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_packhl,DI_ftype_SISI,2)
+//
+def int_hexagon_S2_packhl :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.S2.packhl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_swiz,SI_ftype_SI,1)
+//
+def int_hexagon_A2_swiz :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.swiz">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsathub_nopack,DI_ftype_DI,1)
+//
+def int_hexagon_S2_vsathub_nopack :
+Hexagon_di_di_Intrinsic<"HEXAGON.S2.vsathub.nopack">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsathb_nopack,DI_ftype_DI,1)
+//
+def int_hexagon_S2_vsathb_nopack :
+Hexagon_di_di_Intrinsic<"HEXAGON.S2.vsathb.nopack">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsatwh_nopack,DI_ftype_DI,1)
+//
+def int_hexagon_S2_vsatwh_nopack :
+Hexagon_di_di_Intrinsic<"HEXAGON.S2.vsatwh.nopack">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsatwuh_nopack,DI_ftype_DI,1)
+//
+def int_hexagon_S2_vsatwuh_nopack :
+Hexagon_di_di_Intrinsic<"HEXAGON.S2.vsatwuh.nopack">;
+//
+// BUILTIN_INFO(HEXAGON.S2_shuffob,DI_ftype_DIDI,2)
+//
+def int_hexagon_S2_shuffob :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S2.shuffob">;
+//
+// BUILTIN_INFO(HEXAGON.S2_shuffeb,DI_ftype_DIDI,2)
+//
+def int_hexagon_S2_shuffeb :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S2.shuffeb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_shuffoh,DI_ftype_DIDI,2)
+//
+def int_hexagon_S2_shuffoh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S2.shuffoh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_shuffeh,DI_ftype_DIDI,2)
+//
+def int_hexagon_S2_shuffeh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S2.shuffeh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_parityp,SI_ftype_DIDI,2)
+//
+def int_hexagon_S2_parityp :
+Hexagon_si_didi_Intrinsic<"HEXAGON.S2.parityp">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lfsp,DI_ftype_DIDI,2)
+//
+def int_hexagon_S2_lfsp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S2.lfsp">;
+//
+// BUILTIN_INFO(HEXAGON.S2_clbnorm,SI_ftype_SI,1)
+//
+def int_hexagon_S2_clbnorm :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.clbnorm">;
+//
+// BUILTIN_INFO(HEXAGON.S2_clb,SI_ftype_SI,1)
+//
+def int_hexagon_S2_clb :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.clb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_cl0,SI_ftype_SI,1)
+//
+def int_hexagon_S2_cl0 :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.cl0">;
+//
+// BUILTIN_INFO(HEXAGON.S2_cl1,SI_ftype_SI,1)
+//
+def int_hexagon_S2_cl1 :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.cl1">;
+//
+// BUILTIN_INFO(HEXAGON.S2_clbp,SI_ftype_DI,1)
+//
+def int_hexagon_S2_clbp :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.clbp">;
+//
+// BUILTIN_INFO(HEXAGON.S2_cl0p,SI_ftype_DI,1)
+//
+def int_hexagon_S2_cl0p :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.cl0p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_cl1p,SI_ftype_DI,1)
+//
+def int_hexagon_S2_cl1p :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.cl1p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_brev,SI_ftype_SI,1)
+//
+def int_hexagon_S2_brev :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.brev">;
+//
+// BUILTIN_INFO(HEXAGON.S2_ct0,SI_ftype_SI,1)
+//
+def int_hexagon_S2_ct0 :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.ct0">;
+//
+// BUILTIN_INFO(HEXAGON.S2_ct1,SI_ftype_SI,1)
+//
+def int_hexagon_S2_ct1 :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.ct1">;
+//
+// BUILTIN_INFO(HEXAGON.S2_interleave,DI_ftype_DI,1)
+//
+def int_hexagon_S2_interleave :
+Hexagon_di_di_Intrinsic<"HEXAGON.S2.interleave">;
+//
+// BUILTIN_INFO(HEXAGON.S2_deinterleave,DI_ftype_DI,1)
+//
+def int_hexagon_S2_deinterleave :
+Hexagon_di_di_Intrinsic<"HEXAGON.S2.deinterleave">;
+
+//
+// BUILTIN_INFO(SI_to_SXTHI_asrh,SI_ftype_SI,1)
+//
+def int_hexagon_SI_to_SXTHI_asrh :
+Hexagon_si_si_Intrinsic<"SI.to.SXTHI.asrh">;
+
+//
+// BUILTIN_INFO(HEXAGON.A4_orn,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_orn :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.orn">;
+//
+// BUILTIN_INFO(HEXAGON.A4_andn,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_andn :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.andn">;
+//
+// BUILTIN_INFO(HEXAGON.A4_ornp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A4_ornp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A4.ornp">;
+//
+// BUILTIN_INFO(HEXAGON.A4_andnp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A4_andnp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A4.andnp">;
+//
+// BUILTIN_INFO(HEXAGON.A4_combineir,DI_ftype_SISI,2)
+//
+def int_hexagon_A4_combineir :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.A4.combineir">;
+//
+// BUILTIN_INFO(HEXAGON.A4_combineri,DI_ftype_SISI,2)
+//
+def int_hexagon_A4_combineri :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.A4.combineri">;
+//
+// BUILTIN_INFO(HEXAGON.C4_cmpneq,QI_ftype_SISI,2)
+//
+def int_hexagon_C4_cmpneq :
+Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmpneq">;
+//
+// BUILTIN_INFO(HEXAGON.C4_cmpneqi,QI_ftype_SISI,2)
+//
+def int_hexagon_C4_cmpneqi :
+Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmpneqi">;
+//
+// BUILTIN_INFO(HEXAGON.C4_cmplte,QI_ftype_SISI,2)
+//
+def int_hexagon_C4_cmplte :
+Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmplte">;
+//
+// BUILTIN_INFO(HEXAGON.C4_cmpltei,QI_ftype_SISI,2)
+//
+def int_hexagon_C4_cmpltei :
+Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmpltei">;
+//
+// BUILTIN_INFO(HEXAGON.C4_cmplteu,QI_ftype_SISI,2)
+//
+def int_hexagon_C4_cmplteu :
+Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmplteu">;
+//
+// BUILTIN_INFO(HEXAGON.C4_cmplteui,QI_ftype_SISI,2)
+//
+def int_hexagon_C4_cmplteui :
+Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmplteui">;
+//
+// BUILTIN_INFO(HEXAGON.A4_rcmpneq,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_rcmpneq :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.rcmpneq">;
+//
+// BUILTIN_INFO(HEXAGON.A4_rcmpneqi,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_rcmpneqi :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.rcmpneqi">;
+//
+// BUILTIN_INFO(HEXAGON.A4_rcmpeq,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_rcmpeq :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.rcmpeq">;
+//
+// BUILTIN_INFO(HEXAGON.A4_rcmpeqi,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_rcmpeqi :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.rcmpeqi">;
+//
+// BUILTIN_INFO(HEXAGON.C4_fastcorner9,QI_ftype_QIQI,2)
+//
+def int_hexagon_C4_fastcorner9 :
+Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C4.fastcorner9">;
+//
+// BUILTIN_INFO(HEXAGON.C4_fastcorner9_not,QI_ftype_QIQI,2)
+//
+def int_hexagon_C4_fastcorner9_not :
+Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C4.fastcorner9_not">;
+//
+// BUILTIN_INFO(HEXAGON.C4_and_andn,QI_ftype_QIQIQI,3)
+//
+def int_hexagon_C4_and_andn :
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.and_andn">;
+//
+// BUILTIN_INFO(HEXAGON.C4_and_and,QI_ftype_QIQIQI,3)
+//
+def int_hexagon_C4_and_and :
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.and_and">;
+//
+// BUILTIN_INFO(HEXAGON.C4_and_orn,QI_ftype_QIQIQI,3)
+//
+def int_hexagon_C4_and_orn :
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.and_orn">;
+//
+// BUILTIN_INFO(HEXAGON.C4_and_or,QI_ftype_QIQIQI,3)
+//
+def int_hexagon_C4_and_or :
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.and_or">;
+//
+// BUILTIN_INFO(HEXAGON.C4_or_andn,QI_ftype_QIQIQI,3)
+//
+def int_hexagon_C4_or_andn :
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.or_andn">;
+//
+// BUILTIN_INFO(HEXAGON.C4_or_and,QI_ftype_QIQIQI,3)
+//
+def int_hexagon_C4_or_and :
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.or_and">;
+//
+// BUILTIN_INFO(HEXAGON.C4_or_orn,QI_ftype_QIQIQI,3)
+//
+def int_hexagon_C4_or_orn :
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.or_orn">;
+//
+// BUILTIN_INFO(HEXAGON.C4_or_or,QI_ftype_QIQIQI,3)
+//
+def int_hexagon_C4_or_or :
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.or_or">;
+//
+// BUILTIN_INFO(HEXAGON.S4_addaddi,SI_ftype_SISISI,3)
+//
+def int_hexagon_S4_addaddi :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.addaddi">;
+//
+// BUILTIN_INFO(HEXAGON.S4_subaddi,SI_ftype_SISISI,3)
+//
+def int_hexagon_S4_subaddi :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.subaddi">;
+//
+// BUILTIN_INFO(HEXAGON.S4_andnp,DI_ftype_DIDI,2)
+//
+def int_hexagon_S4_andnp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S4.andnp">;
+//
+// BUILTIN_INFO(HEXAGON.S4_ornp,DI_ftype_DIDI,2)
+//
+def int_hexagon_S4_ornp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S4.ornp">;
+//
+// BUILTIN_INFO(HEXAGON.M4_xor_xacc,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M4_xor_xacc :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M4.xor_xacc">;
+//
+// BUILTIN_INFO(HEXAGON.M4_and_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_and_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.and_and">;
+//
+// BUILTIN_INFO(HEXAGON.M4_and_andn,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_and_andn :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.and_andn">;
+//
+// BUILTIN_INFO(HEXAGON.M4_and_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_and_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.and_or">;
+//
+// BUILTIN_INFO(HEXAGON.M4_and_xor,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_and_xor :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.and_xor">;
+//
+// BUILTIN_INFO(HEXAGON.M4_xor_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_xor_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.xor_or">;
+//
+// BUILTIN_INFO(HEXAGON.M4_xor_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_xor_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.xor_and">;
+//
+// BUILTIN_INFO(HEXAGON.M4_xor_andn,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_xor_andn :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.xor_andn">;
+//
+// BUILTIN_INFO(HEXAGON.M4_or_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_or_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.or_and">;
+//
+// BUILTIN_INFO(HEXAGON.M4_or_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_or_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.or_or">;
+//
+// BUILTIN_INFO(HEXAGON.M4_or_xor,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_or_xor :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.or_xor">;
+//
+// BUILTIN_INFO(HEXAGON.M4_or_andn,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_or_andn :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.or_andn">;
+//
+// BUILTIN_INFO(HEXAGON.S4_or_andix,SI_ftype_SISISI,3)
+//
+def int_hexagon_S4_or_andix :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.or_andix">;
+//
+// BUILTIN_INFO(HEXAGON.S4_or_andi,SI_ftype_SISISI,3)
+//
+def int_hexagon_S4_or_andi :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.or_andi">;
+//
+// BUILTIN_INFO(HEXAGON.S4_or_ori,SI_ftype_SISISI,3)
+//
+def int_hexagon_S4_or_ori :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.or_ori">;
+//
+// BUILTIN_INFO(HEXAGON.A4_modwrapu,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_modwrapu :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.modwrapu">;
+//
+// BUILTIN_INFO(HEXAGON.A4_cround_ri,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_cround_ri :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.cround_ri">;
+//
+// BUILTIN_INFO(HEXAGON.A4_cround_rr,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_cround_rr :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.cround_rr">;
+//
+// BUILTIN_INFO(HEXAGON.A4_round_ri,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_round_ri :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.round_ri">;
+//
+// BUILTIN_INFO(HEXAGON.A4_round_rr,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_round_rr :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.round_rr">;
+//
+// BUILTIN_INFO(HEXAGON.A4_round_ri_sat,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_round_ri_sat :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.round_ri_sat">;
+//
+// BUILTIN_INFO(HEXAGON.A4_round_rr_sat,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_round_rr_sat :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.round_rr_sat">;
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index cba6599..2d5d9ff 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -1822,6 +1822,144 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
}
//===----------------------------------------------------------------------===//
+// FMA4
+
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_fma4_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmadd_ps : GCCBuiltin<"__builtin_ia32_vfmaddps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmadd_pd : GCCBuiltin<"__builtin_ia32_vfmaddpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsub_sd : GCCBuiltin<"__builtin_ia32_vfmsubsd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsub_ps : GCCBuiltin<"__builtin_ia32_vfmsubps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsub_pd : GCCBuiltin<"__builtin_ia32_vfmsubpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmadd_sd : GCCBuiltin<"__builtin_ia32_vfnmaddsd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmadd_ps : GCCBuiltin<"__builtin_ia32_vfnmaddps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmadd_pd : GCCBuiltin<"__builtin_ia32_vfnmaddpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmaddps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmaddpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmsub_sd : GCCBuiltin<"__builtin_ia32_vfnmsubsd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmsub_ps : GCCBuiltin<"__builtin_ia32_vfnmsubps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmsub_pd : GCCBuiltin<"__builtin_ia32_vfnmsubpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmaddsub_pd : GCCBuiltin<"__builtin_ia32_vfmaddsubpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmaddsub_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmaddsub_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsubadd_pd : GCCBuiltin<"__builtin_ia32_vfmsubaddpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsubadd_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfmsubaddps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsubadd_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfmsubaddpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
// MMX
// Empty MMX state op.
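The GCCBuiltin names in the FMA4 block above are callable directly from C or C++. A minimal sketch for the scalar single-precision form, assuming an FMA4-enabled compile (e.g. clang -mfma4 on x86-64):

    // Exercises __builtin_ia32_vfmaddss, bound above to int_x86_fma4_vfmadd_ss;
    // operands are vector-typed to match the llvm_v4f32_ty signature.
    typedef float v4sf __attribute__((vector_size(16)));

    v4sf fused_madd_ss(v4sf a, v4sf b, v4sf c) {
      // Maps to llvm.x86.fma4.vfmadd.ss: lane 0 becomes a[0] * b[0] + c[0].
      return __builtin_ia32_vfmaddss(a, b, c);
    }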
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index f690d04..8b5d0d4 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -69,7 +69,7 @@ namespace {
(void) llvm::createEdgeProfilerPass();
(void) llvm::createOptimalEdgeProfilerPass();
(void) llvm::createPathProfilerPass();
- (void) llvm::createGCOVProfilerPass(true, true, false);
+ (void) llvm::createGCOVProfilerPass();
(void) llvm::createFunctionInliningPass();
(void) llvm::createAlwaysInlinerPass();
(void) llvm::createGlobalDCEPass();
diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h
index 4a0cf37..b841ddb 100644
--- a/include/llvm/MC/MCAsmBackend.h
+++ b/include/llvm/MC/MCAsmBackend.h
@@ -16,9 +16,11 @@
#include "llvm/Support/DataTypes.h"
namespace llvm {
+class MCAsmLayout;
class MCELFObjectTargetWriter;
class MCFixup;
class MCInst;
+class MCInstFragment;
class MCObjectWriter;
class MCSection;
template<typename T>
@@ -104,6 +106,13 @@ public:
/// \param Inst - The instruction to test.
virtual bool MayNeedRelaxation(const MCInst &Inst) const = 0;
+ /// fixupNeedsRelaxation - Target-specific predicate for whether a given
+ /// fixup requires the associated instruction to be relaxed.
+ virtual bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const = 0;
+
/// RelaxInstruction - Relax the instruction in the given fragment to the next
/// wider instruction.
///
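A sketch of the override a target backend now has to supply for the new pure virtual; the class name and the 8-bit-displacement policy below are illustrative assumptions, not part of this patch:

    #include "llvm/MC/MCAsmBackend.h"

    namespace {
    // Hypothetical backend; the other pure-virtual MCAsmBackend hooks are
    // elided for brevity.
    class ExampleAsmBackend : public llvm::MCAsmBackend {
    public:
      virtual bool fixupNeedsRelaxation(const llvm::MCFixup &Fixup,
                                        uint64_t Value,
                                        const llvm::MCInstFragment *DF,
                                        const llvm::MCAsmLayout &Layout) const {
        // Example policy: relax once the resolved value no longer fits a
        // signed 8-bit displacement (the classic short-branch case).
        return (int64_t)Value < -128 || (int64_t)Value > 127;
      }
    };
    } // end anonymous namespace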
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index d7402b3..5accabc 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -36,10 +36,6 @@ namespace llvm {
enum LCOMMType { None, NoAlignment, ByteAlignment };
}
- namespace Structors {
- enum OutputOrder { None, PriorityOrder, ReversePriorityOrder };
- }
-
/// MCAsmInfo - This class is intended to be used as a base class for asm
/// properties and features specific to the target.
class MCAsmInfo {
@@ -72,11 +68,6 @@ namespace llvm {
/// the macho-specific .tbss directive for emitting thread local BSS Symbols
bool HasMachoTBSSDirective; // Default is false.
- /// StructorOutputOrder - Whether the static ctor/dtor list should be output
- /// in no particular order, in order of increasing priority or the reverse:
- /// in order of decreasing priority (the default).
- Structors::OutputOrder StructorOutputOrder; // Default is reverse order.
-
/// HasStaticCtorDtorReferenceInStaticMode - True if the compiler should
/// emit a ".reference .constructors_used" or ".reference .destructors_used"
/// directive after the static ctor/dtor list. This directive is only
@@ -428,9 +419,6 @@ namespace llvm {
//
bool hasMachoZeroFillDirective() const { return HasMachoZeroFillDirective; }
bool hasMachoTBSSDirective() const { return HasMachoTBSSDirective; }
- Structors::OutputOrder getStructorOutputOrder() const {
- return StructorOutputOrder;
- }
bool hasStaticCtorDtorReferenceInStaticMode() const {
return HasStaticCtorDtorReferenceInStaticMode;
}
diff --git a/include/llvm/MC/MCAsmInfoCOFF.h b/include/llvm/MC/MCAsmInfoCOFF.h
index a3ee159..ba699d7 100644
--- a/include/llvm/MC/MCAsmInfoCOFF.h
+++ b/include/llvm/MC/MCAsmInfoCOFF.h
@@ -18,6 +18,16 @@ namespace llvm {
explicit MCAsmInfoCOFF();
};
+
+ class MCAsmInfoMicrosoft : public MCAsmInfoCOFF {
+ protected:
+ explicit MCAsmInfoMicrosoft();
+ };
+
+ class MCAsmInfoGNUCOFF : public MCAsmInfoCOFF {
+ protected:
+ explicit MCAsmInfoGNUCOFF();
+ };
}
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index b8f8cc4..687dd0c 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -711,43 +711,44 @@ private:
/// \return Whether the fixup value was fully resolved. This is true if the
/// \arg Value result is fixed, otherwise the value may change due to
/// relocation.
- bool EvaluateFixup(const MCAsmLayout &Layout,
+ bool evaluateFixup(const MCAsmLayout &Layout,
const MCFixup &Fixup, const MCFragment *DF,
MCValue &Target, uint64_t &Value) const;
/// Check whether a fixup can be satisfied, or whether it needs to be relaxed
/// (increased in size, in order to hold its value correctly).
- bool FixupNeedsRelaxation(const MCFixup &Fixup, const MCFragment *DF,
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, const MCInstFragment *DF,
const MCAsmLayout &Layout) const;
/// Check whether the given fragment needs relaxation.
- bool FragmentNeedsRelaxation(const MCInstFragment *IF,
+ bool fragmentNeedsRelaxation(const MCInstFragment *IF,
const MCAsmLayout &Layout) const;
- /// LayoutOnce - Perform one layout iteration and return true if any offsets
+ /// layoutOnce - Perform one layout iteration and return true if any offsets
/// were adjusted.
- bool LayoutOnce(MCAsmLayout &Layout);
+ bool layoutOnce(MCAsmLayout &Layout);
- bool LayoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD);
+ bool layoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD);
- bool RelaxInstruction(MCAsmLayout &Layout, MCInstFragment &IF);
+ bool relaxInstruction(MCAsmLayout &Layout, MCInstFragment &IF);
- bool RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF);
+ bool relaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF);
- bool RelaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF);
- bool RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
+ bool relaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF);
+ bool relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
MCDwarfCallFrameFragment &DF);
- /// FinishLayout - Finalize a layout, including fragment lowering.
- void FinishLayout(MCAsmLayout &Layout);
+ /// finishLayout - Finalize a layout, including fragment lowering.
+ void finishLayout(MCAsmLayout &Layout);
- uint64_t HandleFixup(const MCAsmLayout &Layout,
+ uint64_t handleFixup(const MCAsmLayout &Layout,
MCFragment &F, const MCFixup &Fixup);
public:
/// Compute the effective fragment size assuming it is laid out at the given
/// \arg SectionAddress and \arg FragmentOffset.
- uint64_t ComputeFragmentSize(const MCAsmLayout &Layout, const MCFragment &F) const;
+ uint64_t computeFragmentSize(const MCAsmLayout &Layout,
+ const MCFragment &F) const;
/// Find the symbol which defines the atom containing the given symbol, or
/// null if there is no such symbol.
@@ -760,7 +761,7 @@ public:
bool isSymbolLinkerVisible(const MCSymbol &SD) const;
/// Emit the section contents using the given object writer.
- void WriteSectionData(const MCSectionData *Section,
+ void writeSectionData(const MCSectionData *Section,
const MCAsmLayout &Layout) const;
/// Check whether a given symbol has been flagged with .thumb_func.
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index d45b0c8..455b45e 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -108,6 +108,16 @@ namespace llvm {
/// The default initial text section that we generate dwarf debugging line
/// info for when generating dwarf assembly source files.
const MCSection *GenDwarfSection;
+ /// Symbols created for the start and end of this section.
+ MCSymbol *GenDwarfSectionStartSym, *GenDwarfSectionEndSym;
+
+ /// The information gathered from labels that will have dwarf subprogram
+ /// entries when generating dwarf assembly source files.
+ std::vector<const MCGenDwarfSubprogramEntry *> MCGenDwarfSubprogramEntries;
+
+ /// The string to embed in the debug information for the compile unit, if
+ /// non-empty.
+ StringRef DwarfDebugFlags;
/// Honor temporary labels, this is useful for debugging semantic
/// differences between temporary and non-temporary labels (primarily on
@@ -269,6 +279,24 @@ namespace llvm {
unsigned nextGenDwarfFileNumber() { return ++GenDwarfFileNumber; }
const MCSection *getGenDwarfSection() { return GenDwarfSection; }
void setGenDwarfSection(const MCSection *Sec) { GenDwarfSection = Sec; }
+ MCSymbol *getGenDwarfSectionStartSym() { return GenDwarfSectionStartSym; }
+ void setGenDwarfSectionStartSym(MCSymbol *Sym) {
+ GenDwarfSectionStartSym = Sym;
+ }
+ MCSymbol *getGenDwarfSectionEndSym() { return GenDwarfSectionEndSym; }
+ void setGenDwarfSectionEndSym(MCSymbol *Sym) {
+ GenDwarfSectionEndSym = Sym;
+ }
+ const std::vector<const MCGenDwarfSubprogramEntry *>
+ &getMCGenDwarfSubprogramEntries() const {
+ return MCGenDwarfSubprogramEntries;
+ }
+ void addMCGenDwarfSubprogramEntry(const MCGenDwarfSubprogramEntry *E) {
+ MCGenDwarfSubprogramEntries.push_back(E);
+ }
+
+ void setDwarfDebugFlags(StringRef S) { DwarfDebugFlags = S; }
+ StringRef getDwarfDebugFlags() { return DwarfDebugFlags; }
/// @}
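A sketch of how an assembler driver might use the new hooks; the variable names are assumptions.

MCContext &Ctx = Streamer->getContext();
// Preserve the original command line so it can be emitted into the DWARF
// compile unit when assembling with debug info.
Ctx.setDwarfDebugFlags(CommandLineStr);
// The start/end symbols bracket the section being debugged, giving the
// DWARF emitter its address range.
Ctx.setGenDwarfSectionStartSym(Ctx.CreateTempSymbol());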
diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h
index 431e3c4..9c77218 100644
--- a/include/llvm/MC/MCDwarf.h
+++ b/include/llvm/MC/MCDwarf.h
@@ -31,6 +31,8 @@ namespace llvm {
class MCSymbol;
class MCObjectStreamer;
class raw_ostream;
+ class SourceMgr;
+ class SMLoc;
/// MCDwarfFile - Instances of this class represent the name of the dwarf
/// .file directive and its associated dwarf file number in the MC file,
@@ -227,6 +229,46 @@ namespace llvm {
int64_t LineDelta, uint64_t AddrDelta);
};
+ class MCGenDwarfInfo {
+ public:
+ //
+ // When generating dwarf for assembly source files, this emits the Dwarf
+ // sections.
+ //
+ static void Emit(MCStreamer *MCOS);
+ };
+
+ // When generating dwarf for assembly source files, this is the info that
+ // needs to be gathered for each symbol that will have a dwarf2_subprogram.
+ class MCGenDwarfSubprogramEntry {
+ private:
+ // Name of the symbol without a leading underbar, if any.
+ StringRef Name;
+ // The dwarf file number this symbol is in.
+ unsigned FileNumber;
+ // The line number this symbol is at.
+ unsigned LineNumber;
+ // The low_pc for the dwarf2_subprogram is taken from this symbol. The
+ // high_pc is taken from the next symbol's value or the end of the section
+ // for the last symbol.
+ MCSymbol *Label;
+
+ public:
+ MCGenDwarfSubprogramEntry(StringRef name, unsigned fileNumber,
+ unsigned lineNumber, MCSymbol *label) :
+ Name(name), FileNumber(fileNumber), LineNumber(lineNumber), Label(label){}
+
+ StringRef getName() const { return Name; }
+ unsigned getFileNumber() const { return FileNumber; }
+ unsigned getLineNumber() const { return LineNumber; }
+ MCSymbol *getLabel() const { return Label; }
+
+ // This is called when a label is created while generating dwarf for
+ // assembly source files.
+ static void Make(MCSymbol *Symbol, MCStreamer *MCOS, SourceMgr &SrcMgr,
+ SMLoc &Loc);
+ };
+
class MCCFIInstruction {
public:
enum OpType { SameValue, Remember, Restore, Move, RelMove };
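Illustrative call site for the new entry class, e.g. from an asm parser when a label is defined; the wrapper function is an assumption, and Make() plausibly registers the entry on the MCContext via the addMCGenDwarfSubprogramEntry() hook shown earlier.

void onLabelDefined(MCSymbol *Symbol, MCStreamer *MCOS, SourceMgr &SrcMgr,
                    SMLoc &Loc) {
  // Record a prospective dwarf2_subprogram for this label.
  MCGenDwarfSubprogramEntry::Make(Symbol, MCOS, SrcMgr, Loc);
}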
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index 3172fd7..e7f5b6a 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -179,10 +179,14 @@ public:
VK_Mips_None,
VK_Mips_GPREL,
VK_Mips_GOT_CALL,
+ VK_Mips_GOT16,
VK_Mips_GOT,
VK_Mips_ABS_HI,
VK_Mips_ABS_LO,
VK_Mips_TLSGD,
+ VK_Mips_TLSLDM,
+ VK_Mips_DTPREL_HI,
+ VK_Mips_DTPREL_LO,
VK_Mips_GOTTPREL,
VK_Mips_TPREL_HI,
VK_Mips_TPREL_LO,
diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h
index aafa800..6d71cf5 100644
--- a/include/llvm/MC/MCInstrDesc.h
+++ b/include/llvm/MC/MCInstrDesc.h
@@ -184,6 +184,10 @@ public:
return NumDefs;
}
+ /// getFlags - Return flags of this instruction.
+ ///
+ unsigned getFlags() const { return Flags; }
+
/// isVariadic - Return true if this instruction can have a variable number of
/// operands. In this case, the variable operands will be after the normal
/// operands but before the implicit definitions and uses (if any are
@@ -198,84 +202,6 @@ public:
return Flags & (1 << MCID::HasOptionalDef);
}
- /// getImplicitUses - Return a list of registers that are potentially
- /// read by any instance of this machine instruction. For example, on X86,
- /// the "adc" instruction adds two register operands and adds the carry bit in
- /// from the flags register. In this case, the instruction is marked as
- /// implicitly reading the flags. Likewise, the variable shift instruction on
- /// X86 is marked as implicitly reading the 'CL' register, which it always
- /// does.
- ///
- /// This method returns null if the instruction has no implicit uses.
- const unsigned *getImplicitUses() const {
- return ImplicitUses;
- }
-
- /// getNumImplicitUses - Return the number of implicit uses this instruction
- /// has.
- unsigned getNumImplicitUses() const {
- if (ImplicitUses == 0) return 0;
- unsigned i = 0;
- for (; ImplicitUses[i]; ++i) /*empty*/;
- return i;
- }
-
- /// getImplicitDefs - Return a list of registers that are potentially
- /// written by any instance of this machine instruction. For example, on X86,
- /// many instructions implicitly set the flags register. In this case, they
- /// are marked as setting the FLAGS. Likewise, many instructions always
- /// deposit their result in a physical register. For example, the X86 divide
- /// instruction always deposits the quotient and remainder in the EAX/EDX
- /// registers. For that instruction, this will return a list containing the
- /// EAX/EDX/EFLAGS registers.
- ///
- /// This method returns null if the instruction has no implicit defs.
- const unsigned *getImplicitDefs() const {
- return ImplicitDefs;
- }
-
- /// getNumImplicitDefs - Return the number of implicit defs this instruction
- /// has.
- unsigned getNumImplicitDefs() const {
- if (ImplicitDefs == 0) return 0;
- unsigned i = 0;
- for (; ImplicitDefs[i]; ++i) /*empty*/;
- return i;
- }
-
- /// hasImplicitUseOfPhysReg - Return true if this instruction implicitly
- /// uses the specified physical register.
- bool hasImplicitUseOfPhysReg(unsigned Reg) const {
- if (const unsigned *ImpUses = ImplicitUses)
- for (; *ImpUses; ++ImpUses)
- if (*ImpUses == Reg) return true;
- return false;
- }
-
- /// hasImplicitDefOfPhysReg - Return true if this instruction implicitly
- /// defines the specified physical register.
- bool hasImplicitDefOfPhysReg(unsigned Reg) const {
- if (const unsigned *ImpDefs = ImplicitDefs)
- for (; *ImpDefs; ++ImpDefs)
- if (*ImpDefs == Reg) return true;
- return false;
- }
-
- /// getSchedClass - Return the scheduling class for this instruction. The
- /// scheduling class is an index into the InstrItineraryData table. This
- /// returns zero if there is no known scheduling information for the
- /// instruction.
- ///
- unsigned getSchedClass() const {
- return SchedClass;
- }
-
- /// getSize - Return the number of bytes in the encoding of this instruction,
- /// or zero if the encoding size cannot be known from the opcode.
- unsigned getSize() const {
- return Size;
- }
-
/// isPseudo - Return true if this is a pseudo instruction that doesn't
/// correspond to a real machine instruction.
///
@@ -298,18 +224,6 @@ public:
return Flags & (1 << MCID::Barrier);
}
- /// findFirstPredOperandIdx() - Find the index of the first operand in the
- /// operand list that is used to represent the predicate. It returns -1 if
- /// none is found.
- int findFirstPredOperandIdx() const {
- if (isPredicable()) {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (OpInfo[i].isPredicate())
- return i;
- }
- return -1;
- }
-
/// isTerminator - Returns true if this instruction part of the terminator for
/// a basic block. Typically this is things like return and branch
/// instructions.
@@ -530,6 +444,97 @@ public:
bool hasExtraDefRegAllocReq() const {
return Flags & (1 << MCID::ExtraDefRegAllocReq);
}
+
+
+ /// getImplicitUses - Return a list of registers that are potentially
+ /// read by any instance of this machine instruction. For example, on X86,
+ /// the "adc" instruction adds two register operands and adds the carry bit in
+ /// from the flags register. In this case, the instruction is marked as
+ /// implicitly reading the flags. Likewise, the variable shift instruction on
+ /// X86 is marked as implicitly reading the 'CL' register, which it always
+ /// does.
+ ///
+ /// This method returns null if the instruction has no implicit uses.
+ const unsigned *getImplicitUses() const {
+ return ImplicitUses;
+ }
+
+ /// getNumImplicitUses - Return the number of implicit uses this instruction
+ /// has.
+ unsigned getNumImplicitUses() const {
+ if (ImplicitUses == 0) return 0;
+ unsigned i = 0;
+ for (; ImplicitUses[i]; ++i) /*empty*/;
+ return i;
+ }
+
+ /// getImplicitDefs - Return a list of registers that are potentially
+ /// written by any instance of this machine instruction. For example, on X86,
+ /// many instructions implicitly set the flags register. In this case, they
+ /// are marked as setting the FLAGS. Likewise, many instructions always
+ /// deposit their result in a physical register. For example, the X86 divide
+ /// instruction always deposits the quotient and remainder in the EAX/EDX
+ /// registers. For that instruction, this will return a list containing the
+ /// EAX/EDX/EFLAGS registers.
+ ///
+ /// This method returns null if the instruction has no implicit defs.
+ const unsigned *getImplicitDefs() const {
+ return ImplicitDefs;
+ }
+
+ /// getNumImplicitDefs - Return the number of implicit defs this instruction
+ /// has.
+ unsigned getNumImplicitDefs() const {
+ if (ImplicitDefs == 0) return 0;
+ unsigned i = 0;
+ for (; ImplicitDefs[i]; ++i) /*empty*/;
+ return i;
+ }
+
+ /// hasImplicitUseOfPhysReg - Return true if this instruction implicitly
+ /// uses the specified physical register.
+ bool hasImplicitUseOfPhysReg(unsigned Reg) const {
+ if (const unsigned *ImpUses = ImplicitUses)
+ for (; *ImpUses; ++ImpUses)
+ if (*ImpUses == Reg) return true;
+ return false;
+ }
+
+ /// hasImplicitDefOfPhysReg - Return true if this instruction implicitly
+ /// defines the specified physical register.
+ bool hasImplicitDefOfPhysReg(unsigned Reg) const {
+ if (const unsigned *ImpDefs = ImplicitDefs)
+ for (; *ImpDefs; ++ImpDefs)
+ if (*ImpDefs == Reg) return true;
+ return false;
+ }
+
+ /// getSchedClass - Return the scheduling class for this instruction. The
+ /// scheduling class is an index into the InstrItineraryData table. This
+ /// returns zero if there is no known scheduling information for the
+ /// instruction.
+ ///
+ unsigned getSchedClass() const {
+ return SchedClass;
+ }
+
+ /// getSize - Return the number of bytes in the encoding of this instruction,
+ /// or zero if the encoding size cannot be known from the opcode.
+ unsigned getSize() const {
+ return Size;
+ }
+
+ /// findFirstPredOperandIdx() - Find the index of the first operand in the
+ /// operand list that is used to represent the predicate. It returns -1 if
+ /// none is found.
+ int findFirstPredOperandIdx() const {
+ if (isPredicable()) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (OpInfo[i].isPredicate())
+ return i;
+ }
+ return -1;
+ }
};
} // end namespace llvm
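The implicit use/def lists are null-terminated arrays of register numbers, which is why the counting helpers walk until they hit 0. A caller can scan them directly; MCID below stands for any MCInstrDesc.

for (const unsigned *ImpDefs = MCID.getImplicitDefs();
     ImpDefs && *ImpDefs; ++ImpDefs) {
  // *ImpDefs is a physical register this instruction implicitly writes.
}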
diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h
index 2c5a0a1..d91b11b 100644
--- a/include/llvm/MC/MCObjectFileInfo.h
+++ b/include/llvm/MC/MCObjectFileInfo.h
@@ -19,10 +19,14 @@
#include "llvm/MC/SectionKind.h"
namespace llvm {
-class MCContext;
-class MCSection;
-class Triple;
+ class MCContext;
+ class MCSection;
+ class Triple;
+ namespace Structors {
+ enum OutputOrder { None, PriorityOrder, ReversePriorityOrder };
+ }
+
class MCObjectFileInfo {
protected:
/// CommDirectiveSupportsAlignment - True if .comm supports alignment. This
@@ -164,6 +168,11 @@ protected:
const MCSection *PDataSection;
const MCSection *XDataSection;
+ /// StructorOutputOrder - Whether the static ctor/dtor list should be output
+ /// in no particular order, in order of increasing priority or the reverse:
+ /// in order of decreasing priority (the default).
+ Structors::OutputOrder StructorOutputOrder; // Default is reverse order.
+
public:
void InitMCObjectFileInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM,
MCContext &ctx);
@@ -291,6 +300,10 @@ public:
return EHFrameSection;
}
+ Structors::OutputOrder getStructorOutputOrder() const {
+ return StructorOutputOrder;
+ }
+
private:
enum Environment { IsMachO, IsELF, IsCOFF };
Environment Env;
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 9810648..1c1fe02 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -641,8 +641,8 @@ namespace llvm {
/// createELFStreamer - Create a machine code streamer which will generate
/// ELF format object files.
MCStreamer *createELFStreamer(MCContext &Ctx, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *CE,
- bool RelaxAll, bool NoExecStack);
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll, bool NoExecStack);
/// createLoggingStreamer - Create a machine code streamer which just logs the
/// API calls and then dispatches to another streamer.
diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h
index 887e33c..59b4b26 100644
--- a/include/llvm/Metadata.h
+++ b/include/llvm/Metadata.h
@@ -225,6 +225,9 @@ public:
/// print - Implement operator<< on NamedMDNode.
void print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW = 0) const;
+
+ /// dump() - Allow printing of NamedMDNodes from the debugger.
+ void dump() const;
};
} // end llvm namespace
diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h
index 71e342f..73bde8e 100644
--- a/include/llvm/Object/COFF.h
+++ b/include/llvm/Object/COFF.h
@@ -105,7 +105,7 @@ private:
protected:
virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
- virtual error_code getSymbolOffset(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
@@ -135,6 +135,8 @@ protected:
RelocationRef &Res) const;
virtual error_code getRelocationAddress(DataRefImpl Rel,
uint64_t &Res) const;
+ virtual error_code getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const;
virtual error_code getRelocationSymbol(DataRefImpl Rel,
SymbolRef &Res) const;
virtual error_code getRelocationType(DataRefImpl Rel,
diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index c6454ff..7e3a90d 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -49,7 +49,7 @@ public:
protected:
virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
- virtual error_code getSymbolOffset(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
@@ -79,6 +79,8 @@ protected:
RelocationRef &Res) const;
virtual error_code getRelocationAddress(DataRefImpl Rel,
uint64_t &Res) const;
+ virtual error_code getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const;
virtual error_code getRelocationSymbol(DataRefImpl Rel,
SymbolRef &Res) const;
virtual error_code getRelocationType(DataRefImpl Rel,
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index a0f3c4b..fd180d4 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -104,6 +104,7 @@ public:
error_code getNext(RelocationRef &Result) const;
error_code getAddress(uint64_t &Result) const;
+ error_code getOffset(uint64_t &Result) const;
error_code getSymbol(SymbolRef &Result) const;
error_code getType(uint64_t &Result) const;
@@ -195,7 +196,7 @@ public:
error_code getName(StringRef &Result) const;
error_code getAddress(uint64_t &Result) const;
- error_code getOffset(uint64_t &Result) const;
+ error_code getFileOffset(uint64_t &Result) const;
error_code getSize(uint64_t &Result) const;
error_code getType(SymbolRef::Type &Result) const;
@@ -254,7 +255,7 @@ protected:
virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const = 0;
virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const = 0;
virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const =0;
- virtual error_code getSymbolOffset(DataRefImpl Symb, uint64_t &Res) const =0;
+ virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const =0;
virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const = 0;
virtual error_code getSymbolType(DataRefImpl Symb,
SymbolRef::Type &Res) const = 0;
@@ -289,6 +290,8 @@ protected:
RelocationRef &Res) const = 0;
virtual error_code getRelocationAddress(DataRefImpl Rel,
uint64_t &Res) const =0;
+ virtual error_code getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const =0;
virtual error_code getRelocationSymbol(DataRefImpl Rel,
SymbolRef &Res) const = 0;
virtual error_code getRelocationType(DataRefImpl Rel,
@@ -363,8 +366,8 @@ inline error_code SymbolRef::getAddress(uint64_t &Result) const {
return OwningObject->getSymbolAddress(SymbolPimpl, Result);
}
-inline error_code SymbolRef::getOffset(uint64_t &Result) const {
- return OwningObject->getSymbolOffset(SymbolPimpl, Result);
+inline error_code SymbolRef::getFileOffset(uint64_t &Result) const {
+ return OwningObject->getSymbolFileOffset(SymbolPimpl, Result);
}
inline error_code SymbolRef::getSize(uint64_t &Result) const {
@@ -486,6 +489,10 @@ inline error_code RelocationRef::getAddress(uint64_t &Result) const {
return OwningObject->getRelocationAddress(RelocationPimpl, Result);
}
+inline error_code RelocationRef::getOffset(uint64_t &Result) const {
+ return OwningObject->getRelocationOffset(RelocationPimpl, Result);
+}
+
inline error_code RelocationRef::getSymbol(SymbolRef &Result) const {
return OwningObject->getRelocationSymbol(RelocationPimpl, Result);
}
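Caller-side sketch of the renamed and added accessors; Rel and Sym are assumed to be a RelocationRef and SymbolRef obtained from iteration, with error handling abbreviated.

uint64_t RelOffset, SymFileOffset;
if (error_code ec = Rel.getOffset(RelOffset))         // offset of the relocation
  report_fatal_error(ec.message());
if (error_code ec = Sym.getFileOffset(SymFileOffset)) // position in the file,
  report_fatal_error(ec.message());                   // distinct from getAddress()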
diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h
index 48a5796..abd6a19 100644
--- a/include/llvm/Operator.h
+++ b/include/llvm/Operator.h
@@ -261,8 +261,8 @@ public:
/// getPointerOperandType - Method to return the pointer operand as a
/// PointerType.
- PointerType *getPointerOperandType() const {
- return reinterpret_cast<PointerType*>(getPointerOperand()->getType());
+ Type *getPointerOperandType() const {
+ return getPointerOperand()->getType();
}
unsigned getNumIndices() const { // Note: always non-negative
diff --git a/include/llvm/PassSupport.h b/include/llvm/PassSupport.h
index 0827909..c50c2cc 100644
--- a/include/llvm/PassSupport.h
+++ b/include/llvm/PassSupport.h
@@ -25,6 +25,7 @@
#include "llvm/PassRegistry.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Valgrind.h"
#include <vector>
namespace llvm {
@@ -135,7 +136,10 @@ private:
if (old_val == 0) { \
function(Registry); \
sys::MemoryFence(); \
+ TsanIgnoreWritesBegin(); \
+ TsanHappensBefore(&initialized); \
initialized = 2; \
+ TsanIgnoreWritesEnd(); \
} else { \
sys::cas_flag tmp = initialized; \
sys::MemoryFence(); \
@@ -143,7 +147,8 @@ private:
tmp = initialized; \
sys::MemoryFence(); \
} \
- }
+ } \
+ TsanHappensAfter(&initialized);
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis) \
static void* initialize##passName##PassOnce(PassRegistry &Registry) { \
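Expanded into open code, the initialization macro implements double-checked locking; roughly as in the sketch below, reusing the same Support helpers, with initializeFooPassOnce standing in for the generated function.

static sys::cas_flag initialized = 0;
sys::cas_flag old_val = sys::CompareAndSwap(&initialized, 1, 0);
if (old_val == 0) {
  initializeFooPassOnce(Registry);   // we won the race: run the initializer
  sys::MemoryFence();
  TsanIgnoreWritesBegin();
  TsanHappensBefore(&initialized);
  initialized = 2;                   // publish completion
  TsanIgnoreWritesEnd();
} else {
  while (initialized != 2)           // lost the race: wait for the publisher
    sys::MemoryFence();
}
TsanHappensAfter(&initialized);      // pairs with the TsanHappensBefore above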
diff --git a/include/llvm/Support/CFG.h b/include/llvm/Support/CFG.h
index 29313ef..6e354f9 100644
--- a/include/llvm/Support/CFG.h
+++ b/include/llvm/Support/CFG.h
@@ -314,6 +314,7 @@ template <> struct GraphTraits<Function*> : public GraphTraits<BasicBlock*> {
typedef Function::iterator nodes_iterator;
static nodes_iterator nodes_begin(Function *F) { return F->begin(); }
static nodes_iterator nodes_end (Function *F) { return F->end(); }
+ static unsigned size (Function *F) { return F->size(); }
};
template <> struct GraphTraits<const Function*> :
public GraphTraits<const BasicBlock*> {
@@ -323,6 +324,7 @@ template <> struct GraphTraits<const Function*> :
typedef Function::const_iterator nodes_iterator;
static nodes_iterator nodes_begin(const Function *F) { return F->begin(); }
static nodes_iterator nodes_end (const Function *F) { return F->end(); }
+ static unsigned size (const Function *F) { return F->size(); }
};
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index c6b62a8..a2990e4 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -326,6 +326,8 @@ LocationClass<Ty> location(Ty &L) { return LocationClass<Ty>(L); }
struct GenericOptionValue {
virtual ~GenericOptionValue() {}
virtual bool compare(const GenericOptionValue &V) const = 0;
+private:
+ virtual void anchor();
};
template<class DataType> struct OptionValue;
@@ -416,6 +418,8 @@ struct OptionValue<cl::boolOrDefault> : OptionValueCopy<cl::boolOrDefault> {
setValue(V);
return *this;
}
+private:
+ virtual void anchor();
};
template<>
@@ -431,6 +435,8 @@ struct OptionValue<std::string> : OptionValueCopy<std::string> {
setValue(V);
return *this;
}
+private:
+ virtual void anchor();
};
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h
index 30f9187..357f555a 100644
--- a/include/llvm/Support/Dwarf.h
+++ b/include/llvm/Support/Dwarf.h
@@ -526,6 +526,7 @@ enum dwarf_constants {
DW_LANG_D = 0x0013,
DW_LANG_Python = 0x0014,
DW_LANG_lo_user = 0x8000,
+ DW_LANG_Mips_Assembler = 0x8001,
DW_LANG_hi_user = 0xffff,
// Identifier case codes
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index a3906b1..6dd76ea 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -627,6 +627,7 @@ enum {
R_MIPS_GPREL16 = 7,
R_MIPS_LITERAL = 8,
R_MIPS_GOT16 = 9,
+ R_MIPS_GOT = 9,
R_MIPS_PC16 = 10,
R_MIPS_CALL16 = 11,
R_MIPS_GPREL32 = 12,
@@ -887,6 +888,7 @@ enum {
STT_TLS = 6, // Thread local data object
STT_LOOS = 7, // Lowest operating system-specific symbol type
STT_HIOS = 8, // Highest operating system-specific symbol type
+ STT_GNU_IFUNC = 10, // GNU indirect function
STT_LOPROC = 13, // Lowest processor-specific symbol type
STT_HIPROC = 15 // Highest processor-specific symbol type
};
diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h
index a868e5f..e6f9926 100644
--- a/include/llvm/Support/FileSystem.h
+++ b/include/llvm/Support/FileSystem.h
@@ -27,14 +27,21 @@
#ifndef LLVM_SUPPORT_FILE_SYSTEM_H
#define LLVM_SUPPORT_FILE_SYSTEM_H
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/PathV1.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/system_error.h"
#include <ctime>
#include <iterator>
+#include <stack>
#include <string>
+#include <vector>
+
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
namespace llvm {
namespace sys {
@@ -91,7 +98,20 @@ struct space_info {
/// a platform specific member to store the result.
class file_status
{
- // implementation defined status field.
+ #if defined(LLVM_ON_UNIX)
+ dev_t st_dev;
+ ino_t st_ino;
+ #elif defined (LLVM_ON_WIN32)
+ uint32_t LastWriteTimeHigh;
+ uint32_t LastWriteTimeLow;
+ uint32_t VolumeSerialNumber;
+ uint32_t FileSizeHigh;
+ uint32_t FileSizeLow;
+ uint32_t FileIndexHigh;
+ uint32_t FileIndexLow;
+ #endif
+ friend bool equivalent(file_status A, file_status B);
+ friend error_code status(const Twine &path, file_status &result);
file_type Type;
public:
explicit file_status(file_type v=file_type::status_error)
@@ -101,6 +121,44 @@ public:
void type(file_type v) { Type = v; }
};
+/// file_magic - An "enum class" enumeration of file types based on magic (the first
+/// N bytes of the file).
+struct file_magic {
+ enum _ {
+ unknown = 0, ///< Unrecognized file
+ bitcode, ///< Bitcode file
+ archive, ///< ar style archive file
+ elf_relocatable, ///< ELF Relocatable object file
+ elf_executable, ///< ELF Executable image
+ elf_shared_object, ///< ELF dynamically linked shared lib
+ elf_core, ///< ELF core image
+ macho_object, ///< Mach-O Object file
+ macho_executable, ///< Mach-O Executable
+ macho_fixed_virtual_memory_shared_lib, ///< Mach-O Shared Lib, FVM
+ macho_core, ///< Mach-O Core File
+ macho_preload_executable, ///< Mach-O Preloaded Executable
+ macho_dynamically_linked_shared_lib, ///< Mach-O dynlinked shared lib
+ macho_dynamic_linker, ///< The Mach-O dynamic linker
+ macho_bundle, ///< Mach-O Bundle file
+ macho_dynamically_linked_shared_lib_stub, ///< Mach-O Shared lib stub
+ macho_dsym_companion, ///< Mach-O dSYM companion file
+ coff_object, ///< COFF object file
+ pecoff_executable ///< PECOFF executable file
+ };
+
+ bool is_object() const {
+ return v_ != unknown;
+ }
+
+ file_magic() : v_(unknown) {}
+ file_magic(_ v) : v_(v) {}
+ explicit file_magic(int v) : v_(_(v)) {}
+ operator int() const {return v_;}
+
+private:
+ int v_;
+};
+
/// @}
/// @name Physical Operators
/// @{
@@ -241,6 +299,8 @@ bool equivalent(file_status A, file_status B);
/// @brief Do paths represent the same thing?
///
+/// assert(status_known(A) || status_known(B));
+///
/// @param A Input path A.
/// @param B Input path B.
/// @param result Set to true if stat(A) and stat(B) have the same device and
@@ -397,13 +457,16 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result);
error_code get_magic(const Twine &path, uint32_t len,
SmallVectorImpl<char> &result);
+/// @brief Identify the type of a binary file based on how magical it is.
+file_magic identify_magic(StringRef magic);
+
/// @brief Get and identify \a path's type based on its content.
///
/// @param path Input path.
/// @param result Set to the type of file, or file_magic::unknown.
/// @results errc::success if result has been successfully set, otherwise a
/// platform specific error_code.
-error_code identify_magic(const Twine &path, LLVMFileType &result);
+error_code identify_magic(const Twine &path, file_magic &result);
/// @brief Get library paths the system linker uses.
///
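Caller-side sketch of the revised API; InputPath is an assumption.

sys::fs::file_magic Magic;
if (error_code ec = sys::fs::identify_magic(Twine(InputPath), Magic))
  return ec;
// The StringRef overload classifies a buffer already in memory instead.
if (Magic.is_object()) {
  // anything recognized: bitcode, archive, ELF/Mach-O/COFF images, ...
}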
@@ -479,76 +542,171 @@ public:
bool operator>=(const directory_entry& rhs) const;
};
+namespace detail {
+ struct DirIterState;
+
+ error_code directory_iterator_construct(DirIterState&, StringRef);
+ error_code directory_iterator_increment(DirIterState&);
+ error_code directory_iterator_destruct(DirIterState&);
+
+ /// DirIterState - Keeps state for the directory_iterator. It is reference
+ /// counted in order to preserve InputIterator semantics on copy.
+ struct DirIterState : public RefCountedBase<DirIterState> {
+ DirIterState()
+ : IterationHandle(0) {}
+
+ ~DirIterState() {
+ directory_iterator_destruct(*this);
+ }
+
+ intptr_t IterationHandle;
+ directory_entry CurrentEntry;
+ };
+}
+
/// directory_iterator - Iterates through the entries in path. There is no
/// operator++ because we need an error_code. If it's really needed we can make
/// it call report_fatal_error on error.
class directory_iterator {
- intptr_t IterationHandle;
- directory_entry CurrentEntry;
-
- // Platform implementations implement these functions to handle iteration.
- friend error_code directory_iterator_construct(directory_iterator &it,
- StringRef path);
- friend error_code directory_iterator_increment(directory_iterator &it);
- friend error_code directory_iterator_destruct(directory_iterator &it);
+ IntrusiveRefCntPtr<detail::DirIterState> State;
public:
- explicit directory_iterator(const Twine &path, error_code &ec)
- : IterationHandle(0) {
+ explicit directory_iterator(const Twine &path, error_code &ec) {
+ State = new detail::DirIterState;
SmallString<128> path_storage;
- ec = directory_iterator_construct(*this, path.toStringRef(path_storage));
+ ec = detail::directory_iterator_construct(*State,
+ path.toStringRef(path_storage));
}
- /// Construct end iterator.
- directory_iterator() : IterationHandle(0) {}
-
- ~directory_iterator() {
- directory_iterator_destruct(*this);
+ explicit directory_iterator(const directory_entry &de, error_code &ec) {
+ State = new detail::DirIterState;
+ ec = detail::directory_iterator_construct(*State, de.path());
}
+ /// Construct end iterator.
+ directory_iterator() : State(new detail::DirIterState) {}
+
// No operator++ because we need error_code.
directory_iterator &increment(error_code &ec) {
- ec = directory_iterator_increment(*this);
+ ec = directory_iterator_increment(*State);
return *this;
}
- const directory_entry &operator*() const { return CurrentEntry; }
- const directory_entry *operator->() const { return &CurrentEntry; }
+ const directory_entry &operator*() const { return State->CurrentEntry; }
+ const directory_entry *operator->() const { return &State->CurrentEntry; }
+
+ bool operator==(const directory_iterator &RHS) const {
+ return State->CurrentEntry == RHS.State->CurrentEntry;
+ }
bool operator!=(const directory_iterator &RHS) const {
- return CurrentEntry != RHS.CurrentEntry;
+ return !(*this == RHS);
}
// Other members as required by
// C++ Std, 24.1.1 Input iterators [input.iterators]
};
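Typical iteration (sketch): increment() is used instead of operator++ so the error_code can surface, and copies now share the reference-counted DirIterState.

error_code ec;
for (sys::fs::directory_iterator I("some/dir", ec), E; I != E;
     I.increment(ec)) {
  if (ec)
    break;                        // report or propagate in real code
  llvm::outs() << I->path() << '\n';
}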
+namespace detail {
+ /// RecDirIterState - Keeps state for the recursive_directory_iterator. It is
+ /// reference counted in order to preserve InputIterator semantics on copy.
+ struct RecDirIterState : public RefCountedBase<RecDirIterState> {
+ RecDirIterState()
+ : Level(0)
+ , HasNoPushRequest(false) {}
+
+ std::stack<directory_iterator, std::vector<directory_iterator> > Stack;
+ uint16_t Level;
+ bool HasNoPushRequest;
+ };
+}
+
/// recursive_directory_iterator - Same as directory_iterator except that it
/// recurses down into child directories.
class recursive_directory_iterator {
- uint16_t Level;
- bool HasNoPushRequest;
- // implementation directory iterator status
+ IntrusiveRefCntPtr<detail::RecDirIterState> State;
public:
- explicit recursive_directory_iterator(const Twine &path, error_code &ec);
+ recursive_directory_iterator() {}
+ explicit recursive_directory_iterator(const Twine &path, error_code &ec)
+ : State(new detail::RecDirIterState) {
+ State->Stack.push(directory_iterator(path, ec));
+ if (State->Stack.top() == directory_iterator())
+ State.reset();
+ }
// No operator++ because we need error_code.
- directory_iterator &increment(error_code &ec);
+ recursive_directory_iterator &increment(error_code &ec) {
+ static const directory_iterator end_itr;
+
+ if (State->HasNoPushRequest)
+ State->HasNoPushRequest = false;
+ else {
+ file_status st;
+ if ((ec = State->Stack.top()->status(st))) return *this;
+ if (is_directory(st)) {
+ State->Stack.push(directory_iterator(*State->Stack.top(), ec));
+ if (ec) return *this;
+ if (State->Stack.top() != end_itr) {
+ ++State->Level;
+ return *this;
+ }
+ State->Stack.pop();
+ }
+ }
+
+ while (!State->Stack.empty()
+ && State->Stack.top().increment(ec) == end_itr) {
+ State->Stack.pop();
+ --State->Level;
+ }
+
+ // Check if we are done. If so, create an end iterator.
+ if (State->Stack.empty())
+ State.reset();
- const directory_entry &operator*() const;
- const directory_entry *operator->() const;
+ return *this;
+ }
+
+ const directory_entry &operator*() const { return *State->Stack.top(); }
+ const directory_entry *operator->() const { return &*State->Stack.top(); }
// observers
- /// Gets the current level. path is at level 0.
- int level() const;
+ /// Gets the current level. Starting path is at level 0.
+ int level() const { return State->Level; }
+
/// Returns true if no_push has been called for this directory_entry.
- bool no_push_request() const;
+ bool no_push_request() const { return State->HasNoPushRequest; }
// modifiers
/// Goes up one level if Level > 0.
- void pop();
+ void pop() {
+ assert(State && "Cannot pop an end iterator!");
+ assert(State->Level > 0 && "Cannot pop an iterator with level < 1");
+
+ static const directory_iterator end_itr;
+ error_code ec;
+ do {
+ if (ec)
+ report_fatal_error("Error incrementing directory iterator.");
+ State->Stack.pop();
+ --State->Level;
+ } while (!State->Stack.empty()
+ && State->Stack.top().increment(ec) == end_itr);
+
+ // Check if we are done. If so, create an end iterator.
+ if (State->Stack.empty())
+ State.reset();
+ }
+
/// Does not go down into the current directory_entry.
- void no_push();
+ void no_push() { State->HasNoPushRequest = true; }
+ bool operator==(const recursive_directory_iterator &RHS) const {
+ return State == RHS.State;
+ }
+
+ bool operator!=(const recursive_directory_iterator &RHS) const {
+ return !(*this == RHS);
+ }
// Other members as required by
// C++ Std, 24.1.1 Input iterators [input.iterators]
};
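The recursive variant works the same way, with level() and no_push() giving callers depth control; the path and cutoff here are illustrative.

error_code ec;
for (sys::fs::recursive_directory_iterator I("some/dir", ec), E; I != E;
     I.increment(ec)) {
  if (ec)
    break;
  if (I.level() >= 2)
    I.no_push();                  // don't descend below two directory levels
}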
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index 4627557..d085c94 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -51,6 +51,13 @@ inline bool isInt<32>(int64_t x) {
return static_cast<int32_t>(x) == x;
}
+/// isShiftedInt<N,S> - Checks if a signed integer is an N bit number shifted
+/// left by S.
+template<unsigned N, unsigned S>
+inline bool isShiftedInt(int64_t x) {
+ return isInt<N+S>(x) && (x % (1<<S) == 0);
+}
+
/// isUInt - Checks if an unsigned integer fits into the given bit width.
template<unsigned N>
inline bool isUInt(uint64_t x) {
@@ -70,6 +77,13 @@ inline bool isUInt<32>(uint64_t x) {
return static_cast<uint32_t>(x) == x;
}
+/// isShiftedUInt<N,S> - Checks if an unsigned integer is an N bit number shifted
+/// left by S.
+template<unsigned N, unsigned S>
+inline bool isShiftedUInt(uint64_t x) {
+ return isUInt<N+S>(x) && (x % (1<<S) == 0);
+}
+
/// isUIntN - Checks if an unsigned integer fits into the given (dynamic)
/// bit width.
inline bool isUIntN(unsigned N, uint64_t x) {
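Example of the new predicates: a displacement that must be a multiple of 4 and whose shifted value fits in 8 signed bits (a common RISC branch-encoding constraint) checks as:

int64_t Disp = 508;                       // 127 << 2
bool OK  = isShiftedInt<8, 2>(Disp);      // true: multiple of 4, 127 fits in 8 bits
bool Bad = isShiftedInt<8, 2>(Disp + 2);  // false: not a multiple of 4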
diff --git a/include/llvm/Support/Program.h b/include/llvm/Support/Program.h
index a502657..a85f235 100644
--- a/include/llvm/Support/Program.h
+++ b/include/llvm/Support/Program.h
@@ -17,6 +17,7 @@
#include "llvm/Support/Path.h"
namespace llvm {
+class error_code;
namespace sys {
// TODO: Add operations to communicate with the process, redirect its I/O,
@@ -122,12 +123,12 @@ namespace sys {
/// @brief Construct a Program by finding it by name.
static Path FindProgramByName(const std::string& name);
- // These methods change the specified standard stream (stdin,
- // stdout, or stderr) to binary mode. They return true if an error
- // occurred
- static bool ChangeStdinToBinary();
- static bool ChangeStdoutToBinary();
- static bool ChangeStderrToBinary();
+ // These methods change the specified standard stream (stdin, stdout, or
+ // stderr) to binary mode. They return errc::success if the specified stream
+ // was changed. Otherwise a platform dependent error is returned.
+ static error_code ChangeStdinToBinary();
+ static error_code ChangeStdoutToBinary();
+ static error_code ChangeStderrToBinary();
/// A convenience function equivalent to Program prg; prg.Execute(..);
/// prg.Wait(..);
diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h
index e1ef39e..ea55c91 100644
--- a/include/llvm/Support/TargetRegistry.h
+++ b/include/llvm/Support/TargetRegistry.h
@@ -44,6 +44,7 @@ namespace llvm {
class MCTargetAsmLexer;
class MCTargetAsmParser;
class TargetMachine;
+ class TargetOptions;
class raw_ostream;
class formatted_raw_ostream;
@@ -86,6 +87,7 @@ namespace llvm {
StringRef TT,
StringRef CPU,
StringRef Features,
+ const TargetOptions &Options,
Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL);
@@ -334,13 +336,14 @@ namespace llvm {
/// either the target triple from the module, or the target triple of the
/// host if that does not exist.
TargetMachine *createTargetMachine(StringRef Triple, StringRef CPU,
- StringRef Features,
+ StringRef Features, const TargetOptions &Options,
Reloc::Model RM = Reloc::Default,
CodeModel::Model CM = CodeModel::Default,
CodeGenOpt::Level OL = CodeGenOpt::Default) const {
if (!TargetMachineCtorFn)
return 0;
- return TargetMachineCtorFn(*this, Triple, CPU, Features, RM, CM, OL);
+ return TargetMachineCtorFn(*this, Triple, CPU, Features, Options,
+ RM, CM, OL);
}
/// createMCAsmBackend - Create a target specific assembly backend.
@@ -1017,10 +1020,11 @@ namespace llvm {
private:
static TargetMachine *Allocator(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
- return new TargetMachineImpl(T, TT, CPU, FS, RM, CM, OL);
+ return new TargetMachineImpl(T, TT, CPU, FS, Options, RM, CM, OL);
}
};
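Callers now construct a TargetOptions and thread it through; a minimal sketch, with lookup error handling elided and NoFramePointerElim shown as one example of the migrated flags.

std::string Err;
const Target *T = TargetRegistry::lookupTarget(TripleStr, Err);
TargetOptions Options;
Options.NoFramePointerElim = true;  // replaces the old -disable-fp-elim global
TargetMachine *TM =
    T->createTargetMachine(TripleStr, "generic", "", Options);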
diff --git a/include/llvm/Support/Valgrind.h b/include/llvm/Support/Valgrind.h
index 9b8d277..e147647 100644
--- a/include/llvm/Support/Valgrind.h
+++ b/include/llvm/Support/Valgrind.h
@@ -17,10 +17,10 @@
#define LLVM_SYSTEM_VALGRIND_H
#include "llvm/Support/Compiler.h"
-#include "llvm/Config/config.h"
+#include "llvm/Config/llvm-config.h"
#include <stddef.h>
-#if ENABLE_THREADS != 0 && !defined(NDEBUG)
+#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG)
// tsan (Thread Sanitizer) is a valgrind-based tool that detects these exact
// functions by name.
extern "C" {
@@ -42,7 +42,7 @@ namespace sys {
// Otherwise valgrind may continue to execute the old version of the code.
void ValgrindDiscardTranslations(const void *Addr, size_t Len);
-#if ENABLE_THREADS != 0 && !defined(NDEBUG)
+#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG)
// Thread Sanitizer is a valgrind tool that finds races in code.
// See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index 9714172..c9b6a8e 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -423,7 +423,7 @@ class Predicate<string cond> {
/// NoHonorSignDependentRounding - This predicate is true if support for
/// sign-dependent-rounding is not enabled.
def NoHonorSignDependentRounding
- : Predicate<"!HonorSignDependentRoundingFPMath()">;
+ : Predicate<"!TM.Options.HonorSignDependentRoundingFPMath()">;
class Requires<list<Predicate> preds> {
list<Predicate> Predicates = preds;
@@ -688,6 +688,11 @@ def COPY : Instruction {
let neverHasSideEffects = 1;
let isAsCheapAsAMove = 1;
}
+def BUNDLE : Instruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins variable_ops);
+ let AsmString = "BUNDLE";
+}
}
//===----------------------------------------------------------------------===//
@@ -733,7 +738,20 @@ class AssemblerPredicate<string cond> {
string AssemblerCondString = cond;
}
-
+/// TokenAlias - This class allows targets to define assembler token
+/// operand aliases. That is, a token literal operand which is equivalent
+/// to another, canonical, token literal. For example, ARM allows:
+/// vmov.u32 s4, #0 -> vmov.i32, #0
+/// 'u32' is a more specific designator for the 32-bit integer type specifier
+/// and is legal for any instruction which accepts 'i32' as a datatype suffix.
+/// def : TokenAlias<".u32", ".i32">;
+///
+/// This works by marking the match class of 'From' as a subclass of the
+/// match class of 'To'.
+class TokenAlias<string From, string To> {
+ string FromToken = From;
+ string ToToken = To;
+}
/// MnemonicAlias - This class allows targets to define assembler mnemonic
/// aliases. This should be used when all forms of one mnemonic are accepted
diff --git a/include/llvm/Target/TargetCallingConv.h b/include/llvm/Target/TargetCallingConv.h
index 3be1856..a6251e7 100644
--- a/include/llvm/Target/TargetCallingConv.h
+++ b/include/llvm/Target/TargetCallingConv.h
@@ -14,7 +14,6 @@
#ifndef LLVM_TARGET_TARGETCALLINGCONV_H
#define LLVM_TARGET_TARGETCALLINGCONV_H
-#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MathExtras.h"
#include <string>
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 590fc1e..8409229 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -535,7 +535,7 @@ public:
/// isUnpredicatedTerminator - Returns true if the instruction is a
/// terminator instruction that has not been predicated.
- virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+ virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const = 0;
/// PredicateInstruction - Convert the instruction into a predicated
/// instruction. It returns true if the operation was successful.
@@ -648,6 +648,15 @@ public:
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const;
+ /// getOutputLatency - Compute and return the output dependency latency of
+ /// a given pair of defs which both target the same register. This is
+ /// usually one.
+ virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const {
+ return 1;
+ }
+
/// getInstrLatency - Compute the instruction latency of a given instruction.
/// If the instruction has higher cost when predicated, it's returned via
/// PredCost.
@@ -814,6 +823,7 @@ public:
virtual bool hasStoreToStackSlot(const MachineInstr *MI,
const MachineMemOperand *&MMO,
int &FrameIndex) const;
+ virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
virtual bool PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const;
virtual void reMaterialize(MachineBasicBlock &MBB,
diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h
index 7770a11..0d7a949 100644
--- a/include/llvm/Target/TargetLibraryInfo.h
+++ b/include/llvm/Target/TargetLibraryInfo.h
@@ -18,33 +18,187 @@ namespace llvm {
namespace LibFunc {
enum Func {
- /// void *memset(void *b, int c, size_t len);
- memset,
-
- // void *memcpy(void *s1, const void *s2, size_t n);
+ /// double acos(double x);
+ acos,
+ /// long double acosl(long double x);
+ acosl,
+ /// float acosf(float x);
+ acosf,
+ /// double asin(double x);
+ asin,
+ /// long double asinl(long double x);
+ asinl,
+ /// float asinf(float x);
+ asinf,
+ /// double atan(double x);
+ atan,
+ /// long double atanl(long double x);
+ atanl,
+ /// float atanf(float x);
+ atanf,
+ /// double atan2(double y, double x);
+ atan2,
+ /// long double atan2l(long double y, long double x);
+ atan2l,
+ /// float atan2f(float y, float x);
+ atan2f,
+ /// double ceil(double x);
+ ceil,
+ /// long double ceill(long double x);
+ ceill,
+ /// float ceilf(float x);
+ ceilf,
+ /// double copysign(double x, double y);
+ copysign,
+ /// float copysignf(float x, float y);
+ copysignf,
+ /// long double copysignl(long double x, long double y);
+ copysignl,
+ /// double cos(double x);
+ cos,
+ /// long double cosl(long double x);
+ cosl,
+ /// float cosf(float x);
+ cosf,
+ /// double cosh(double x);
+ cosh,
+ /// long double coshl(long double x);
+ coshl,
+ /// float coshf(float x);
+ coshf,
+ /// double exp(double x);
+ exp,
+ /// long double expl(long double x);
+ expl,
+ /// float expf(float x);
+ expf,
+ /// double exp2(double x);
+ exp2,
+ /// long double exp2l(long double x);
+ exp2l,
+ /// float exp2f(float x);
+ exp2f,
+ /// double expm1(double x);
+ expm1,
+ /// long double expm1l(long double x);
+ expm1l,
+ /// float expm1f(float x);
+ expm1f,
+ /// double fabs(double x);
+ fabs,
+ /// long double fabsl(long double x);
+ fabsl,
+ /// float fabsf(float x);
+ fabsf,
+ /// double floor(double x);
+ floor,
+ /// long double floorl(long double x);
+ floorl,
+ /// float floorf(float x);
+ floorf,
+ /// int fiprintf(FILE *stream, const char *format, ...);
+ fiprintf,
+ /// double fmod(double x, double y);
+ fmod,
+ /// long double fmodl(long double x, long double y);
+ fmodl,
+ /// float fmodf(float x, float y);
+ fmodf,
+ /// int fputs(const char *s, FILE *stream);
+ fputs,
+ /// size_t fwrite(const void *ptr, size_t size, size_t nitems,
+ /// FILE *stream);
+ fwrite,
+ /// int iprintf(const char *format, ...);
+ iprintf,
+ /// double log(double x);
+ log,
+ /// long double logl(long double x);
+ logl,
+ /// float logf(float x);
+ logf,
+ /// double log2(double x);
+ log2,
+ /// long double log2l(long double x);
+ log2l,
+ /// float log2f(float x);
+ log2f,
+ /// double log10(double x);
+ log10,
+ /// long double log10l(long double x);
+ log10l,
+ /// float log10f(float x);
+ log10f,
+ /// double log1p(double x);
+ log1p,
+ /// long double log1pl(long double x);
+ log1pl,
+ /// float log1pf(float x);
+ log1pf,
+ /// void *memcpy(void *s1, const void *s2, size_t n);
memcpy,
-
- // void *memmove(void *s1, const void *s2, size_t n);
+ /// void *memmove(void *s1, const void *s2, size_t n);
memmove,
-
+ /// void *memset(void *b, int c, size_t len);
+ memset,
/// void memset_pattern16(void *b, const void *pattern16, size_t len);
memset_pattern16,
-
- /// int iprintf(const char *format, ...);
- iprintf,
-
+ /// double nearbyint(double x);
+ nearbyint,
+ /// float nearbyintf(float x);
+ nearbyintf,
+ /// long double nearbyintl(long double x);
+ nearbyintl,
+ /// double pow(double x, double y);
+ pow,
+ /// float powf(float x, float y);
+ powf,
+ /// long double powl(long double x, long double y);
+ powl,
+ /// double rint(double x);
+ rint,
+ /// float rintf(float x);
+ rintf,
+ /// long double rintl(long double x);
+ rintl,
+ /// double sin(double x);
+ sin,
+ /// long double sinl(long double x);
+ sinl,
+ /// float sinf(float x);
+ sinf,
+ /// double sinh(double x);
+ sinh,
+ /// long double sinhl(long double x);
+ sinhl,
+ /// float sinhf(float x);
+ sinhf,
/// int siprintf(char *str, const char *format, ...);
siprintf,
-
- /// int fiprintf(FILE *stream, const char *format, ...);
- fiprintf,
-
- // size_t fwrite(const void *ptr, size_t size, size_t nitems,
- // FILE *stream);
- fwrite,
-
- // int fputs(const char *s, FILE *stream);
- fputs,
+ /// double sqrt(double x);
+ sqrt,
+ /// long double sqrtl(long double x);
+ sqrtl,
+ /// float sqrtf(float x);
+ sqrtf,
+ /// double tan(double x);
+ tan,
+ /// long double tanl(long double x);
+ tanl,
+ /// float tanf(float x);
+ tanf,
+ /// double tanh(double x);
+ tanh,
+ /// long double tanhl(long double x);
+ tanhl,
+ /// float tanhf(float x);
+ tanhf,
+ /// double trunc(double x);
+ trunc,
+ /// float truncf(float x);
+ truncf,
+ /// long double truncl(long double x);
+ truncl,
NumLibFuncs
};
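Optimizations query availability through TargetLibraryInfo rather than assuming libm exists; a sketch:

bool canUseSqrtf(const TargetLibraryInfo &TLI) {
  // Only emit a call to sqrtf if the target's runtime actually provides it.
  return TLI.has(LibFunc::sqrtf);
}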
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 3fe3a38..4b66956 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -36,27 +36,19 @@
#include <vector>
namespace llvm {
- class AllocaInst;
- class APFloat;
class CallInst;
class CCState;
- class Function;
class FastISel;
class FunctionLoweringInfo;
class ImmutableCallSite;
class MachineBasicBlock;
class MachineFunction;
- class MachineFrameInfo;
class MachineInstr;
class MachineJumpTableInfo;
class MCContext;
class MCExpr;
- class SDNode;
- class SDValue;
- class SelectionDAG;
template<typename T> class SmallVectorImpl;
class TargetData;
- class TargetMachine;
class TargetRegisterClass;
class TargetLoweringObjectFile;
class Value;
@@ -520,8 +512,19 @@ public:
/// AllowUnknown is true, this will return MVT::Other for types with no EVT
/// counterpart (e.g. structs), otherwise it will assert.
EVT getValueType(Type *Ty, bool AllowUnknown = false) const {
- EVT VT = EVT::getEVT(Ty, AllowUnknown);
- return VT == MVT::iPTR ? PointerTy : VT;
+ // Lower scalar pointers to native pointer types.
+ if (Ty->isPointerTy()) return PointerTy;
+
+ if (Ty->isVectorTy()) {
+ VectorType *VTy = cast<VectorType>(Ty);
+ Type *Elm = VTy->getElementType();
+ // Lower vectors of pointers to native pointer types.
+ if (Elm->isPointerTy())
+ Elm = EVT(PointerTy).getTypeForEVT(Ty->getContext());
+ return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
+ VTy->getNumElements());
+ }
+ return EVT::getEVT(Ty, AllowUnknown);
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index db42350..c169e06 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -14,6 +14,7 @@
#ifndef LLVM_TARGET_TARGETMACHINE_H
#define LLVM_TARGET_TARGETMACHINE_H
+#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/ADT/StringRef.h"
#include <cassert>
@@ -63,7 +64,7 @@ class TargetMachine {
void operator=(const TargetMachine &); // DO NOT IMPLEMENT
protected: // Can only create subclasses.
TargetMachine(const Target &T, StringRef TargetTriple,
- StringRef CPU, StringRef FS);
+ StringRef CPU, StringRef FS, const TargetOptions &Options);
/// getSubtargetImpl - virtual method implemented by subclasses that returns
/// a reference to that target's TargetSubtargetInfo-derived member variable.
@@ -101,6 +102,8 @@ public:
const StringRef getTargetCPU() const { return TargetCPU; }
const StringRef getTargetFeatureString() const { return TargetFS; }
+ TargetOptions Options;
+
// Interfaces to the major aspects of target machine information:
// -- Instruction opcode and operand information
// -- Pipelines and scheduling information
@@ -249,7 +252,7 @@ public:
virtual bool addPassesToEmitFile(PassManagerBase &,
formatted_raw_ostream &,
CodeGenFileType,
- bool = true) {
+ bool /*DisableVerify*/ = true) {
return true;
}
@@ -261,7 +264,7 @@ public:
///
virtual bool addPassesToEmitMachineCode(PassManagerBase &,
JITCodeEmitter &,
- bool = true) {
+ bool /*DisableVerify*/ = true) {
return true;
}
@@ -273,7 +276,7 @@ public:
virtual bool addPassesToEmitMC(PassManagerBase &,
MCContext *&,
raw_ostream &,
- bool = true) {
+ bool /*DisableVerify*/ = true) {
return true;
}
};
@@ -284,10 +287,20 @@ public:
class LLVMTargetMachine : public TargetMachine {
protected: // Can only create subclasses.
LLVMTargetMachine(const Target &T, StringRef TargetTriple,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, TargetOptions Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
+ /// printNoVerify - Add a pass to dump the machine function, if debugging is
+ /// enabled.
+ ///
+ void printNoVerify(PassManagerBase &PM, const char *Banner) const;
+
+ /// printAndVerify - Add a pass to dump then verify the machine function, if
+ /// those steps are enabled.
+ ///
+ void printAndVerify(PassManagerBase &PM, const char *Banner) const;
+
private:
/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for
/// both emitting to assembly files or machine code output.
diff --git a/include/llvm/Target/TargetOpcodes.h b/include/llvm/Target/TargetOpcodes.h
index 37f7b2f..f0b181e 100644
--- a/include/llvm/Target/TargetOpcodes.h
+++ b/include/llvm/Target/TargetOpcodes.h
@@ -82,7 +82,12 @@ namespace TargetOpcode {
/// COPY - Target-independent register copy. This instruction can also be
/// used to copy between subregisters of virtual registers.
- COPY = 13
+ COPY = 13,
+
+ /// BUNDLE - This instruction represents an instruction bundle. Instructions
+ /// which immediately follow a BUNDLE instruction and are marked with the
+ /// 'InsideBundle' flag are inside the bundle.
+ BUNDLE
};
} // end namespace TargetOpcode
} // end namespace llvm
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index 3691341..3001816 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -15,6 +15,8 @@
#ifndef LLVM_TARGET_TARGETOPTIONS_H
#define LLVM_TARGET_TARGETOPTIONS_H
+#include <string>
+
namespace llvm {
class MachineFunction;
class StringRef;
@@ -27,140 +29,156 @@ namespace llvm {
Hard // Hard float.
};
}
-
- /// PrintMachineCode - This flag is enabled when the -print-machineinstrs
- /// option is specified on the command line, and should enable debugging
- /// output from the code generator.
- extern bool PrintMachineCode;
-
- /// NoFramePointerElim - This flag is enabled when the -disable-fp-elim is
- /// specified on the command line. If the target supports the frame pointer
- /// elimination optimization, this option should disable it.
- extern bool NoFramePointerElim;
-
- /// NoFramePointerElimNonLeaf - This flag is enabled when the
- /// -disable-non-leaf-fp-elim is specified on the command line. If the target
- /// supports the frame pointer elimination optimization, this option should
- /// disable it for non-leaf functions.
- extern bool NoFramePointerElimNonLeaf;
-
- /// DisableFramePointerElim - This returns true if frame pointer elimination
- /// optimization should be disabled for the given machine function.
- extern bool DisableFramePointerElim(const MachineFunction &MF);
-
- /// LessPreciseFPMAD - This flag is enabled when the
- /// -enable-fp-mad is specified on the command line. When this flag is off
- /// (the default), the code generator is not allowed to generate mad
- /// (multiply add) if the result is "less precise" than doing those operations
- /// individually.
- extern bool LessPreciseFPMADOption;
- extern bool LessPreciseFPMAD();
-
- /// NoExcessFPPrecision - This flag is enabled when the
- /// -disable-excess-fp-precision flag is specified on the command line. When
- /// this flag is off (the default), the code generator is allowed to produce
- /// results that are "more precise" than IEEE allows. This includes use of
- /// FMA-like operations and use of the X86 FP registers without rounding all
- /// over the place.
- extern bool NoExcessFPPrecision;
-
- /// UnsafeFPMath - This flag is enabled when the
- /// -enable-unsafe-fp-math flag is specified on the command line. When
- /// this flag is off (the default), the code generator is not allowed to
- /// produce results that are "less precise" than IEEE allows. This includes
- /// use of X86 instructions like FSIN and FCOS instead of libcalls.
- /// UnsafeFPMath implies LessPreciseFPMAD.
- extern bool UnsafeFPMath;
-
- /// NoInfsFPMath - This flag is enabled when the
- /// -enable-no-infs-fp-math flag is specified on the command line. When
- /// this flag is off (the default), the code generator is not allowed to
- /// assume the FP arithmetic arguments and results are never +-Infs.
- extern bool NoInfsFPMath;
-
- /// NoNaNsFPMath - This flag is enabled when the
- /// -enable-no-nans-fp-math flag is specified on the command line. When
- /// this flag is off (the default), the code generator is not allowed to
- /// assume the FP arithmetic arguments and results are never NaNs.
- extern bool NoNaNsFPMath;
-
- /// HonorSignDependentRoundingFPMath - This returns true when the
- /// -enable-sign-dependent-rounding-fp-math is specified. If this returns
- /// false (the default), the code generator is allowed to assume that the
- /// rounding behavior is the default (round-to-zero for all floating point to
- /// integer conversions, and round-to-nearest for all other arithmetic
- /// truncations). If this is enabled (set to true), the code generator must
- /// assume that the rounding mode may dynamically change.
- extern bool HonorSignDependentRoundingFPMathOption;
- extern bool HonorSignDependentRoundingFPMath();
-
- /// UseSoftFloat - This flag is enabled when the -soft-float flag is specified
- /// on the command line. When this flag is on, the code generator will
- /// generate libcalls to the software floating point library instead of
- /// target FP instructions.
- extern bool UseSoftFloat;
-
- /// FloatABIType - This setting is set by -float-abi=xxx option is specfied
- /// on the command line. This setting may either be Default, Soft, or Hard.
- /// Default selects the target's default behavior. Soft selects the ABI for
- /// UseSoftFloat, but does not inidcate that FP hardware may not be used.
- /// Such a combination is unfortunately popular (e.g. arm-apple-darwin).
- /// Hard presumes that the normal FP ABI is used.
- extern FloatABI::ABIType FloatABIType;
-
- /// NoZerosInBSS - By default some codegens place zero-initialized data to
- /// .bss section. This flag disables such behaviour (necessary, e.g. for
- /// crt*.o compiling).
- extern bool NoZerosInBSS;
-
- /// JITExceptionHandling - This flag indicates that the JIT should emit
- /// exception handling information.
- extern bool JITExceptionHandling;
-
- /// JITEmitDebugInfo - This flag indicates that the JIT should try to emit
- /// debug information and notify a debugger about it.
- extern bool JITEmitDebugInfo;
-
- /// JITEmitDebugInfoToDisk - This flag indicates that the JIT should write
- /// the object files generated by the JITEmitDebugInfo flag to disk. This
- /// flag is hidden and is only for debugging the debug info.
- extern bool JITEmitDebugInfoToDisk;
-
- /// GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is
- /// specified on the commandline. When the flag is on, participating targets
- /// will perform tail call optimization on all calls which use the fastcc
- /// calling convention and which satisfy certain target-independent
- /// criteria (being at the end of a function, having the same return type
- /// as their parent function, etc.), using an alternate ABI if necessary.
- extern bool GuaranteedTailCallOpt;
-
- /// StackAlignmentOverride - Override default stack alignment for target.
- extern unsigned StackAlignmentOverride;
-
- /// RealignStack - This flag indicates whether the stack should be
- /// automatically realigned, if needed.
- extern bool RealignStack;
-
- /// DisableJumpTables - This flag indicates jump tables should not be
- /// generated.
- extern bool DisableJumpTables;
-
- /// EnableFastISel - This flag enables fast-path instruction selection
- /// which trades away generated code quality in favor of reducing
- /// compile time.
- extern bool EnableFastISel;
-
+
/// StrongPHIElim - This flag enables more aggressive PHI elimination
 /// with earlier copy coalescing.
extern bool StrongPHIElim;
- /// getTrapFunctionName - If this returns a non-empty string, this means isel
- /// should lower Intrinsic::trap to a call to the specified function name
- /// instead of an ISD::TRAP node.
- extern StringRef getTrapFunctionName();
-
- extern bool EnableSegmentedStacks;
-
+ class TargetOptions {
+ public:
+ TargetOptions()
+ : PrintMachineCode(false), NoFramePointerElim(false),
+ NoFramePointerElimNonLeaf(false), LessPreciseFPMADOption(false),
+ NoExcessFPPrecision(false), UnsafeFPMath(false), NoInfsFPMath(false),
+ NoNaNsFPMath(false), HonorSignDependentRoundingFPMathOption(false),
+ UseSoftFloat(false), NoZerosInBSS(false), JITExceptionHandling(false),
+ JITEmitDebugInfo(false), JITEmitDebugInfoToDisk(false),
+ GuaranteedTailCallOpt(false), StackAlignmentOverride(0),
+ RealignStack(true), DisableJumpTables(false), EnableFastISel(false),
+ EnableSegmentedStacks(false), TrapFuncName(""),
+ FloatABIType(FloatABI::Default)
+ {}
+
+ /// PrintMachineCode - This flag is enabled when the -print-machineinstrs
+ /// option is specified on the command line, and should enable debugging
+ /// output from the code generator.
+ unsigned PrintMachineCode : 1;
+
+ /// NoFramePointerElim - This flag is enabled when the -disable-fp-elim is
+ /// specified on the command line. If the target supports the frame pointer
+ /// elimination optimization, this option should disable it.
+ unsigned NoFramePointerElim : 1;
+
+ /// NoFramePointerElimNonLeaf - This flag is enabled when the
+ /// -disable-non-leaf-fp-elim is specified on the command line. If the
+ /// target supports the frame pointer elimination optimization, this option
+ /// should disable it for non-leaf functions.
+ unsigned NoFramePointerElimNonLeaf : 1;
+
+ /// DisableFramePointerElim - This returns true if frame pointer elimination
+ /// optimization should be disabled for the given machine function.
+ bool DisableFramePointerElim(const MachineFunction &MF) const;
+
+ /// LessPreciseFPMAD - This flag is enabled when the
+ /// -enable-fp-mad is specified on the command line. When this flag is off
+ /// (the default), the code generator is not allowed to generate mad
+ /// (multiply add) if the result is "less precise" than doing those
+ /// operations individually.
+ unsigned LessPreciseFPMADOption : 1;
+ bool LessPreciseFPMAD() const;
+
+ /// NoExcessFPPrecision - This flag is enabled when the
+ /// -disable-excess-fp-precision flag is specified on the command line.
+ /// When this flag is off (the default), the code generator is allowed to
+ /// produce results that are "more precise" than IEEE allows. This includes
+ /// use of FMA-like operations and use of the X86 FP registers without
+ /// rounding all over the place.
+ unsigned NoExcessFPPrecision : 1;
+
+ /// UnsafeFPMath - This flag is enabled when the
+ /// -enable-unsafe-fp-math flag is specified on the command line. When
+ /// this flag is off (the default), the code generator is not allowed to
+ /// produce results that are "less precise" than IEEE allows. This includes
+ /// use of X86 instructions like FSIN and FCOS instead of libcalls.
+ /// UnsafeFPMath implies LessPreciseFPMAD.
+ unsigned UnsafeFPMath : 1;
+
+ /// NoInfsFPMath - This flag is enabled when the
+ /// -enable-no-infs-fp-math flag is specified on the command line. When
+ /// this flag is off (the default), the code generator is not allowed to
+ /// assume the FP arithmetic arguments and results are never +-Infs.
+ unsigned NoInfsFPMath : 1;
+
+ /// NoNaNsFPMath - This flag is enabled when the
+ /// -enable-no-nans-fp-math flag is specified on the command line. When
+ /// this flag is off (the default), the code generator is not allowed to
+ /// assume the FP arithmetic arguments and results are never NaNs.
+ unsigned NoNaNsFPMath : 1;
+
+ /// HonorSignDependentRoundingFPMath - This returns true when the
+ /// -enable-sign-dependent-rounding-fp-math is specified. If this returns
+ /// false (the default), the code generator is allowed to assume that the
+ /// rounding behavior is the default (round-to-zero for all floating point
+ /// to integer conversions, and round-to-nearest for all other arithmetic
+ /// truncations). If this is enabled (set to true), the code generator must
+ /// assume that the rounding mode may dynamically change.
+ unsigned HonorSignDependentRoundingFPMathOption : 1;
+ bool HonorSignDependentRoundingFPMath() const;
+
+ /// UseSoftFloat - This flag is enabled when the -soft-float flag is
+ /// specified on the command line. When this flag is on, the code generator
+ /// will generate libcalls to the software floating point library instead of
+ /// target FP instructions.
+ unsigned UseSoftFloat : 1;
+
+ /// NoZerosInBSS - By default some codegens place zero-initialized data in
+ /// the .bss section. This flag disables that behaviour (necessary, e.g.,
+ /// for compiling crt*.o).
+ unsigned NoZerosInBSS : 1;
+
+ /// JITExceptionHandling - This flag indicates that the JIT should emit
+ /// exception handling information.
+ unsigned JITExceptionHandling : 1;
+
+ /// JITEmitDebugInfo - This flag indicates that the JIT should try to emit
+ /// debug information and notify a debugger about it.
+ unsigned JITEmitDebugInfo : 1;
+
+ /// JITEmitDebugInfoToDisk - This flag indicates that the JIT should write
+ /// the object files generated by the JITEmitDebugInfo flag to disk. This
+ /// flag is hidden and is only for debugging the debug info.
+ unsigned JITEmitDebugInfoToDisk : 1;
+
+ /// GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is
+ /// specified on the command line. When the flag is on, participating targets
+ /// will perform tail call optimization on all calls which use the fastcc
+ /// calling convention and which satisfy certain target-independent
+ /// criteria (being at the end of a function, having the same return type
+ /// as their parent function, etc.), using an alternate ABI if necessary.
+ unsigned GuaranteedTailCallOpt : 1;
+
+ /// StackAlignmentOverride - Override default stack alignment for target.
+ unsigned StackAlignmentOverride;
+
+ /// RealignStack - This flag indicates whether the stack should be
+ /// automatically realigned, if needed.
+ unsigned RealignStack : 1;
+
+ /// DisableJumpTables - This flag indicates jump tables should not be
+ /// generated.
+ unsigned DisableJumpTables : 1;
+
+ /// EnableFastISel - This flag enables fast-path instruction selection
+ /// which trades away generated code quality in favor of reducing
+ /// compile time.
+ unsigned EnableFastISel : 1;
+
+ unsigned EnableSegmentedStacks : 1;
+
+ /// getTrapFunctionName - If this returns a non-empty string, this means
+ /// isel should lower Intrinsic::trap to a call to the specified function
+ /// name instead of an ISD::TRAP node.
+ std::string TrapFuncName;
+ StringRef getTrapFunctionName() const;
+
+ /// FloatABIType - This setting is set when the -float-abi=xxx option is
+ /// specified on the command line. It may be Default, Soft, or Hard.
+ /// Default selects the target's default behavior. Soft selects the ABI for
+ /// UseSoftFloat, but does not indicate that FP hardware may not be used.
+ /// Such a combination is unfortunately popular (e.g. arm-apple-darwin).
+ /// Hard presumes that the normal FP ABI is used.
+ FloatABI::ABIType FloatABIType;
+ };
} // End llvm namespace
#endif
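The practical upshot of this hunk: codegen flags move from process-wide globals into a per-instance struct carried by each TargetMachine. A minimal sketch, assuming the registry's createTargetMachine() accepts the struct, as the TargetMachine constructor change earlier in this patch suggests:

#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

static llvm::TargetMachine *makeTM(const llvm::Target &T,
                                   llvm::StringRef TripleStr) {
  llvm::TargetOptions Options;        // every flag starts at its default
  Options.NoFramePointerElim = 1;     // formerly the global -disable-fp-elim
  return T.createTargetMachine(TripleStr, /*CPU=*/"", /*Features=*/"",
                               Options, llvm::Reloc::Default,
                               llvm::CodeModel::Default,
                               llvm::CodeGenOpt::Default);
}

Two TargetMachines in one process can now disagree about these flags, which the old extern bools made impossible.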
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 612635e..3288dd4 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -352,6 +352,8 @@ def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>;
def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>;
def cttz : SDNode<"ISD::CTTZ" , SDTIntUnaryOp>;
def ctpop : SDNode<"ISD::CTPOP" , SDTIntUnaryOp>;
+def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntUnaryOp>;
+def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntUnaryOp>;
def sext : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>;
def zext : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>;
def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>;
diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h
index cc74e7f..d265bda 100644
--- a/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -67,7 +67,12 @@ public:
/// EP_ScalarOptimizerLate - This extension point allows adding optimization
/// passes after most of the main optimizations, but before the last
/// cleanup-ish optimizations.
- EP_ScalarOptimizerLate
+ EP_ScalarOptimizerLate,
+
+ /// EP_EnabledOnOptLevel0 - This extension point allows adding passes that
+ /// should not be disabled at the -O0 optimization level. The passes will be
+ /// inserted after the inlining pass.
+ EP_EnabledOnOptLevel0
};
/// The Optimization Level - Specify the basic optimization level.
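A short sketch of registering a pass at the new extension point via the existing addExtension() hook; createMyCheckerPass() is a hypothetical factory standing in for whatever must survive -O0:

#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

llvm::Pass *createMyCheckerPass();  // hypothetical; not part of this patch

static void addMyChecker(const llvm::PassManagerBuilder &,
                         llvm::PassManagerBase &PM) {
  PM.add(createMyCheckerPass());
}

static void configure(llvm::PassManagerBuilder &PMB) {
  // Per the comment above, this runs even at -O0, right after inlining.
  PMB.addExtension(llvm::PassManagerBuilder::EP_EnabledOnOptLevel0,
                   addMyChecker);
}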
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index f9b9623..baa6364 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -29,7 +29,8 @@ ModulePass *createPathProfilerPass();
// Insert GCOV profiling instrumentation
ModulePass *createGCOVProfilerPass(bool EmitNotes = true, bool EmitData = true,
- bool Use402Format = false);
+ bool Use402Format = false,
+ bool UseExtraChecksum = false);
// Insert AddressSanitizer (address sanity checking) instrumentation
ModulePass *createAddressSanitizerPass();
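Because UseExtraChecksum is defaulted, existing callers keep compiling unchanged; opting in is one extra argument. A minimal sketch:

#include "llvm/PassManager.h"
#include "llvm/Transforms/Instrumentation.h"

static void addGCOV(llvm::PassManagerBase &PM) {
  PM.add(llvm::createGCOVProfilerPass(/*EmitNotes=*/true, /*EmitData=*/true,
                                      /*Use402Format=*/false,
                                      /*UseExtraChecksum=*/true));
}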
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 5c0e9c6..2c4f650 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -309,12 +309,6 @@ extern char &InstructionNamerID;
//===----------------------------------------------------------------------===//
//
-// GEPSplitter - Split complex GEPs into simple ones
-//
-FunctionPass *createGEPSplitterPass();
-
-//===----------------------------------------------------------------------===//
-//
// Sink - Code Sinking
//
FunctionPass *createSinkingPass();
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 6fcd160..867b9e4 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -173,9 +173,8 @@ BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P);
/// complicated to handle the case where one of the edges being split
/// is an exit of a loop with other exits).
///
-BasicBlock *SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds,
- unsigned NumPreds, const char *Suffix,
- Pass *P = 0);
+BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock*> Preds,
+ const char *Suffix, Pass *P = 0);
/// SplitLandingPadPredecessors - This method transforms the landing pad,
/// OrigBB, by introducing two new basic blocks into the function. One of those
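The ArrayRef change lets call sites drop the explicit pointer/length pair; a SmallVector of predecessors converts implicitly. A minimal sketch:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

static llvm::BasicBlock *
splitPreds(llvm::BasicBlock *BB,
           const llvm::SmallVectorImpl<llvm::BasicBlock*> &Preds,
           llvm::Pass *P) {
  // Old form: SplitBlockPredecessors(BB, &Preds[0], Preds.size(), ".split", P)
  return llvm::SplitBlockPredecessors(BB, Preds, ".split", P);
}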
diff --git a/include/llvm/Transforms/Utils/ModuleUtils.h b/include/llvm/Transforms/Utils/ModuleUtils.h
index d7e12a6..2c0ec9b 100644
--- a/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -25,6 +25,9 @@ class Function;
/// http://llvm.org/docs/LangRef.html#intg_global_ctors
void appendToGlobalCtors(Module &M, Function *F, int Priority);
+/// Same as appendToGlobalCtors(), but for global dtors.
+void appendToGlobalDtors(Module &M, Function *F, int Priority);
+
} // End llvm namespace
#endif // LLVM_TRANSFORMS_UTILS_MODULE_UTILS_H
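A usage sketch of the new symmetric pair; 65535 is the conventional lowest-priority value for llvm.global_ctors/llvm.global_dtors entries:

#include "llvm/Transforms/Utils/ModuleUtils.h"

static void registerInitAndShutdown(llvm::Module &M, llvm::Function *Init,
                                    llvm::Function *Shutdown) {
  llvm::appendToGlobalCtors(M, Init, 65535);
  llvm::appendToGlobalDtors(M, Shutdown, 65535);  // new in this patch
}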
diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h
index 7212a8c..f175e83 100644
--- a/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -22,9 +22,12 @@ class Loop;
class LoopInfo;
class LPPassManager;
-bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
+bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime,
unsigned TripMultiple, LoopInfo* LI, LPPassManager* LPM);
+bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+ LPPassManager* LPM);
+
}
#endif
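A sketch against the widened signature; with AllowRuntime set, UnrollLoop may emit the runtime prologue declared above when the trip count is not a compile-time constant:

#include "llvm/Transforms/Utils/UnrollLoop.h"

static bool unrollByFour(llvm::Loop *L, llvm::LoopInfo *LI,
                         llvm::LPPassManager *LPM) {
  return llvm::UnrollLoop(L, /*Count=*/4, /*TripCount=*/0,
                          /*AllowRuntime=*/true, /*TripMultiple=*/1, LI, LPM);
}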
diff --git a/include/llvm/Type.h b/include/llvm/Type.h
index 43b7dc5..a571b4d 100644
--- a/include/llvm/Type.h
+++ b/include/llvm/Type.h
@@ -273,6 +273,10 @@ public:
/// otherwise return 'this'.
Type *getScalarType();
+ /// getNumElements - If this is a vector type, return the number of elements,
+ /// otherwise return zero.
+ unsigned getNumElements();
+
//===--------------------------------------------------------------------===//
// Type Iteration support.
//
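A small sketch of the new accessor, which folds the usual dyn_cast<VectorType> check into one call with 0 as the non-vector answer:

#include "llvm/Type.h"

static bool isVectorWiderThan(llvm::Type *Ty, unsigned N) {
  return Ty->getNumElements() > N;  // getNumElements() is 0 for scalars
}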
diff --git a/include/llvm/User.h b/include/llvm/User.h
index 62bc9f0..d3f4f21 100644
--- a/include/llvm/User.h
+++ b/include/llvm/User.h
@@ -34,6 +34,7 @@ class User : public Value {
void *operator new(size_t); // Do not implement
template <unsigned>
friend struct HungoffOperandTraits;
+ virtual void anchor();
protected:
/// OperandList - This is a pointer to the array of Uses for this User.
/// For nodes of fixed arity (e.g. a binary operator) this array will live
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index e79459d..cb1e1eb 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -58,10 +58,4 @@ add_llvm_library(LLVMAnalysis
ValueTracking.cpp
)
-add_llvm_library_dependencies(LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(IPA)
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index df79849..8e2f263 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -26,6 +26,7 @@
#include "llvm/Operator.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
@@ -542,8 +543,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
/// explicitly cast them so that they aren't implicitly cast by the
/// getelementptr.
static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
- Type *ResultTy,
- const TargetData *TD) {
+ Type *ResultTy, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
if (!TD) return 0;
Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
@@ -568,7 +569,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
Constant *C =
ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
return C;
}
@@ -576,10 +577,11 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
/// constant expression, do so.
static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
- Type *ResultTy,
- const TargetData *TD) {
+ Type *ResultTy, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
Constant *Ptr = Ops[0];
- if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
+ if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized() ||
+ !Ptr->getType()->isPointerTy())
return 0;
Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
@@ -602,7 +604,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
Res = ConstantExpr::getSub(Res, CE->getOperand(1));
Res = ConstantExpr::getIntToPtr(Res, ResultTy);
if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res))
- Res = ConstantFoldConstantExpression(ResCE, TD);
+ Res = ConstantFoldConstantExpression(ResCE, TD, TLI);
return Res;
}
}
@@ -729,7 +731,9 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
/// Note that this fails if not all of the operands are constant. Otherwise,
/// this function can only fail when attempting to fold instructions like loads
/// and stores, which have no constant expression form.
-Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
+Constant *llvm::ConstantFoldInstruction(Instruction *I,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Handle PHI nodes quickly here...
if (PHINode *PN = dyn_cast<PHINode>(I)) {
Constant *CommonValue = 0;
@@ -765,7 +769,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
- TD);
+ TD, TLI);
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
return ConstantFoldLoadInst(LI, TD);
@@ -781,28 +785,29 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
cast<Constant>(EVI->getAggregateOperand()),
EVI->getIndices());
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
}
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
/// using the specified TargetData. If successful, the constant result is
/// returned; if not, null is returned.
Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
SmallVector<Constant*, 8> Ops;
for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
i != e; ++i) {
Constant *NewC = cast<Constant>(*i);
// Recursively fold the ConstantExpr's operands.
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
- NewC = ConstantFoldConstantExpression(NewCE, TD);
+ NewC = ConstantFoldConstantExpression(NewCE, TD, TLI);
Ops.push_back(NewC);
}
if (CE->isCompare())
return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
- TD);
- return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD);
+ TD, TLI);
+ return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
}
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
@@ -817,7 +822,8 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
///
Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
ArrayRef<Constant *> Ops,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
@@ -834,7 +840,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
case Instruction::Call:
if (Function *F = dyn_cast<Function>(Ops.back()))
if (canConstantFoldCallTo(F))
- return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1));
+ return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI);
return 0;
case Instruction::PtrToInt:
// If the input is an inttoptr, eliminate the pair. This requires knowing
@@ -888,9 +894,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
case Instruction::GetElementPtr:
- if (Constant *C = CastGEPIndices(Ops, DestTy, TD))
+ if (Constant *C = CastGEPIndices(Ops, DestTy, TD, TLI))
return C;
- if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD))
+ if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI))
return C;
return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1));
@@ -903,7 +909,8 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
///
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *Ops0, Constant *Ops1,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// fold: icmp (inttoptr x), null -> icmp x, 0
// fold: icmp (ptrtoint x), 0 -> icmp x, null
// fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
@@ -920,7 +927,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
IntPtrTy, false);
Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
}
// Only do this transformation if the int is intptrty in size, otherwise
@@ -929,7 +936,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
CE0->getType() == IntPtrTy) {
Constant *C = CE0->getOperand(0);
Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
}
}
@@ -944,7 +951,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
IntPtrTy, false);
Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
IntPtrTy, false);
- return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD);
+ return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI);
}
// Only do this transformation if the int is intptrty in size, otherwise
@@ -953,7 +960,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
CE0->getType() == IntPtrTy &&
CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
- CE1->getOperand(0), TD);
+ CE1->getOperand(0), TD, TLI);
}
}
@@ -962,13 +969,15 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
Constant *LHS =
- ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,TD);
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,
+ TD, TLI);
Constant *RHS =
- ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,TD);
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,
+ TD, TLI);
unsigned OpC =
Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
Constant *Ops[] = { LHS, RHS };
- return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD);
+ return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI);
}
}
@@ -1045,6 +1054,7 @@ bool
llvm::canConstantFoldCallTo(const Function *F) {
switch (F->getIntrinsicID()) {
case Intrinsic::sqrt:
+ case Intrinsic::pow:
case Intrinsic::powi:
case Intrinsic::bswap:
case Intrinsic::ctpop:
@@ -1168,7 +1178,8 @@ static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero,
/// ConstantFoldCall - Attempt to constant fold a call to the specified function
/// with the specified arguments, returning null if unsuccessful.
Constant *
-llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
+llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI) {
if (!F->hasName()) return 0;
StringRef Name = F->getName();
@@ -1183,6 +1194,8 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
return ConstantInt::get(F->getContext(), Val.bitcastToAPInt());
}
+ if (!TLI)
+ return 0;
if (!Ty->isFloatTy() && !Ty->isDoubleTy())
return 0;
@@ -1201,43 +1214,43 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
Op->getValueAPF().convertToDouble();
switch (Name[0]) {
case 'a':
- if (Name == "acos")
+ if (Name == "acos" && TLI->has(LibFunc::acos))
return ConstantFoldFP(acos, V, Ty);
- else if (Name == "asin")
+ else if (Name == "asin" && TLI->has(LibFunc::asin))
return ConstantFoldFP(asin, V, Ty);
- else if (Name == "atan")
+ else if (Name == "atan" && TLI->has(LibFunc::atan))
return ConstantFoldFP(atan, V, Ty);
break;
case 'c':
- if (Name == "ceil")
+ if (Name == "ceil" && TLI->has(LibFunc::ceil))
return ConstantFoldFP(ceil, V, Ty);
- else if (Name == "cos")
+ else if (Name == "cos" && TLI->has(LibFunc::cos))
return ConstantFoldFP(cos, V, Ty);
- else if (Name == "cosh")
+ else if (Name == "cosh" && TLI->has(LibFunc::cosh))
return ConstantFoldFP(cosh, V, Ty);
- else if (Name == "cosf")
+ else if (Name == "cosf" && TLI->has(LibFunc::cosf))
return ConstantFoldFP(cos, V, Ty);
break;
case 'e':
- if (Name == "exp")
+ if (Name == "exp" && TLI->has(LibFunc::exp))
return ConstantFoldFP(exp, V, Ty);
- if (Name == "exp2") {
+ if (Name == "exp2" && TLI->has(LibFunc::exp2)) {
// Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
// C99 library.
return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
}
break;
case 'f':
- if (Name == "fabs")
+ if (Name == "fabs" && TLI->has(LibFunc::fabs))
return ConstantFoldFP(fabs, V, Ty);
- else if (Name == "floor")
+ else if (Name == "floor" && TLI->has(LibFunc::floor))
return ConstantFoldFP(floor, V, Ty);
break;
case 'l':
- if (Name == "log" && V > 0)
+ if (Name == "log" && V > 0 && TLI->has(LibFunc::log))
return ConstantFoldFP(log, V, Ty);
- else if (Name == "log10" && V > 0)
+ else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10))
return ConstantFoldFP(log10, V, Ty);
else if (F->getIntrinsicID() == Intrinsic::sqrt &&
(Ty->isFloatTy() || Ty->isDoubleTy())) {
@@ -1248,21 +1261,21 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
}
break;
case 's':
- if (Name == "sin")
+ if (Name == "sin" && TLI->has(LibFunc::sin))
return ConstantFoldFP(sin, V, Ty);
- else if (Name == "sinh")
+ else if (Name == "sinh" && TLI->has(LibFunc::sinh))
return ConstantFoldFP(sinh, V, Ty);
- else if (Name == "sqrt" && V >= 0)
+ else if (Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt))
return ConstantFoldFP(sqrt, V, Ty);
- else if (Name == "sqrtf" && V >= 0)
+ else if (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf))
return ConstantFoldFP(sqrt, V, Ty);
- else if (Name == "sinf")
+ else if (Name == "sinf" && TLI->has(LibFunc::sinf))
return ConstantFoldFP(sin, V, Ty);
break;
case 't':
- if (Name == "tan")
+ if (Name == "tan" && TLI->has(LibFunc::tan))
return ConstantFoldFP(tan, V, Ty);
- else if (Name == "tanh")
+ else if (Name == "tanh" && TLI->has(LibFunc::tanh))
return ConstantFoldFP(tanh, V, Ty);
break;
default:
@@ -1277,10 +1290,6 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
return ConstantInt::get(F->getContext(), Op->getValue().byteSwap());
case Intrinsic::ctpop:
return ConstantInt::get(Ty, Op->getValue().countPopulation());
- case Intrinsic::cttz:
- return ConstantInt::get(Ty, Op->getValue().countTrailingZeros());
- case Intrinsic::ctlz:
- return ConstantInt::get(Ty, Op->getValue().countLeadingZeros());
case Intrinsic::convert_from_fp16: {
APFloat Val(Op->getValue());
@@ -1337,16 +1346,21 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
if (Op2->getType() != Op1->getType())
return 0;
-
+
double Op2V = Ty->isFloatTy() ?
(double)Op2->getValueAPF().convertToFloat():
Op2->getValueAPF().convertToDouble();
- if (Name == "pow")
+ if (F->getIntrinsicID() == Intrinsic::pow) {
return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
- if (Name == "fmod")
+ }
+ if (!TLI)
+ return 0;
+ if (Name == "pow" && TLI->has(LibFunc::pow))
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ if (Name == "fmod" && TLI->has(LibFunc::fmod))
return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
- if (Name == "atan2")
+ if (Name == "atan2" && TLI->has(LibFunc::atan2))
return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
} else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy())
@@ -1361,7 +1375,6 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
return 0;
}
-
if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
switch (F->getIntrinsicID()) {
@@ -1401,6 +1414,14 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
};
return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops);
}
+ case Intrinsic::cttz:
+ // FIXME: This should check for Op2 == 1, and become unreachable if
+ // Op1 == 0.
+ return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros());
+ case Intrinsic::ctlz:
+ // FIXME: This should check for Op2 == 1, and become unreachable if
+ // Op1 == 0.
+ return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros());
}
}
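Threading TargetLibraryInfo through these entry points makes libcall folding opt-in per target: with a null TLI, the libm cases above now refuse to fold. A minimal caller sketch, assuming TargetLibraryInfo's Triple-taking constructor keys function availability off the target:

#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetLibraryInfo.h"

static llvm::Constant *tryFold(llvm::Instruction *I,
                               const llvm::TargetData *TD,
                               const llvm::Triple &T) {
  llvm::TargetLibraryInfo TLI(T);  // records which library calls exist
  return llvm::ConstantFoldInstruction(I, TD, &TLI);
}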
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index ed3e8f4..85dcc46 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -777,7 +777,7 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL,
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
Fn,
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index eae83fd..8ffef29 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -5,9 +5,3 @@ add_llvm_library(LLVMipa
GlobalsModRef.cpp
IPA.cpp
)
-
-add_llvm_library_dependencies(LLVMipa
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Analysis/IPA/LLVMBuild.txt b/lib/Analysis/IPA/LLVMBuild.txt
index fb16278..980e918 100644
--- a/lib/Analysis/IPA/LLVMBuild.txt
+++ b/lib/Analysis/IPA/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = IPA
parent = Libraries
library_name = ipa
required_libraries = Analysis Core Support
-
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 2f41f72..f1cfd6c 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -38,22 +38,25 @@ STATISTIC(NumFactor , "Number of factorizations");
STATISTIC(NumReassoc, "Number of reassociations");
static Value *SimplifyAndInst(Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
+ const TargetLibraryInfo *, const DominatorTree *,
+ unsigned);
static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
+ const TargetLibraryInfo *, const DominatorTree *,
+ unsigned);
static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
+ const TargetLibraryInfo *, const DominatorTree *,
+ unsigned);
static Value *SimplifyOrInst(Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
+ const TargetLibraryInfo *, const DominatorTree *,
+ unsigned);
static Value *SimplifyXorInst(Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
+ const TargetLibraryInfo *, const DominatorTree *,
+ unsigned);
/// getFalse - For a boolean type, or a vector of boolean type, return false, or
/// a vector with every element false, as appropriate for the type.
static Constant *getFalse(Type *Ty) {
- assert((Ty->isIntegerTy(1) ||
- (Ty->isVectorTy() &&
- cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) &&
+ assert(Ty->getScalarType()->isIntegerTy(1) &&
"Expected i1 type or a vector of i1!");
return Constant::getNullValue(Ty);
}
@@ -61,9 +64,7 @@ static Constant *getFalse(Type *Ty) {
/// getTrue - For a boolean type, or a vector of boolean type, return true, or
/// a vector with every element true, as appropriate for the type.
static Constant *getTrue(Type *Ty) {
- assert((Ty->isIntegerTy(1) ||
- (Ty->isVectorTy() &&
- cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) &&
+ assert(Ty->getScalarType()->isIntegerTy(1) &&
"Expected i1 type or a vector of i1!");
return Constant::getAllOnesValue(Ty);
}
@@ -109,7 +110,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
/// Returns the simplified value, or null if no simplification was performed.
static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
unsigned OpcToExpand, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+ const TargetLibraryInfo *TLI, const DominatorTree *DT,
+ unsigned MaxRecurse) {
Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -121,8 +123,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
// It does! Try turning it into "(A op C) op' (B op C)".
Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
// Do "A op C" and "B op C" both simplify?
- if (Value *L = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse))
- if (Value *R = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+ if (Value *L = SimplifyBinOp(Opcode, A, C, TD, TLI, DT, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, B, C, TD, TLI, DT, MaxRecurse)) {
// They do! Return "L op' R" if it simplifies or is already available.
// If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand)
@@ -131,7 +133,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return LHS;
}
// Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, TLI, DT,
MaxRecurse)) {
++NumExpand;
return V;
@@ -145,8 +147,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
// It does! Try turning it into "(A op B) op' (A op C)".
Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
// Do "A op B" and "A op C" both simplify?
- if (Value *L = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse))
- if (Value *R = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) {
+ if (Value *L = SimplifyBinOp(Opcode, A, B, TD, TLI, DT, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, A, C, TD, TLI, DT, MaxRecurse)) {
// They do! Return "L op' R" if it simplifies or is already available.
// If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand)
@@ -155,7 +157,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return RHS;
}
// Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, TLI, DT,
MaxRecurse)) {
++NumExpand;
return V;
@@ -171,8 +173,10 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)".
/// Returns the simplified value, or null if no simplification was performed.
static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- unsigned OpcToExtract, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+ unsigned OpcToExtract, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -196,7 +200,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
Value *DD = A == C ? D : C;
// Form "A op' (B op DD)" if it simplifies completely.
// Does "B op DD" simplify?
- if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, TLI, DT, MaxRecurse)) {
// It does! Return "A op' V" if it simplifies or is already available.
// If V equals B then "A op' V" is just the LHS. If V equals DD then
// "A op' V" is just the RHS.
@@ -205,7 +209,8 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return V == B ? LHS : RHS;
}
// Otherwise return "A op' V" if it simplifies.
- if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, TLI, DT,
+ MaxRecurse)) {
++NumFactor;
return W;
}
@@ -219,7 +224,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
Value *CC = B == D ? C : D;
// Form "(A op CC) op' B" if it simplifies completely..
// Does "A op CC" simplify?
- if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, TLI, DT, MaxRecurse)) {
// It does! Return "V op' B" if it simplifies or is already available.
// If V equals A then "V op' B" is just the LHS. If V equals CC then
// "V op' B" is just the RHS.
@@ -228,7 +233,8 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return V == A ? LHS : RHS;
}
// Otherwise return "V op' B" if it simplifies.
- if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, TLI, DT,
+ MaxRecurse)) {
++NumFactor;
return W;
}
@@ -242,6 +248,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// operations. Returns the simpler value, or null if none was found.
static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT,
unsigned MaxRecurse) {
Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
@@ -261,12 +268,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = RHS;
// Does "B op C" simplify?
- if (Value *V = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, B, C, TD, TLI, DT, MaxRecurse)) {
// It does! Return "A op V" if it simplifies or is already available.
// If V equals B then "A op V" is just the LHS.
if (V == B) return LHS;
// Otherwise return "A op V" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, A, V, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, A, V, TD, TLI, DT, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -280,12 +287,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = Op1->getOperand(1);
// Does "A op B" simplify?
- if (Value *V = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, A, B, TD, TLI, DT, MaxRecurse)) {
// It does! Return "V op C" if it simplifies or is already available.
// If V equals B then "V op C" is just the RHS.
if (V == B) return RHS;
// Otherwise return "V op C" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, V, C, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, V, C, TD, TLI, DT, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -303,12 +310,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = RHS;
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD, TLI, DT, MaxRecurse)) {
// It does! Return "V op B" if it simplifies or is already available.
// If V equals A then "V op B" is just the LHS.
if (V == A) return LHS;
// Otherwise return "V op B" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, V, B, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, V, B, TD, TLI, DT, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -322,12 +329,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = Op1->getOperand(1);
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, C, A, TD, TLI, DT, MaxRecurse)) {
// It does! Return "B op V" if it simplifies or is already available.
// If V equals C then "B op V" is just the RHS.
if (V == C) return RHS;
// Otherwise return "B op V" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, B, V, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, B, V, TD, TLI, DT, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -343,6 +350,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
/// Returns the common value if so, otherwise returns null.
static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -361,11 +369,11 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
Value *TV;
Value *FV;
if (SI == LHS) {
- TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, DT, MaxRecurse);
- FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, DT, MaxRecurse);
+ TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, TLI, DT, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, TLI, DT, MaxRecurse);
} else {
- TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, DT, MaxRecurse);
- FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, DT, MaxRecurse);
+ TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, TLI, DT, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, TLI, DT, MaxRecurse);
}
// If they simplified to the same value, then return the common value.
@@ -417,6 +425,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
/// null.
static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -436,7 +445,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
// Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
// Does "cmp TV, RHS" simplify?
- Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, TD, DT, MaxRecurse);
+ Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, TD, TLI, DT, MaxRecurse);
if (TCmp == Cond) {
// It not only simplified, it simplified to the select condition. Replace
// it with 'true'.
@@ -450,7 +459,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
}
// Does "cmp FV, RHS" simplify?
- Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, TD, DT, MaxRecurse);
+ Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, TD, TLI, DT, MaxRecurse);
if (FCmp == Cond) {
// It not only simplified, it simplified to the select condition. Replace
// it with 'false'.
@@ -471,19 +480,19 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
// is equal to "Cond && TCmp". This also catches the case when the false
// value simplified to false and the true value to true, returning "Cond".
if (match(FCmp, m_Zero()))
- if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse))
+ if (Value *V = SimplifyAndInst(Cond, TCmp, TD, TLI, DT, MaxRecurse))
return V;
// If the true value simplified to true, then the result of the compare
// is equal to "Cond || FCmp".
if (match(TCmp, m_One()))
- if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse))
+ if (Value *V = SimplifyOrInst(Cond, FCmp, TD, TLI, DT, MaxRecurse))
return V;
// Finally, if the false value simplified to true and the true value to
// false, then the result of the compare is equal to "!Cond".
if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
if (Value *V =
SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
return 0;
@@ -494,7 +503,9 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
/// it on the incoming phi values yields the same result for every value. If so
/// returns the common value, otherwise returns null.
static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -521,8 +532,8 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
Value *V = PI == LHS ?
- SimplifyBinOp(Opcode, Incoming, RHS, TD, DT, MaxRecurse) :
- SimplifyBinOp(Opcode, LHS, Incoming, TD, DT, MaxRecurse);
+ SimplifyBinOp(Opcode, Incoming, RHS, TD, TLI, DT, MaxRecurse) :
+ SimplifyBinOp(Opcode, LHS, Incoming, TD, TLI, DT, MaxRecurse);
// If the operation failed to simplify, or simplified to a different value
// than before, then give up.
if (!V || (CommonValue && V != CommonValue))
@@ -538,7 +549,9 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
/// incoming phi values yields the same result every time. If so returns the
/// common result, otherwise returns null.
static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -562,7 +575,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
Value *Incoming = PI->getIncomingValue(i);
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
- Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, DT, MaxRecurse);
+ Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, TLI, DT, MaxRecurse);
// If the operation failed to simplify, or simplified to a different value
// than before, then give up.
if (!V || (CommonValue && V != CommonValue))
@@ -576,13 +589,15 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
/// SimplifyAddInst - Given operands for an Add, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(),
- Ops, TD);
+ Ops, TD, TLI);
}
// Canonicalize the constant to the RHS.
@@ -612,17 +627,17 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// i1 add -> xor.
if (MaxRecurse && Op0->getType()->isIntegerTy(1))
- if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1))
return V;
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, TLI, DT,
MaxRecurse))
return V;
// Mul distributes over Add. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul,
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
// Threading Add over selects and phi nodes is pointless, so don't bother.
@@ -638,20 +653,23 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit);
}
/// SimplifySubInst - Given operands for a Sub, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0))
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
- Ops, TD);
+ Ops, TD, TLI);
}
// X - undef -> undef
@@ -679,18 +697,18 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *Y = 0, *Z = Op1;
if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
// See if "V === Y - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, TLI, DT, MaxRecurse-1))
// It does! Now see if "X + V" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, DT,
+ if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, TLI, DT,
MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
// See if "V === X - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, TLI, DT, MaxRecurse-1))
// It does! Now see if "Y + V" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, DT,
+ if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, TLI, DT,
MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
@@ -703,18 +721,18 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
X = Op0;
if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
// See if "V === X - Y" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, TLI, DT, MaxRecurse-1))
// It does! Now see if "V - Z" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, DT,
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, TLI, DT,
MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
// See if "V === X - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, TLI, DT, MaxRecurse-1))
// It does! Now see if "V - Y" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, DT,
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, TLI, DT,
MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
@@ -727,9 +745,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Z = Op0;
if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
// See if "V === Z - X" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, TLI, DT, MaxRecurse-1))
// It does! Now see if "V + Y" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, DT,
+ if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, TLI, DT,
MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
@@ -738,12 +756,12 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// Mul distributes over Sub. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul,
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
// i1 sub -> xor.
if (MaxRecurse && Op0->getType()->isIntegerTy(1))
- if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1))
return V;
// Threading Sub over selects and phi nodes is pointless, so don't bother.
@@ -759,19 +777,22 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit);
}
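The hunks above establish the pattern repeated for the rest of this file: each simplifier gains a const TargetLibraryInfo *TLI parameter between TD and DT, forwards it to ConstantFoldInstOperands so constant folding can reason about library calls, and threads it through every recursive SimplifyBinOp call. A minimal caller-side sketch against the post-patch signature follows; the helper and its variables are illustrative assumptions, not part of this patch.

    // Sketch only: exercises the post-patch SimplifySubInst signature.
    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    static bool trySimplifySub(BinaryOperator *BO, const TargetData *TD,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
      // TLI now rides along with TD and DT through the whole recursion.
      if (Value *V = SimplifySubInst(BO->getOperand(0), BO->getOperand(1),
                                     BO->hasNoSignedWrap(),
                                     BO->hasNoUnsignedWrap(), TD, TLI, DT)) {
        BO->replaceAllUsesWith(V); // a null result means "no fold found"
        return true;
      }
      return false;
    }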
/// SimplifyMulInst - Given operands for a Mul, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
- Ops, TD);
+ Ops, TD, TLI);
}
// Canonicalize the constant to the RHS.
@@ -802,30 +823,30 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
// i1 mul -> and.
if (MaxRecurse && Op0->getType()->isIntegerTy(1))
- if (Value *V = SimplifyAndInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyAndInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1))
return V;
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, TLI, DT,
MaxRecurse))
return V;
// Mul distributes over Add. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, TLI, DT,
MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, TLI, DT,
MaxRecurse))
return V;
@@ -833,19 +854,20 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
}
Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyMulInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyMulInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
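Each exported wrapper here follows the same two-layer idiom: a file-local worker takes an explicit unsigned MaxRecurse budget, and the public entry point seeds it with RecursionLimit (defined near the top of InstructionSimplify.cpp), so external callers can never recurse unboundedly. A generic sketch of the idiom, with invented names:

    // Worker threads a recursion budget; the public API hides it.
    static const unsigned kLimit = 3; // stand-in for RecursionLimit

    static int worker(int N, unsigned MaxRecurse) {
      if (!MaxRecurse) // budget exhausted: give up instead of recursing
        return 0;
      return N <= 1 ? N : worker(N - 1, MaxRecurse - 1);
    }

    int entry(int N) { return worker(N, kLimit); }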
/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI);
}
}
@@ -898,13 +920,15 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT,
+ MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT,
+ MaxRecurse))
return V;
return 0;
@@ -913,34 +937,41 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
/// SimplifySDivInst - Given operands for an SDiv, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, TLI, DT,
+ MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifySDivInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifySDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
/// SimplifyUDivInst - Given operands for a UDiv, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, TLI, DT,
+ MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyUDivInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyUDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
+ const TargetLibraryInfo *,
const DominatorTree *, unsigned) {
// undef / X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
@@ -954,19 +985,20 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
}
Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyFDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
/// SimplifyRem - Given operands for an SRem or URem, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI);
}
}
@@ -1001,13 +1033,13 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
return 0;
@@ -1016,35 +1048,43 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
/// SimplifySRemInst - Given operands for an SRem, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, DT, MaxRecurse))
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifySRemInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifySRemInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
/// SimplifyURemInst - Given operands for a URem, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, DT, MaxRecurse))
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyURemInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyURemInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
- const DominatorTree *, unsigned) {
+ const TargetLibraryInfo *,
+ const DominatorTree *,
+ unsigned) {
// undef % X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
return Op0;
@@ -1057,19 +1097,20 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
}
Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyFRemInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyFRemInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
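Note that SimplifyFDivInst and SimplifyFRemInst leave their analysis parameters unnamed: the floating-point folds here (undef / X -> undef and undef % X -> undef, because the undef could be a signaling NaN) use none of them, and dropping the names keeps the uniform signature without unused-parameter warnings. A tiny illustration of the idiom, with hypothetical names:

    // Unnamed parameters: part of the uniform signature, deliberately unused.
    static int simplifyNoOp(int Op0, int /*Op1*/, const void * /*TD*/,
                            const void * /*TLI*/, unsigned /*MaxRecurse*/) {
      return Op0; // this fold only ever inspects Op0
    }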
/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI);
}
}
@@ -1094,13 +1135,13 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
return 0;
@@ -1109,9 +1150,10 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
/// SimplifyShlInst - Given operands for an Shl, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, DT, MaxRecurse))
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
// undef << X -> 0
@@ -1127,16 +1169,19 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit);
}
/// SimplifyLShrInst - Given operands for an LShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
// undef >>l X -> 0
@@ -1153,16 +1198,20 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
}
Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyLShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyLShrInst(Op0, Op1, isExact, TD, TLI, DT, RecursionLimit);
}
/// SimplifyAShrInst - Given operands for an AShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, TLI, DT, MaxRecurse))
return V;
// all ones >>a X -> all ones
@@ -1183,19 +1232,23 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
}
Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyAShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyAShrInst(Op0, Op1, isExact, TD, TLI, DT, RecursionLimit);
}
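The "all ones >>a X -> all ones" fold works because an arithmetic shift replicates the sign bit, so a value of all ones is a fixed point for any shift amount. A self-contained C++ check (right-shifting a negative signed value is implementation-defined before C++20, so this demonstration assumes the usual two's-complement arithmetic shift):

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t AllOnes = -1; // bit pattern 0xFFFFFFFF
      for (int Sh = 0; Sh < 31; ++Sh)
        assert((AllOnes >> Sh) == -1); // sign bit refills every vacated bit
      return 0;
    }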
/// SimplifyAndInst - Given operands for an And, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
+ unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
- Ops, TD);
+ Ops, TD, TLI);
}
// Canonicalize the constant to the RHS.
@@ -1244,36 +1297,36 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
}
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, DT,
- MaxRecurse))
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, TLI,
+ DT, MaxRecurse))
return V;
// And distributes over Or. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
// And distributes over Xor. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
// Or distributes over And. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or,
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, DT,
- MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, TLI,
+ DT, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, TLI, DT,
MaxRecurse))
return V;
@@ -1281,19 +1334,21 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
}
Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyAndInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyAndInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
/// SimplifyOrInst - Given operands for an Or, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
- Ops, TD);
+ Ops, TD, TLI);
}
// Canonicalize the constant to the RHS.
@@ -1343,31 +1398,31 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
return Constant::getAllOnesValue(Op0->getType());
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT,
- MaxRecurse))
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, TLI,
+ DT, MaxRecurse))
return V;
// Or distributes over And. Try some generic simplifications based on this.
- if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And,
- TD, DT, MaxRecurse))
+ if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, TD,
+ TLI, DT, MaxRecurse))
return V;
// And distributes over Or. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And,
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, TLI, DT,
MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, TLI, DT,
MaxRecurse))
return V;
@@ -1375,19 +1430,21 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
}
Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyOrInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyOrInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
/// SimplifyXorInst - Given operands for a Xor, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(),
- Ops, TD);
+ Ops, TD, TLI);
}
// Canonicalize the constant to the RHS.
@@ -1412,13 +1469,13 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
return Constant::getAllOnesValue(Op0->getType());
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, DT,
- MaxRecurse))
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, TLI,
+ DT, MaxRecurse))
return V;
// And distributes over Xor. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And,
- TD, DT, MaxRecurse))
+ TD, TLI, DT, MaxRecurse))
return V;
// Threading Xor over selects and phi nodes is pointless, so don't bother.
@@ -1434,8 +1491,9 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
}
Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyXorInst(Op0, Op1, TD, TLI, DT, RecursionLimit);
}
static Type *GetCompareTy(Value *Op) {
@@ -1465,14 +1523,16 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD, TLI);
// If we have a constant, make sure it is on the RHS.
std::swap(LHS, RHS);
@@ -1489,8 +1549,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
// Special case logic when the operands have i1 type.
- if (OpTy->isIntegerTy(1) || (OpTy->isVectorTy() &&
- cast<VectorType>(OpTy)->getElementType()->isIntegerTy(1))) {
+ if (OpTy->getScalarType()->isIntegerTy(1)) {
switch (Pred) {
default: break;
case ICmpInst::ICMP_EQ:
@@ -1671,13 +1730,13 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Transfer the cast to the constant.
if (Value *V = SimplifyICmpInst(Pred, SrcOp,
ConstantExpr::getIntToPtr(RHSC, SrcTy),
- TD, DT, MaxRecurse-1))
+ TD, TLI, DT, MaxRecurse-1))
return V;
} else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
if (RI->getOperand(0)->getType() == SrcTy)
// Compare without the cast.
if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- TD, DT, MaxRecurse-1))
+ TD, TLI, DT, MaxRecurse-1))
return V;
}
}
@@ -1689,7 +1748,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
// Compare X and Y. Note that signed predicates become unsigned.
if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, RI->getOperand(0), TD, DT,
+ SrcOp, RI->getOperand(0), TD, TLI, DT,
MaxRecurse-1))
return V;
}
@@ -1705,7 +1764,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// also a case of comparing two zero-extended values.
if (RExt == CI && MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, Trunc, TD, DT, MaxRecurse-1))
+ SrcOp, Trunc, TD, TLI, DT, MaxRecurse-1))
return V;
// Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
@@ -1750,7 +1809,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
// Compare X and Y. Note that the predicate does not change.
if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- TD, DT, MaxRecurse-1))
+ TD, TLI, DT, MaxRecurse-1))
return V;
}
// Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
@@ -1764,7 +1823,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the re-extended constant didn't change then this is effectively
// also a case of comparing two sign-extended values.
if (RExt == CI && MaxRecurse)
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, DT,
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, TLI, DT,
MaxRecurse-1))
return V;
@@ -1800,7 +1859,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
Constant::getNullValue(SrcTy),
- TD, DT, MaxRecurse-1))
+ TD, TLI, DT, MaxRecurse-1))
return V;
break;
case ICmpInst::ICMP_ULT:
@@ -1809,7 +1868,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
Constant::getNullValue(SrcTy),
- TD, DT, MaxRecurse-1))
+ TD, TLI, DT, MaxRecurse-1))
return V;
break;
}
@@ -1843,14 +1902,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if ((A == RHS || B == RHS) && NoLHSWrapProblem)
if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
Constant::getNullValue(RHS->getType()),
- TD, DT, MaxRecurse-1))
+ TD, TLI, DT, MaxRecurse-1))
return V;
// icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
if ((C == LHS || D == LHS) && NoRHSWrapProblem)
if (Value *V = SimplifyICmpInst(Pred,
Constant::getNullValue(LHS->getType()),
- C == LHS ? D : C, TD, DT, MaxRecurse-1))
+ C == LHS ? D : C, TD, TLI, DT, MaxRecurse-1))
return V;
// icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
@@ -1859,7 +1918,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Determine Y and Z in the form icmp (X+Y), (X+Z).
Value *Y = (A == C || A == D) ? B : A;
Value *Z = (C == A || C == B) ? D : C;
- if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, TLI, DT, MaxRecurse-1))
return V;
}
}
@@ -1942,7 +2001,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (!LBO->isExact() || !RBO->isExact())
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
- RBO->getOperand(0), TD, DT, MaxRecurse-1))
+ RBO->getOperand(0), TD, TLI, DT, MaxRecurse-1))
return V;
break;
case Instruction::Shl: {
@@ -1953,7 +2012,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (!NSW && ICmpInst::isSigned(Pred))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
- RBO->getOperand(0), TD, DT, MaxRecurse-1))
+ RBO->getOperand(0), TD, TLI, DT, MaxRecurse-1))
return V;
break;
}
@@ -2007,7 +2066,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(EqP, A, B, TD, TLI, DT, MaxRecurse-1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -2021,7 +2080,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, TLI, DT, MaxRecurse-1))
return V;
break;
}
@@ -2077,7 +2136,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(EqP, A, B, TD, TLI, DT, MaxRecurse-1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -2091,7 +2150,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, TLI, DT, MaxRecurse-1))
return V;
break;
}
@@ -2151,34 +2210,38 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse))
return V;
// If the comparison is with the result of a phi instruction, check whether
// doing the compare with each incoming phi value yields a common result.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit);
}
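Compare folding picks up the same extra argument: constant-constant icmps now go through ConstantFoldCompareInstOperands with TLI appended, mirroring the binary-operator hunks. A hedged sketch of calling that overload directly (the helper and its arguments are assumptions for illustration):

    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/Constants.h"
    #include "llvm/Instructions.h"
    #include "llvm/Type.h"
    using namespace llvm;

    // Folds icmp eq i32 2, 2 to i1 true via the post-patch overload.
    static Constant *foldTwoEqTwo(LLVMContext &Ctx, const TargetData *TD,
                                  const TargetLibraryInfo *TLI) {
      Constant *Two = ConstantInt::get(Type::getInt32Ty(Ctx), 2);
      return ConstantFoldCompareInstOperands(CmpInst::ICMP_EQ, Two, Two,
                                             TD, TLI);
    }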
/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD, TLI);
// If we have a constant, make sure it is on the RHS.
std::swap(LHS, RHS);
@@ -2246,21 +2309,23 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse))
return V;
// If the comparison is with the result of a phi instruction, check whether
// doing the compare with each incoming phi value yields a common result.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit);
}
/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
@@ -2291,10 +2356,13 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
/// fold the result. If not, this returns null.
-Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops,
- const TargetData *TD, const DominatorTree *) {
+Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD,
+ const DominatorTree *) {
// The type of the GEP pointer operand.
- PointerType *PtrTy = cast<PointerType>(Ops[0]->getType());
+ PointerType *PtrTy = dyn_cast<PointerType>(Ops[0]->getType());
+ // The GEP pointer operand is not a pointer, it's a vector of pointers.
+ if (!PtrTy)
+ return 0;
// getelementptr P -> P.
if (Ops.size() == 1)
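The GEP hunk is the one spot in this file where behavior changes rather than plumbing: with vectors of pointers now valid GEP operands, the pointer operand can no longer be unconditionally cast<PointerType>, so the code switches to dyn_cast and returns null ("no simplification") for the vector case. The sketch below shows the shape of that defensive pattern, using standard RTTI as a stand-in for LLVM's dyn_cast:

    struct Type { virtual ~Type() {} };
    struct PointerType : Type {};
    struct VectorType : Type {};

    // dyn_cast-style check: null on mismatch instead of asserting,
    // turning "this must be a pointer" into "fold only if it is".
    static const PointerType *asPointer(const Type *T) {
      return dynamic_cast<const PointerType *>(T);
    }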
@@ -2392,62 +2460,76 @@ static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) {
return CommonValue;
}
-
//=== Helper functions for higher up the class hierarchy.
/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::Add:
return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- TD, DT, MaxRecurse);
+ TD, TLI, DT, MaxRecurse);
case Instruction::Sub:
return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- TD, DT, MaxRecurse);
- case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, DT, MaxRecurse);
+ TD, TLI, DT, MaxRecurse);
+ case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
case Instruction::Shl:
return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- TD, DT, MaxRecurse);
+ TD, TLI, DT, MaxRecurse);
case Instruction::LShr:
- return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
+ return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, TLI, DT,
+ MaxRecurse);
case Instruction::AShr:
- return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
- case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, DT, MaxRecurse);
+ return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
+ case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, TLI, DT,
+ MaxRecurse);
default:
if (Constant *CLHS = dyn_cast<Constant>(LHS))
if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
Constant *COps[] = {CLHS, CRHS};
- return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, TD);
+ return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, TD, TLI);
}
// If the operation is associative, try some generic simplifications.
if (Instruction::isAssociative(Opcode))
- if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, TLI, DT,
MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, DT,
+ if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, TLI, DT,
MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, TLI, DT,
+ MaxRecurse))
return V;
return 0;
@@ -2455,23 +2537,27 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyBinOp(Opcode, LHS, RHS, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyBinOp(Opcode, LHS, RHS, TD, TLI, DT, RecursionLimit);
}
/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
/// fold the result.
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT,
unsigned MaxRecurse) {
if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
- return SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
- return SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
+ return SimplifyICmpInst(Predicate, LHS, RHS, TD, TLI, DT, MaxRecurse);
+ return SimplifyFCmpInst(Predicate, LHS, RHS, TD, TLI, DT, MaxRecurse);
}
Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit);
}
static Value *SimplifyCallInst(CallInst *CI) {
@@ -2485,78 +2571,79 @@ static Value *SimplifyCallInst(CallInst *CI) {
/// SimplifyInstruction - See if we can compute a simplified version of this
/// instruction. If not, this returns null.
Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
Value *Result;
switch (I->getOpcode()) {
default:
- Result = ConstantFoldInstruction(I, TD);
+ Result = ConstantFoldInstruction(I, TD, TLI);
break;
case Instruction::Add:
Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::Sub:
Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::Mul:
- Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::SDiv:
- Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::UDiv:
- Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::FDiv:
- Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::SRem:
- Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::URem:
- Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::FRem:
- Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::Shl:
Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::LShr:
Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->isExact(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::AShr:
Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->isExact(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::And:
- Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::Or:
- Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::Xor:
- Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::ICmp:
Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1), TD, DT);
+ I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::FCmp:
Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1), TD, DT);
+ I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::Select:
Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
@@ -2596,6 +2683,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
///
void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!");
@@ -2620,12 +2708,12 @@ void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
// SimplifyInstruction.
AssertingVH<> UserHandle(User);
- SimplifiedVal = SimplifyInstruction(User, TD, DT);
+ SimplifiedVal = SimplifyInstruction(User, TD, TLI, DT);
if (SimplifiedVal == 0) continue;
}
// Recursively simplify this user to the new value.
- ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, DT);
+ ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, TLI, DT);
From = dyn_cast_or_null<Instruction>((Value*)FromHandle);
To = ToHandle;
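With the plumbing complete, SimplifyInstruction remains the one-stop entry point, now carrying all three analyses. A hedged sketch of the typical driver loop over a function (the helper is illustrative; real passes also erase the dead instruction and re-queue its users):

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/Function.h"
    #include "llvm/Support/InstIterator.h"
    using namespace llvm;

    static bool simplifyAll(Function &F, const TargetData *TD,
                            const TargetLibraryInfo *TLI,
                            const DominatorTree *DT) {
      bool Changed = false;
      for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
        if (Value *V = SimplifyInstruction(&*I, TD, TLI, DT)) {
          I->replaceAllUsesWith(V); // leave the dead instruction to DCE
          Changed = true;
        }
      return Changed;
    }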
diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt
index 92f199b..a8a8079 100644
--- a/lib/Analysis/LLVMBuild.txt
+++ b/lib/Analysis/LLVMBuild.txt
@@ -15,9 +15,11 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = IPA
+
[component_0]
type = Library
name = Analysis
parent = Libraries
required_libraries = Core Support Target
-
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index f80595c..d27d911 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
@@ -33,7 +34,10 @@
using namespace llvm;
char LazyValueInfo::ID = 0;
-INITIALIZE_PASS(LazyValueInfo, "lazy-value-info",
+INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info",
+ "Lazy Value Information Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
namespace llvm {
@@ -61,10 +65,10 @@ class LVILatticeVal {
constant,
/// notconstant - This Value is known to not have the specified value.
notconstant,
-
+
/// constantrange - The Value falls within this range.
constantrange,
-
+
/// overdefined - This value is not known to be constant, and we know that
/// it has a value.
overdefined
@@ -207,7 +211,7 @@ public:
// Unless we can prove that the two Constants are different, we must
// move to overdefined.
- // FIXME: use TargetData for smarter constant folding.
+ // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
if (ConstantInt *Res = dyn_cast<ConstantInt>(
ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
getConstant(),
@@ -233,7 +237,7 @@ public:
// Unless we can prove that the two Constants are different, we must
// move to overdefined.
- // FIXME: use TargetData for smarter constant folding.
+ // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
if (ConstantInt *Res = dyn_cast<ConstantInt>(
ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
getNotConstant(),
@@ -367,7 +371,11 @@ namespace {
/// for cache updating.
typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
DenseSet<OverDefinedPairTy> OverDefinedCache;
-
+
+ /// SeenBlocks - Keep track of all blocks that we have ever seen, so we
+ /// don't spend time removing unused blocks from our caches.
+ DenseSet<AssertingVH<BasicBlock> > SeenBlocks;
+
/// BlockValueStack - This stack holds the state of the value solver
/// during a query. It basically emulates the callstack of the naive
/// recursive value lookup process.
@@ -438,6 +446,7 @@ namespace {
/// clear - Empty the cache.
void clear() {
+ SeenBlocks.clear();
ValueCache.clear();
OverDefinedCache.clear();
}
@@ -466,6 +475,12 @@ void LVIValueHandle::deleted() {
}
void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+ // Shortcut if we have never seen this block.
+ DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB);
+ if (I == SeenBlocks.end())
+ return;
+ SeenBlocks.erase(I);
+
SmallVector<OverDefinedPairTy, 4> ToErase;
for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
E = OverDefinedCache.end(); I != E; ++I) {
@@ -505,6 +520,7 @@ LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
if (Constant *VC = dyn_cast<Constant>(Val))
return LVILatticeVal::get(VC);
+ SeenBlocks.insert(BB);
return lookup(Val)[BB];
}
@@ -513,6 +529,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
return true;
ValueCacheEntryTy &Cache = lookup(Val);
+ SeenBlocks.insert(BB);
LVILatticeVal &BBLV = Cache[BB];
// OverDefinedCacheUpdater is a helper object that will update
@@ -1007,12 +1024,19 @@ static LazyValueInfoCache &getCache(void *&PImpl) {
bool LazyValueInfo::runOnFunction(Function &F) {
if (PImpl)
getCache(PImpl).clear();
-
+
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
// Fully lazy.
return false;
}
+void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfo>();
+}
+
void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
if (PImpl) {
@@ -1061,7 +1085,8 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
// If we know the value is a constant, evaluate the conditional.
Constant *Res = 0;
if (Result.isConstant()) {
- Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD);
+ Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD,
+ TLI);
if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
return ResCI->isZero() ? False : True;
return Unknown;
@@ -1102,13 +1127,15 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
if (Pred == ICmpInst::ICMP_EQ) {
// !C1 == C -> false iff C1 == C.
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
- Result.getNotConstant(), C, TD);
+ Result.getNotConstant(), C, TD,
+ TLI);
if (Res->isNullValue())
return False;
} else if (Pred == ICmpInst::ICMP_NE) {
// !C1 != C -> true iff C1 == C.
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
- Result.getNotConstant(), C, TD);
+ Result.getNotConstant(), C, TD,
+ TLI);
if (Res->isNullValue())
return True;
}
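The LazyValueInfo changes show the full three-step recipe for taking on a required analysis: declare the dependency in the INITIALIZE_PASS macros, request it in getAnalysisUsage, and cache the pointer in runOnFunction; Lint and ScalarEvolution below repeat it. A sketch of the recipe in a hypothetical pass (all names invented):

    #include "llvm/Pass.h"
    #include "llvm/Target/TargetLibraryInfo.h"
    using namespace llvm;

    namespace {
    struct MyPass : public FunctionPass {
      static char ID;
      TargetLibraryInfo *TLI;
      MyPass() : FunctionPass(ID), TLI(0) {}

      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.setPreservesAll();
        AU.addRequired<TargetLibraryInfo>(); // step 2: request
      }
      virtual bool runOnFunction(Function &) {
        TLI = &getAnalysis<TargetLibraryInfo>(); // step 3: cache
        return false;
      }
    };
    }
    char MyPass::ID = 0;
    // Step 1 would be the INITIALIZE_PASS_BEGIN/_DEPENDENCY/_END triple.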
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 38d677d..971065f 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -44,6 +44,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include "llvm/IntrinsicInst.h"
@@ -103,6 +104,7 @@ namespace {
AliasAnalysis *AA;
DominatorTree *DT;
TargetData *TD;
+ TargetLibraryInfo *TLI;
std::string Messages;
raw_string_ostream MessagesStr;
@@ -117,6 +119,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
AU.addRequired<DominatorTree>();
}
virtual void print(raw_ostream &O, const Module *M) const {}
@@ -149,6 +152,7 @@ namespace {
char Lint::ID = 0;
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
@@ -174,6 +178,7 @@ bool Lint::runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
visit(F);
dbgs() << MessagesStr.str();
Messages.clear();
@@ -614,10 +619,10 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
// As a last resort, try SimplifyInstruction or constant folding.
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
- if (Value *W = SimplifyInstruction(Inst, TD, DT))
+ if (Value *W = SimplifyInstruction(Inst, TD, TLI, DT))
return findValueImpl(W, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (Value *W = ConstantFoldConstantExpression(CE, TD))
+ if (Value *W = ConstantFoldConstantExpression(CE, TD, TLI))
if (W != V)
return findValueImpl(W, OffsetOk, Visited);
}
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index c7833bf..858cc64 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/Instructions.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
@@ -95,7 +96,7 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
// Test if the value is already loop-invariant.
if (isLoopInvariant(I))
return true;
- if (!I->isSafeToSpeculativelyExecute())
+ if (!isSafeToSpeculativelyExecute(I))
return false;
if (I->mayReadFromMemory())
return false;
@@ -165,99 +166,6 @@ PHINode *Loop::getCanonicalInductionVariable() const {
return 0;
}
-/// getTripCount - Return a loop-invariant LLVM value indicating the number of
-/// times the loop will be executed. Note that this means that the backedge
-/// of the loop executes N-1 times. If the trip-count cannot be determined,
-/// this returns null.
-///
-/// The IndVarSimplify pass transforms loops to have a form that this
-/// function easily understands.
-///
-Value *Loop::getTripCount() const {
- // Canonical loops will end with a 'cmp ne I, V', where I is the incremented
- // canonical induction variable and V is the trip count of the loop.
- PHINode *IV = getCanonicalInductionVariable();
- if (IV == 0 || IV->getNumIncomingValues() != 2) return 0;
-
- bool P0InLoop = contains(IV->getIncomingBlock(0));
- Value *Inc = IV->getIncomingValue(!P0InLoop);
- BasicBlock *BackedgeBlock = IV->getIncomingBlock(!P0InLoop);
-
- if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator()))
- if (BI->isConditional()) {
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
- if (ICI->getOperand(0) == Inc) {
- if (BI->getSuccessor(0) == getHeader()) {
- if (ICI->getPredicate() == ICmpInst::ICMP_NE)
- return ICI->getOperand(1);
- } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) {
- return ICI->getOperand(1);
- }
- }
- }
- }
-
- return 0;
-}
-
-/// getSmallConstantTripCount - Returns the trip count of this loop as a
-/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
-/// or not constant. Will also return 0 if the trip count is very large
-/// (>= 2^32)
-unsigned Loop::getSmallConstantTripCount() const {
- Value* TripCount = this->getTripCount();
- if (TripCount) {
- if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) {
- // Guard against huge trip counts.
- if (TripCountC->getValue().getActiveBits() <= 32) {
- return (unsigned)TripCountC->getZExtValue();
- }
- }
- }
- return 0;
-}
-
-/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
-/// trip count of this loop as a normal unsigned value, if possible. This
-/// means that the actual trip count is always a multiple of the returned
-/// value (don't forget the trip count could very well be zero as well!).
-///
-/// Returns 1 if the trip count is unknown or not guaranteed to be the
-/// multiple of a constant (which is also the case if the trip count is simply
-/// constant, use getSmallConstantTripCount for that case), Will also return 1
-/// if the trip count is very large (>= 2^32).
-unsigned Loop::getSmallConstantTripMultiple() const {
- Value* TripCount = this->getTripCount();
- // This will hold the ConstantInt result, if any
- ConstantInt *Result = NULL;
- if (TripCount) {
- // See if the trip count is constant itself
- Result = dyn_cast<ConstantInt>(TripCount);
- // if not, see if it is a multiplication
- if (!Result)
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) {
- switch (BO->getOpcode()) {
- case BinaryOperator::Mul:
- Result = dyn_cast<ConstantInt>(BO->getOperand(1));
- break;
- case BinaryOperator::Shl:
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1)))
- if (CI->getValue().getActiveBits() <= 5)
- return 1u << CI->getZExtValue();
- break;
- default:
- break;
- }
- }
- }
- // Guard against huge trip counts.
- if (Result && Result->getValue().getActiveBits() <= 32) {
- return (unsigned)Result->getZExtValue();
- } else {
- return 1;
- }
-}
-
/// isLCSSAForm - Return true if the Loop is in LCSSA form
bool Loop::isLCSSAForm(DominatorTree &DT) const {
// Sort the blocks vector so that we can use binary search to do quick
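This deletion retires Loop's purely syntactic trip-count helpers, which only understood loops already canonicalized by IndVarSimplify; upstream moves clients to ScalarEvolution, which derives trip counts symbolically. The sketch below assumes the ScalarEvolution replacements of this era (getSmallConstantTripCount taking the loop and an exiting block); verify the exact signatures in ScalarEvolution.h before relying on them.

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    using namespace llvm;

    // Assumed replacement API; returns 0 when the count is unknown or huge.
    static unsigned tripCountOrZero(ScalarEvolution &SE, Loop *L) {
      BasicBlock *Exiting = L->getExitingBlock(); // null if several exits
      if (!Exiting)
        return 0;
      return SE.getSmallConstantTripCount(L, Exiting);
    }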
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index fde07ea..22414b3 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -130,7 +130,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
AliasAnalysis::Location Loc = AA.getLocation(LI);
MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDI);
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (!LI->isUnordered()) {
+ if (!SI->isUnordered()) {
// FIXME: Handle atomic/volatile stores.
Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown),
static_cast<BasicBlock *>(0)));
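The MemDepPrinter change is a real bug fix rather than plumbing: inside the StoreInst branch the code queried LI, the LoadInst pointer declared in the preceding if-condition, which is still in scope on the else path but guaranteed null there, so the typo compiled cleanly and dereferenced null at runtime. A self-contained illustration of the trap (dynamic_cast standing in for dyn_cast):

    #include <cassert>

    struct Inst { virtual ~Inst() {} bool unordered() const { return true; } };
    struct Load : Inst {};
    struct Store : Inst {};

    void classify(Inst *I) {
      if (Load *LI = dynamic_cast<Load *>(I)) {
        assert(LI->unordered());
      } else if (Store *SI = dynamic_cast<Store *>(I)) {
        // LI is still in scope here but always null on this path;
        // SI is the pointer that must be used, as the fix above does.
        assert(SI->unordered());
      }
    }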
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index 7e22ddc..80ea219 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Analysis/Dominators.h"
@@ -27,7 +28,7 @@ static bool CanPHITrans(Instruction *Inst) {
return true;
if (isa<CastInst>(Inst) &&
- Inst->isSafeToSpeculativelyExecute())
+ isSafeToSpeculativelyExecute(Inst))
return true;
if (Inst->getOpcode() == Instruction::Add &&
@@ -186,7 +187,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// operands need to be phi translated, and if so, reconstruct it.
if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
- if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+ if (!isSafeToSpeculativelyExecute(Cast)) return 0;
Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
if (PHIIn == 0) return 0;
if (PHIIn == Cast->getOperand(0))
@@ -284,7 +285,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
}
// See if the add simplifies away.
- if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, DT)) {
+ if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, TLI, DT)) {
// If we simplified the operands, the LHS is no longer an input, but Res
// is.
RemoveInstInputs(LHS, InstInputs);
@@ -381,7 +382,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
// Handle cast of PHI translatable value.
if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
- if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+ if (!isSafeToSpeculativelyExecute(Cast)) return 0;
Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0),
CurBB, PredBB, DT, NewInsts);
if (OpVal == 0) return 0;
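
[Editor's sketch, not part of the patch] Both changes in this file are mechanical API migrations: the speculation check is now the free function isSafeToSpeculativelyExecute in ValueTracking, and SimplifyAddInst gained a TargetLibraryInfo parameter. The new SimplifyAddInst call shape, taken from the hunk above (the analysis pointers may be null):

    #include "llvm/Analysis/InstructionSimplify.h"

    llvm::Value *trySimplifyAdd(llvm::Value *LHS, llvm::Value *RHS,
                                const llvm::TargetData *TD,
                                const llvm::TargetLibraryInfo *TLI,
                                const llvm::DominatorTree *DT) {
      // Returns null when no simplification applies.
      return llvm::SimplifyAddInst(LHS, RHS, /*isNSW=*/false, /*isNUW=*/false,
                                   TD, TLI, DT);
    }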
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
index 379d79c..0cb1588 100644
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ b/lib/Analysis/ProfileVerifierPass.cpp
@@ -30,7 +30,7 @@ static cl::opt<bool,false>
ProfileVerifierDisableAssertions("profile-verifier-noassert",
cl::desc("Disable assertions"));
-namespace llvm {
+namespace {
template<class FType, class BType>
class ProfileVerifierPassT : public FunctionPass {
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 622b214..daf7742 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -74,6 +74,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
@@ -108,6 +109,7 @@ INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
char ScalarEvolution::ID = 0;
@@ -188,6 +190,14 @@ void SCEV::print(raw_ostream &OS) const {
OS << OpStr;
}
OS << ")";
+ switch (NAry->getSCEVType()) {
+ case scAddExpr:
+ case scMulExpr:
+ if (NAry->getNoWrapFlags(FlagNUW))
+ OS << "<nuw>";
+ if (NAry->getNoWrapFlags(FlagNSW))
+ OS << "<nsw>";
+ }
return;
}
case scUDivExpr: {
@@ -2581,7 +2591,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
Constant *C = ConstantExpr::getSizeOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
@@ -2590,7 +2600,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) {
Constant *C = ConstantExpr::getAlignOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
@@ -2607,7 +2617,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy,
Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
@@ -2617,7 +2627,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy,
Constant *FieldNo) {
Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
@@ -3108,7 +3118,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
- if (Value *V = SimplifyInstruction(PN, TD, DT))
+ if (Value *V = SimplifyInstruction(PN, TD, TLI, DT))
if (LI->replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
@@ -3584,6 +3594,12 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// because it leads to N-1 getAddExpr calls for N ultimate operands.
// Instead, gather up all the operands and make a single getAddExpr call.
// LLVM IR canonical form means we need only traverse the left operands.
+ //
+ // Don't apply this instruction's NSW or NUW flags to the new
+ // expression. The instruction may be guarded by control flow that the
+ // no-wrap behavior depends on. Non-control-equivalent instructions can be
+ // mapped to the same SCEV expression, and it would be incorrect to transfer
+ // NSW/NUW semantics to those operations.
SmallVector<const SCEV *, 4> AddOps;
AddOps.push_back(getSCEV(U->getOperand(1)));
for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
@@ -3598,16 +3614,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
AddOps.push_back(Op1);
}
AddOps.push_back(getSCEV(U->getOperand(0)));
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
- OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(V);
- if (OBO->hasNoSignedWrap())
- Flags = setFlags(Flags, SCEV::FlagNSW);
- if (OBO->hasNoUnsignedWrap())
- Flags = setFlags(Flags, SCEV::FlagNUW);
- return getAddExpr(AddOps, Flags);
+ return getAddExpr(AddOps);
}
case Instruction::Mul: {
- // See the Add code above.
+ // Don't transfer NSW/NUW for the same reason as AddExpr.
SmallVector<const SCEV *, 4> MulOps;
MulOps.push_back(getSCEV(U->getOperand(1)));
for (Value *Op = U->getOperand(0);
@@ -4762,7 +4772,8 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
/// reason, return null.
static Constant *EvaluateExpression(Value *V, const Loop *L,
DenseMap<Instruction *, Constant *> &Vals,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Convenient constant check, but redundant for recursive calls.
if (Constant *C = dyn_cast<Constant>(V)) return C;
Instruction *I = dyn_cast<Instruction>(V);
@@ -4788,7 +4799,7 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
if (!Operands[i]) return 0;
continue;
}
- Constant *C = EvaluateExpression(Operand, L, Vals, TD);
+ Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI);
Vals[Operand] = C;
if (!C) return 0;
Operands[i] = C;
@@ -4796,12 +4807,13 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
if (CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
- Operands[1], TD);
+ Operands[1], TD, TLI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!LI->isVolatile())
return ConstantFoldLoadFromConstPtr(Operands[0], TD);
}
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD,
+ TLI);
}
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
@@ -4856,7 +4868,8 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
// Compute the value of the PHIs for the next iteration.
// EvaluateExpression adds non-phi values to the CurrentIterVals map.
DenseMap<Instruction *, Constant *> NextIterVals;
- Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD);
+ Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD,
+ TLI);
if (NextPHI == 0)
return 0; // Couldn't evaluate!
NextIterVals[PN] = NextPHI;
@@ -4881,7 +4894,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
Constant *&NextPHI = NextIterVals[PHI];
if (!NextPHI) { // Not already computed.
Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
- NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
}
if (NextPHI != I->second)
StoppedEvolving = false;
@@ -4936,8 +4949,8 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
ConstantInt *CondVal =
- dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L,
- CurrentIterVals, TD));
+ dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals,
+ TD, TLI));
// Couldn't symbolically evaluate.
if (!CondVal) return getCouldNotCompute();
@@ -4967,7 +4980,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
if (NextPHI) continue; // Already computed!
Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
- NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
}
CurrentIterVals.swap(NextIterVals);
}
@@ -5159,13 +5172,14 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
Constant *C = 0;
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(),
- Operands[0], Operands[1], TD);
+ Operands[0], Operands[1], TD,
+ TLI);
else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
if (!LI->isVolatile())
C = ConstantFoldLoadFromConstPtr(Operands[0], TD);
} else
C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- Operands, TD);
+ Operands, TD, TLI);
if (!C) return V;
return getSCEV(C);
}
@@ -6552,6 +6566,7 @@ bool ScalarEvolution::runOnFunction(Function &F) {
this->F = &F;
LI = &getAnalysis<LoopInfo>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTree>();
return false;
}
@@ -6588,6 +6603,7 @@ void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<LoopInfo>();
AU.addRequiredTransitive<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
}
bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
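
[Editor's sketch, not part of the patch] ScalarEvolution now pulls in TargetLibraryInfo via addRequired, so clients pick up the dependency transitively and typically need no source change. A hypothetical client pass against the 3.0-era pass API:

    #include "llvm/Pass.h"
    #include "llvm/Analysis/ScalarEvolution.h"

    namespace {
    struct SCEVClient : public llvm::FunctionPass {
      static char ID;
      SCEVClient() : llvm::FunctionPass(ID) {}
      virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const {
        AU.addRequired<llvm::ScalarEvolution>(); // TLI arrives transitively.
        AU.setPreservesAll();
      }
      virtual bool runOnFunction(llvm::Function &F) {
        llvm::ScalarEvolution &SE = getAnalysis<llvm::ScalarEvolution>();
        (void)SE; // Query backedge-taken counts, SCEVs, etc. here.
        return false;
      }
    };
    char SCEVClient::ID = 0;
    }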
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 47f0f32..f3cf549 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -73,9 +73,14 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
"InsertNoopCastOfTo cannot change sizes!");
// Short-circuit unnecessary bitcasts.
- if (Op == Instruction::BitCast && V->getType() == Ty)
- return V;
-
+ if (Op == Instruction::BitCast) {
+ if (V->getType() == Ty)
+ return V;
+ if (CastInst *CI = dyn_cast<CastInst>(V)) {
+ if (CI->getOperand(0)->getType() == Ty)
+ return CI->getOperand(0);
+ }
+ }
// Short-circuit unnecessary inttoptr<->ptrtoint casts.
if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
@@ -929,6 +934,36 @@ bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
}
}
+/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
+/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
+/// need to materialize IV increments elsewhere to handle difficult situations.
+Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
+ Type *ExpandTy, Type *IntTy,
+ bool useSubtract) {
+ Value *IncV;
+ // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
+ if (ExpandTy->isPointerTy()) {
+ PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+ // If the step isn't constant, don't use an implicitly scaled GEP, because
+ // that would require a multiply inside the loop.
+ if (!isa<ConstantInt>(StepV))
+ GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
+ GEPPtrTy->getAddressSpace());
+ const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
+ IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
+ if (IncV->getType() != PN->getType()) {
+ IncV = Builder.CreateBitCast(IncV, PN->getType());
+ rememberInstruction(IncV);
+ }
+ } else {
+ IncV = useSubtract ?
+ Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
+ Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
+ rememberInstruction(IncV);
+ }
+ return IncV;
+}
+
/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
/// the base addrec, which is the addrec without any non-loop-dominating
/// values, and return the PHI.
@@ -993,16 +1028,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(),
L->getHeader()));
- // Expand code for the step value. Insert instructions right before the
- // terminator corresponding to the back-edge. Do this before creating the PHI
- // so that PHI reuse code doesn't see an incomplete PHI. If the stride is
- // negative, insert a sub instead of an add for the increment (unless it's a
- // constant, because subtracts of constants are canonicalized to adds).
+ // Expand code for the step value. Do this before creating the PHI so that PHI
+ // reuse code doesn't see an incomplete PHI.
const SCEV *Step = Normalized->getStepRecurrence(SE);
- bool isPointer = ExpandTy->isPointerTy();
- bool isNegative = !isPointer && isNonConstantNegative(Step);
- if (isNegative)
+ // If the stride is negative, insert a sub instead of an add for the increment
+ // (unless it's a constant, because subtracts of constants are canonicalized
+ // to adds).
+ bool useSubtract = !ExpandTy->isPointerTy() && isNonConstantNegative(Step);
+ if (useSubtract)
Step = SE.getNegativeSCEV(Step);
+ // Expand the step somewhere that dominates the loop header.
Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
// Create the PHI.
@@ -1023,33 +1058,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
continue;
}
- // Create a step value and add it to the PHI. If IVIncInsertLoop is
- // non-null and equal to the addrec's loop, insert the instructions
- // at IVIncInsertPos.
+ // Create a step value and add it to the PHI.
+ // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
+ // instructions at IVIncInsertPos.
Instruction *InsertPos = L == IVIncInsertLoop ?
IVIncInsertPos : Pred->getTerminator();
Builder.SetInsertPoint(InsertPos);
- Value *IncV;
- // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
- if (isPointer) {
- PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
- // If the step isn't constant, don't use an implicitly scaled GEP, because
- // that would require a multiply inside the loop.
- if (!isa<ConstantInt>(StepV))
- GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
- GEPPtrTy->getAddressSpace());
- const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
- IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
- if (IncV->getType() != PN->getType()) {
- IncV = Builder.CreateBitCast(IncV, PN->getType());
- rememberInstruction(IncV);
- }
- } else {
- IncV = isNegative ?
- Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
- Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
- rememberInstruction(IncV);
- }
+ Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+
PN->addIncoming(IncV, Pred);
}
@@ -1124,10 +1140,31 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// For an expansion to use the postinc form, the client must call
// expandCodeFor with an InsertPoint that is either outside the PostIncLoop
// or dominated by IVIncInsertPos.
- assert((!isa<Instruction>(Result) ||
- SE.DT->dominates(cast<Instruction>(Result),
- Builder.GetInsertPoint())) &&
- "postinc expansion does not dominate use");
+ if (isa<Instruction>(Result)
+ && !SE.DT->dominates(cast<Instruction>(Result),
+ Builder.GetInsertPoint())) {
+ // The induction variable's postinc expansion does not dominate this use.
+ // IVUsers tries to prevent this case, so it is rare. However, it can
+ // happen when an IVUser outside the loop is not dominated by the latch
+ // block. Adjusting IVIncInsertPos before expansion begins cannot handle
+ // all cases. Consider a phi outside the loop whose operand is replaced during
+ // expansion with the value of the postinc user. Without fundamentally
+ // changing the way postinc users are tracked, the only remedy is
+ // inserting an extra IV increment. StepV might fold into PostLoopOffset,
+ // but hopefully expandCodeFor handles that.
+ bool useSubtract =
+ !ExpandTy->isPointerTy() && isNonConstantNegative(Step);
+ if (useSubtract)
+ Step = SE.getNegativeSCEV(Step);
+ // Expand the step somewhere that dominates the loop header.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+ // Restore the insertion point to the place where the caller has
+ // determined dominates all uses.
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+ }
}
// Re-apply any non-loop-dominating scale.
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 22f1c14..ef19e06 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -63,13 +63,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
unsigned BitWidth = Mask.getBitWidth();
- assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy())
- && "Not integer or pointer type!");
+ assert((V->getType()->isIntOrIntVectorTy() ||
+ V->getType()->getScalarType()->isPointerTy()) &&
+ "Not integer or pointer type!");
assert((!TD ||
TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
(!V->getType()->isIntOrIntVectorTy() ||
V->getType()->getScalarSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
+ KnownZero.getBitWidth() == BitWidth &&
KnownOne.getBitWidth() == BitWidth &&
"V, Mask, KnownOne and KnownZero should have same BitWidth");
@@ -103,14 +104,16 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
unsigned Align = GV->getAlignment();
if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) {
- Type *ObjectType = GV->getType()->getElementType();
- // If the object is defined in the current Module, we'll be giving
- // it the preferred alignment. Otherwise, we have to assume that it
- // may only have the minimum ABI alignment.
- if (!GV->isDeclaration() && !GV->mayBeOverridden())
- Align = TD->getPrefTypeAlignment(ObjectType);
- else
- Align = TD->getABITypeAlignment(ObjectType);
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ Type *ObjectType = GVar->getType()->getElementType();
+ // If the object is defined in the current Module, we'll be giving
+ // it the preferred alignment. Otherwise, we have to assume that it
+ // may only have the minimum ABI alignment.
+ if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
+ Align = TD->getPreferredAlignment(GVar);
+ else
+ Align = TD->getABITypeAlignment(ObjectType);
+ }
}
if (Align > 0)
KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
@@ -1367,6 +1370,8 @@ Value *llvm::isBytewiseValue(Value *V) {
return Val;
}
+
+ // FIXME: Vector types (e.g., <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>).
// Conceptually, we could handle things like:
// %a = zext i8 %X to i16
@@ -1555,7 +1560,8 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
const TargetData &TD) {
Operator *PtrOp = dyn_cast<Operator>(Ptr);
- if (PtrOp == 0) return Ptr;
+ if (PtrOp == 0 || Ptr->getType()->isVectorTy())
+ return Ptr;
// Just look through bitcasts.
if (PtrOp->getOpcode() == Instruction::BitCast)
@@ -1869,3 +1875,64 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
}
return true;
}
+
+bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst,
+ const TargetData *TD) {
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
+ if (C->canTrap())
+ return false;
+
+ switch (Inst->getOpcode()) {
+ default:
+ return true;
+ case Instruction::UDiv:
+ case Instruction::URem:
+ // x / y is undefined if y == 0, but calculations like x / 3 are safe.
+ return isKnownNonZero(Inst->getOperand(1), TD);
+ case Instruction::SDiv:
+ case Instruction::SRem: {
+ Value *Op = Inst->getOperand(1);
+ // x / y is undefined if y == 0
+ if (!isKnownNonZero(Op, TD))
+ return false;
+ // x / y might be undefined if y == -1
+ unsigned BitWidth = getBitWidth(Op->getType(), TD);
+ if (BitWidth == 0)
+ return false;
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(Op, APInt::getAllOnesValue(BitWidth),
+ KnownZero, KnownOne, TD);
+ return !!KnownZero; // A bit known to be zero means y cannot be -1 (all ones).
+ }
+ case Instruction::Load: {
+ const LoadInst *LI = cast<LoadInst>(Inst);
+ if (!LI->isUnordered())
+ return false;
+ return LI->getPointerOperand()->isDereferenceablePointer();
+ }
+ case Instruction::Call:
+ return false; // The called function could have undefined behavior or
+ // side-effects.
+ // FIXME: We should special-case some intrinsics (bswap,
+ // overflow-checking arithmetic, etc.)
+ case Instruction::VAArg:
+ case Instruction::Alloca:
+ case Instruction::Invoke:
+ case Instruction::PHI:
+ case Instruction::Store:
+ case Instruction::Ret:
+ case Instruction::Br:
+ case Instruction::IndirectBr:
+ case Instruction::Switch:
+ case Instruction::Unwind:
+ case Instruction::Unreachable:
+ case Instruction::Fence:
+ case Instruction::LandingPad:
+ case Instruction::AtomicRMW:
+ case Instruction::AtomicCmpXchg:
+ case Instruction::Resume:
+ return false; // Misc instructions which have effects
+ }
+}
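
[Editor's sketch, not part of the patch] The new entry point is conservative by construction: calls, stores, and anything with side effects report false, and division is allowed only when the divisor's known bits rule out a trap. A usage sketch for a hoisting decision, using the signature added above:

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/Instruction.h"

    // True if executing I unconditionally cannot trap or otherwise
    // misbehave, e.g. when hoisting it out of a guarded block.
    bool okToHoist(const llvm::Instruction *I, const llvm::TargetData *TD) {
      return llvm::isSafeToSpeculativelyExecute(I, TD); // TD may be null.
    }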
diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp
index 8fcc7aa..9ef2943 100644
--- a/lib/Archive/ArchiveWriter.cpp
+++ b/lib/Archive/ArchiveWriter.cpp
@@ -182,11 +182,11 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where,
if (hasSlash || filePath.str().length() > 15)
flags |= ArchiveMember::HasLongFilenameFlag;
- sys::LLVMFileType type;
+ sys::fs::file_magic type;
if (sys::fs::identify_magic(mbr->path.str(), type))
- type = sys::Unknown_FileType;
+ type = sys::fs::file_magic::unknown;
switch (type) {
- case sys::Bitcode_FileType:
+ case sys::fs::file_magic::bitcode:
flags |= ArchiveMember::BitcodeFlag;
break;
default:
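
[Editor's sketch, not part of the patch] This is one site of the sys::LLVMFileType to sys::fs::file_magic migration; identify_magic now reports failure through an error_code-style return. The new detection idiom, sketched:

    #include "llvm/ADT/Twine.h"
    #include "llvm/Support/FileSystem.h"

    bool isBitcode(const llvm::Twine &Path) {
      llvm::sys::fs::file_magic Magic;
      if (llvm::sys::fs::identify_magic(Path, Magic))
        return false; // Non-zero error_code: file unreadable.
      return Magic == llvm::sys::fs::file_magic::bitcode;
    }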
diff --git a/lib/Archive/CMakeLists.txt b/lib/Archive/CMakeLists.txt
index b52974e..7ff478a 100644
--- a/lib/Archive/CMakeLists.txt
+++ b/lib/Archive/CMakeLists.txt
@@ -3,9 +3,3 @@ add_llvm_library(LLVMArchive
ArchiveReader.cpp
ArchiveWriter.cpp
)
-
-add_llvm_library_dependencies(LLVMArchive
- LLVMBitReader
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Archive/LLVMBuild.txt b/lib/Archive/LLVMBuild.txt
index 26b7c8e..d68550b 100644
--- a/lib/Archive/LLVMBuild.txt
+++ b/lib/Archive/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = Archive
parent = Libraries
required_libraries = BitReader Core Support
-
diff --git a/lib/AsmParser/CMakeLists.txt b/lib/AsmParser/CMakeLists.txt
index 7496015..985ebe2 100644
--- a/lib/AsmParser/CMakeLists.txt
+++ b/lib/AsmParser/CMakeLists.txt
@@ -4,8 +4,3 @@ add_llvm_library(LLVMAsmParser
LLParser.cpp
Parser.cpp
)
-
-add_llvm_library_dependencies(LLVMAsmParser
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index cafaab0..4678269 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -120,11 +120,6 @@ bool LLParser::ValidateEndOfModule() {
for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove
- // Upgrade to new EH scheme. N.B. This will go away in 3.1.
- UpgradeExceptionHandling(M);
-
- // Check debug info intrinsics.
- CheckDebugInfoIntrinsics(M);
return false;
}
@@ -1069,7 +1064,7 @@ bool LLParser::ParseInstructionMetadata(Instruction *Inst,
return TokError("expected metadata after comma");
std::string Name = Lex.getStrVal();
- unsigned MDK = M->getMDKindID(Name.c_str());
+ unsigned MDK = M->getMDKindID(Name);
Lex.Lex();
MDNode *Node;
@@ -1612,7 +1607,8 @@ bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
if ((unsigned)Size != Size)
return Error(SizeLoc, "size too large for vector");
if (!VectorType::isValidElementType(EltTy))
- return Error(TypeLoc, "vector element type must be fp or integer");
+ return Error(TypeLoc,
+ "vector element type must be fp, integer or a pointer to these types");
Result = VectorType::get(EltTy, unsigned(Size));
} else {
if (!ArrayType::isValidElementType(EltTy))
@@ -1971,9 +1967,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return Error(ID.Loc, "constant vector must not be empty");
if (!Elts[0]->getType()->isIntegerTy() &&
- !Elts[0]->getType()->isFloatingPointTy())
+ !Elts[0]->getType()->isFloatingPointTy() &&
+ !Elts[0]->getType()->isPointerTy())
return Error(FirstEltLoc,
- "vector elements must have integer or floating point type");
+ "vector elements must have integer, pointer or floating point type");
// Verify that all the vector elements have the same type.
for (unsigned i = 1, e = Elts.size(); i != e; ++i)
@@ -2165,7 +2162,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
} else {
assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!");
if (!Val0->getType()->isIntOrIntVectorTy() &&
- !Val0->getType()->isPointerTy())
+ !Val0->getType()->getScalarType()->isPointerTy())
return Error(ID.Loc, "icmp requires pointer or integer operands");
ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1);
}
@@ -2299,7 +2296,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return true;
if (Opc == Instruction::GetElementPtr) {
- if (Elts.size() == 0 || !Elts[0]->getType()->isPointerTy())
+ if (Elts.size() == 0 ||
+ !Elts[0]->getType()->getScalarType()->isPointerTy())
return Error(ID.Loc, "getelementptr requires pointer operand");
ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
@@ -2953,19 +2951,11 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_tail: return ParseCall(Inst, PFS, true);
// Memory.
case lltok::kw_alloca: return ParseAlloc(Inst, PFS);
- case lltok::kw_load: return ParseLoad(Inst, PFS, false);
- case lltok::kw_store: return ParseStore(Inst, PFS, false);
+ case lltok::kw_load: return ParseLoad(Inst, PFS);
+ case lltok::kw_store: return ParseStore(Inst, PFS);
case lltok::kw_cmpxchg: return ParseCmpXchg(Inst, PFS);
case lltok::kw_atomicrmw: return ParseAtomicRMW(Inst, PFS);
case lltok::kw_fence: return ParseFence(Inst, PFS);
- case lltok::kw_volatile:
- // For compatibility; canonical location is after load
- if (EatIfPresent(lltok::kw_load))
- return ParseLoad(Inst, PFS, true);
- else if (EatIfPresent(lltok::kw_store))
- return ParseStore(Inst, PFS, true);
- else
- return TokError("expected 'load' or 'store'");
case lltok::kw_getelementptr: return ParseGetElementPtr(Inst, PFS);
case lltok::kw_extractvalue: return ParseExtractValue(Inst, PFS);
case lltok::kw_insertvalue: return ParseInsertValue(Inst, PFS);
@@ -3342,7 +3332,7 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
} else {
assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!");
if (!LHS->getType()->isIntOrIntVectorTy() &&
- !LHS->getType()->isPointerTy())
+ !LHS->getType()->getScalarType()->isPointerTy())
return Error(Loc, "icmp requires integer operands");
Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
}
@@ -3689,10 +3679,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
/// ::= 'load' 'volatile'? TypeAndValue (',' 'align' i32)?
/// ::= 'load' 'atomic' 'volatile'? TypeAndValue
/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
-/// Compatibility:
-/// ::= 'volatile' 'load' TypeAndValue (',' 'align' i32)?
-int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
- bool isVolatile) {
+int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val; LocTy Loc;
unsigned Alignment = 0;
bool AteExtraComma = false;
@@ -3701,15 +3688,12 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
SynchronizationScope Scope = CrossThread;
if (Lex.getKind() == lltok::kw_atomic) {
- if (isVolatile)
- return TokError("mixing atomic with old volatile placement");
isAtomic = true;
Lex.Lex();
}
+ bool isVolatile = false;
if (Lex.getKind() == lltok::kw_volatile) {
- if (isVolatile)
- return TokError("duplicate volatile before and after store");
isVolatile = true;
Lex.Lex();
}
@@ -3736,10 +3720,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
/// ::= 'store' 'volatile'? TypeAndValue ',' TypeAndValue (',' 'align' i32)?
/// ::= 'store' 'atomic' 'volatile'? TypeAndValue ',' TypeAndValue
/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
-/// Compatibility:
-/// ::= 'volatile' 'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)?
-int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
- bool isVolatile) {
+int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val, *Ptr; LocTy Loc, PtrLoc;
unsigned Alignment = 0;
bool AteExtraComma = false;
@@ -3748,15 +3729,12 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
SynchronizationScope Scope = CrossThread;
if (Lex.getKind() == lltok::kw_atomic) {
- if (isVolatile)
- return TokError("mixing atomic with old volatile placement");
isAtomic = true;
Lex.Lex();
}
+ bool isVolatile = false;
if (Lex.getKind() == lltok::kw_volatile) {
- if (isVolatile)
- return TokError("duplicate volatile before and after store");
isVolatile = true;
Lex.Lex();
}
@@ -3902,13 +3880,15 @@ int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) {
/// ParseGetElementPtr
/// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)*
int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
- Value *Ptr, *Val; LocTy Loc, EltLoc;
+ Value *Ptr = 0;
+ Value *Val = 0;
+ LocTy Loc, EltLoc;
bool InBounds = EatIfPresent(lltok::kw_inbounds);
if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
- if (!Ptr->getType()->isPointerTy())
+ if (!Ptr->getType()->getScalarType()->isPointerTy())
return Error(Loc, "base of getelementptr must be a pointer");
SmallVector<Value*, 16> Indices;
@@ -3919,11 +3899,23 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
break;
}
if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
- if (!Val->getType()->isIntegerTy())
+ if (!Val->getType()->getScalarType()->isIntegerTy())
return Error(EltLoc, "getelementptr index must be an integer");
+ if (Val->getType()->isVectorTy() != Ptr->getType()->isVectorTy())
+ return Error(EltLoc, "getelementptr index type missmatch");
+ if (Val->getType()->isVectorTy()) {
+ unsigned ValNumEl = cast<VectorType>(Val->getType())->getNumElements();
+ unsigned PtrNumEl = cast<VectorType>(Ptr->getType())->getNumElements();
+ if (ValNumEl != PtrNumEl)
+ return Error(EltLoc,
+ "getelementptr vector index has a wrong number of elements");
+ }
Indices.push_back(Val);
}
+ if (Val && Val->getType()->isVectorTy() && Indices.size() != 1)
+ return Error(EltLoc, "vector getelementptrs must have a single index");
+
if (!GetElementPtrInst::getIndexedType(Ptr->getType(), Indices))
return Error(Loc, "invalid getelementptr indices");
Inst = GetElementPtrInst::Create(Ptr, Indices);
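
[Editor's sketch, not part of the patch] With the compatibility path gone, only the canonical 'load volatile' / 'store volatile' orderings parse; the pre-3.0 'volatile load' spelling is now a syntax error. A self-contained check, assuming the assembly-parser entry point of this era:

    #include "llvm/Assembly/Parser.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Module.h"
    #include "llvm/Support/SourceMgr.h"

    bool parsesCanonicalVolatileLoad(llvm::LLVMContext &Ctx) {
      const char *IR = "define i32 @f(i32* %p) {\n"
                       "  %v = load volatile i32* %p\n" // canonical order
                       "  ret i32 %v\n"
                       "}\n";
      llvm::SMDiagnostic Err;
      llvm::Module *M = llvm::ParseAssemblyString(IR, 0, Err, Ctx);
      bool OK = M != 0; // Null on a parse error.
      delete M;
      return OK;
    }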
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index cbc3c23..c2537d7 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -363,8 +363,8 @@ namespace llvm {
bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS);
bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
- int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
- int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
+ int ParseLoad(Instruction *&I, PerFunctionState &PFS);
+ int ParseStore(Instruction *&I, PerFunctionState &PFS);
int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS);
int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS);
int ParseFence(Instruction *&I, PerFunctionState &PFS);
diff --git a/lib/AsmParser/LLVMBuild.txt b/lib/AsmParser/LLVMBuild.txt
index ad56d4c..3bc31ed 100644
--- a/lib/AsmParser/LLVMBuild.txt
+++ b/lib/AsmParser/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = AsmParser
parent = Libraries
required_libraries = Core Support
-
diff --git a/lib/Bitcode/LLVMBuild.txt b/lib/Bitcode/LLVMBuild.txt
index 696440d..af9936b 100644
--- a/lib/Bitcode/LLVMBuild.txt
+++ b/lib/Bitcode/LLVMBuild.txt
@@ -15,8 +15,10 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = Reader Writer
+
[component_0]
type = Group
name = Bitcode
parent = Libraries
-
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 6ecdbae..d584015 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -403,14 +403,6 @@ Type *BitcodeReader::getTypeByID(unsigned ID) {
return TypeList[ID] = StructType::create(Context);
}
-/// FIXME: Remove in LLVM 3.1, only used by ParseOldTypeTable.
-Type *BitcodeReader::getTypeByIDOrNull(unsigned ID) {
- if (ID >= TypeList.size())
- TypeList.resize(ID+1);
-
- return TypeList[ID];
-}
-
//===----------------------------------------------------------------------===//
// Functions for parsing blocks from the bitcode file
@@ -747,264 +739,6 @@ bool BitcodeReader::ParseTypeTableBody() {
}
}
-// FIXME: Remove in LLVM 3.1
-bool BitcodeReader::ParseOldTypeTable() {
- if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_OLD))
- return Error("Malformed block record");
-
- if (!TypeList.empty())
- return Error("Multiple TYPE_BLOCKs found!");
-
-
- // While horrible, we have no good ordering of types in the bc file. Just
- // iteratively parse types out of the bc file in multiple passes until we get
- // them all. Do this by saving a cursor for the start of the type block.
- BitstreamCursor StartOfTypeBlockCursor(Stream);
-
- unsigned NumTypesRead = 0;
-
- SmallVector<uint64_t, 64> Record;
-RestartScan:
- unsigned NextTypeID = 0;
- bool ReadAnyTypes = false;
-
- // Read all the records for this type table.
- while (1) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
- if (NextTypeID != TypeList.size())
- return Error("Invalid type forward reference in TYPE_BLOCK_ID_OLD");
-
- // If we haven't read all of the types yet, iterate again.
- if (NumTypesRead != TypeList.size()) {
- // If we didn't successfully read any types in this pass, then we must
- // have an unhandled forward reference.
- if (!ReadAnyTypes)
- return Error("Obsolete bitcode contains unhandled recursive type");
-
- Stream = StartOfTypeBlockCursor;
- goto RestartScan;
- }
-
- if (Stream.ReadBlockEnd())
- return Error("Error at end of type table block");
- return false;
- }
-
- if (Code == bitc::ENTER_SUBBLOCK) {
- // No known subblocks, always skip them.
- Stream.ReadSubBlockID();
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- continue;
- }
-
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
- }
-
- // Read a record.
- Record.clear();
- Type *ResultTy = 0;
- switch (Stream.ReadRecord(Code, Record)) {
- default: return Error("unknown type in type table");
- case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
- // TYPE_CODE_NUMENTRY contains a count of the number of types in the
- // type list. This allows us to reserve space.
- if (Record.size() < 1)
- return Error("Invalid TYPE_CODE_NUMENTRY record");
- TypeList.resize(Record[0]);
- continue;
- case bitc::TYPE_CODE_VOID: // VOID
- ResultTy = Type::getVoidTy(Context);
- break;
- case bitc::TYPE_CODE_FLOAT: // FLOAT
- ResultTy = Type::getFloatTy(Context);
- break;
- case bitc::TYPE_CODE_DOUBLE: // DOUBLE
- ResultTy = Type::getDoubleTy(Context);
- break;
- case bitc::TYPE_CODE_X86_FP80: // X86_FP80
- ResultTy = Type::getX86_FP80Ty(Context);
- break;
- case bitc::TYPE_CODE_FP128: // FP128
- ResultTy = Type::getFP128Ty(Context);
- break;
- case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128
- ResultTy = Type::getPPC_FP128Ty(Context);
- break;
- case bitc::TYPE_CODE_LABEL: // LABEL
- ResultTy = Type::getLabelTy(Context);
- break;
- case bitc::TYPE_CODE_METADATA: // METADATA
- ResultTy = Type::getMetadataTy(Context);
- break;
- case bitc::TYPE_CODE_X86_MMX: // X86_MMX
- ResultTy = Type::getX86_MMXTy(Context);
- break;
- case bitc::TYPE_CODE_INTEGER: // INTEGER: [width]
- if (Record.size() < 1)
- return Error("Invalid Integer type record");
- ResultTy = IntegerType::get(Context, Record[0]);
- break;
- case bitc::TYPE_CODE_OPAQUE: // OPAQUE
- if (NextTypeID < TypeList.size() && TypeList[NextTypeID] == 0)
- ResultTy = StructType::create(Context);
- break;
- case bitc::TYPE_CODE_STRUCT_OLD: {// STRUCT_OLD
- if (NextTypeID >= TypeList.size()) break;
- // If we already read it, don't reprocess.
- if (TypeList[NextTypeID] &&
- !cast<StructType>(TypeList[NextTypeID])->isOpaque())
- break;
-
- // Set a type.
- if (TypeList[NextTypeID] == 0)
- TypeList[NextTypeID] = StructType::create(Context);
-
- std::vector<Type*> EltTys;
- for (unsigned i = 1, e = Record.size(); i != e; ++i) {
- if (Type *Elt = getTypeByIDOrNull(Record[i]))
- EltTys.push_back(Elt);
- else
- break;
- }
-
- if (EltTys.size() != Record.size()-1)
- break; // Not all elements are ready.
-
- cast<StructType>(TypeList[NextTypeID])->setBody(EltTys, Record[0]);
- ResultTy = TypeList[NextTypeID];
- TypeList[NextTypeID] = 0;
- break;
- }
- case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
- // [pointee type, address space]
- if (Record.size() < 1)
- return Error("Invalid POINTER type record");
- unsigned AddressSpace = 0;
- if (Record.size() == 2)
- AddressSpace = Record[1];
- if ((ResultTy = getTypeByIDOrNull(Record[0])))
- ResultTy = PointerType::get(ResultTy, AddressSpace);
- break;
- }
- case bitc::TYPE_CODE_FUNCTION_OLD: {
- // FIXME: attrid is dead, remove it in LLVM 3.0
- // FUNCTION: [vararg, attrid, retty, paramty x N]
- if (Record.size() < 3)
- return Error("Invalid FUNCTION type record");
- std::vector<Type*> ArgTys;
- for (unsigned i = 3, e = Record.size(); i != e; ++i) {
- if (Type *Elt = getTypeByIDOrNull(Record[i]))
- ArgTys.push_back(Elt);
- else
- break;
- }
- if (ArgTys.size()+3 != Record.size())
- break; // Something was null.
- if ((ResultTy = getTypeByIDOrNull(Record[2])))
- ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
- break;
- }
- case bitc::TYPE_CODE_FUNCTION: {
- // FUNCTION: [vararg, retty, paramty x N]
- if (Record.size() < 2)
- return Error("Invalid FUNCTION type record");
- std::vector<Type*> ArgTys;
- for (unsigned i = 2, e = Record.size(); i != e; ++i) {
- if (Type *Elt = getTypeByIDOrNull(Record[i]))
- ArgTys.push_back(Elt);
- else
- break;
- }
- if (ArgTys.size()+2 != Record.size())
- break; // Something was null.
- if ((ResultTy = getTypeByIDOrNull(Record[1])))
- ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
- break;
- }
- case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
- if (Record.size() < 2)
- return Error("Invalid ARRAY type record");
- if ((ResultTy = getTypeByIDOrNull(Record[1])))
- ResultTy = ArrayType::get(ResultTy, Record[0]);
- break;
- case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
- if (Record.size() < 2)
- return Error("Invalid VECTOR type record");
- if ((ResultTy = getTypeByIDOrNull(Record[1])))
- ResultTy = VectorType::get(ResultTy, Record[0]);
- break;
- }
-
- if (NextTypeID >= TypeList.size())
- return Error("invalid TYPE table");
-
- if (ResultTy && TypeList[NextTypeID] == 0) {
- ++NumTypesRead;
- ReadAnyTypes = true;
-
- TypeList[NextTypeID] = ResultTy;
- }
-
- ++NextTypeID;
- }
-}
-
-
-bool BitcodeReader::ParseOldTypeSymbolTable() {
- if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID_OLD))
- return Error("Malformed block record");
-
- SmallVector<uint64_t, 64> Record;
-
- // Read all the records for this type table.
- std::string TypeName;
- while (1) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
- if (Stream.ReadBlockEnd())
- return Error("Error at end of type symbol table block");
- return false;
- }
-
- if (Code == bitc::ENTER_SUBBLOCK) {
- // No known subblocks, always skip them.
- Stream.ReadSubBlockID();
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- continue;
- }
-
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
- }
-
- // Read a record.
- Record.clear();
- switch (Stream.ReadRecord(Code, Record)) {
- default: // Default behavior: unknown type.
- break;
- case bitc::TST_CODE_ENTRY: // TST_ENTRY: [typeid, namechar x N]
- if (ConvertToString(Record, 1, TypeName))
- return Error("Invalid TST_ENTRY record");
- unsigned TypeID = Record[0];
- if (TypeID >= TypeList.size())
- return Error("Invalid Type ID in TST_ENTRY record");
-
- // Only apply the type name to a struct type with no name.
- if (StructType *STy = dyn_cast<StructType>(TypeList[TypeID]))
- if (!STy->isLiteral() && !STy->hasName())
- STy->setName(TypeName);
- TypeName.clear();
- break;
- }
- }
-}
-
bool BitcodeReader::ParseValueSymbolTable() {
if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
return Error("Malformed block record");
@@ -1553,6 +1287,50 @@ bool BitcodeReader::ParseConstants() {
return false;
}
+bool BitcodeReader::ParseUseLists() {
+ if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records.
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of use-list table block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a use list record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Default behavior: unknown type.
+ break;
+ case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD.
+ unsigned RecordLength = Record.size();
+ if (RecordLength < 1)
+ return Error("Invalid UseList record");
+ UseListRecords.push_back(Record);
+ break;
+ }
+ }
+ }
+}
+
/// RememberAndSkipFunctionBody - When we see the block for a function body,
/// remember where it is and then skip it. This lets us lazily deserialize the
/// functions.
@@ -1636,14 +1414,6 @@ bool BitcodeReader::ParseModule() {
if (ParseTypeTable())
return true;
break;
- case bitc::TYPE_BLOCK_ID_OLD:
- if (ParseOldTypeTable())
- return true;
- break;
- case bitc::TYPE_SYMTAB_BLOCK_ID_OLD:
- if (ParseOldTypeSymbolTable())
- return true;
- break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
if (ParseValueSymbolTable())
return true;
@@ -1667,6 +1437,10 @@ bool BitcodeReader::ParseModule() {
if (RememberAndSkipFunctionBody())
return true;
break;
+ case bitc::USELIST_BLOCK_ID:
+ if (ParseUseLists())
+ return true;
+ break;
}
continue;
}
@@ -2975,12 +2749,6 @@ bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) {
}
std::vector<std::pair<Function*, Function*> >().swap(UpgradedIntrinsics);
- // Upgrade to new EH scheme. N.B. This will go away in 3.1.
- UpgradeExceptionHandling(M);
-
- // Check debug info intrinsics.
- CheckDebugInfoIntrinsics(TheModule);
-
return false;
}
@@ -3026,6 +2794,9 @@ Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context,
return 0;
}
+ // TODO: Restore the use-lists to the in-memory state when the bitcode was
+ // written. We must defer until the Module has been fully materialized.
+
return M;
}
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index 6e6118c..978b15b 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -135,6 +135,7 @@ class BitcodeReader : public GVMaterializer {
BitcodeReaderValueList ValueList;
BitcodeReaderMDValueList MDValueList;
SmallVector<Instruction *, 64> InstructionList;
+ SmallVector<SmallVector<uint64_t, 64>, 64> UseListRecords;
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
@@ -211,7 +212,6 @@ public:
bool ParseTriple(std::string &Triple);
private:
Type *getTypeByID(unsigned ID);
- Type *getTypeByIDOrNull(unsigned ID);
Value *getFnValueByID(unsigned ID, Type *Ty) {
if (Ty && Ty->isMetadataTy())
return MDValueList.getValueFwdRef(ID);
@@ -259,10 +259,8 @@ private:
bool ParseModule();
bool ParseAttributeBlock();
bool ParseTypeTable();
- bool ParseOldTypeTable(); // FIXME: Remove in LLVM 3.1
bool ParseTypeTableBody();
- bool ParseOldTypeSymbolTable(); // FIXME: Remove in LLVM 3.1
bool ParseValueSymbolTable();
bool ParseConstants();
bool RememberAndSkipFunctionBody();
@@ -271,6 +269,7 @@ private:
bool ParseMetadata();
bool ParseMetadataAttachment();
bool ParseModuleTriple(std::string &Triple);
+ bool ParseUseLists();
};
} // End llvm namespace
diff --git a/lib/Bitcode/Reader/CMakeLists.txt b/lib/Bitcode/Reader/CMakeLists.txt
index 37bebc4..693d431 100644
--- a/lib/Bitcode/Reader/CMakeLists.txt
+++ b/lib/Bitcode/Reader/CMakeLists.txt
@@ -2,8 +2,3 @@ add_llvm_library(LLVMBitReader
BitReader.cpp
BitcodeReader.cpp
)
-
-add_llvm_library_dependencies(LLVMBitReader
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Bitcode/Reader/LLVMBuild.txt b/lib/Bitcode/Reader/LLVMBuild.txt
index 948b335..c85a87b 100644
--- a/lib/Bitcode/Reader/LLVMBuild.txt
+++ b/lib/Bitcode/Reader/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = BitReader
parent = Bitcode
required_libraries = Core Support
-
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index e758f94..d980163 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -23,6 +23,7 @@
#include "llvm/Operator.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -31,6 +32,12 @@
#include <map>
using namespace llvm;
+static cl::opt<bool>
+EnablePreserveUseListOrdering("enable-bc-uselist-preserve",
+ cl::desc("Turn on experimental support for "
+ "use-list order preservation."),
+ cl::init(false), cl::Hidden);
+
/// These are manifest constants used by the bitcode writer. They do not need to
/// be kept in sync with the reader, but need to be consistent within this file.
enum {
@@ -194,11 +201,12 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */);
SmallVector<uint64_t, 64> TypeVals;
+ uint64_t NumBits = Log2_32_Ceil(VE.getTypes().size()+1);
+
// Abbrev for TYPE_CODE_POINTER.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0
unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
@@ -207,8 +215,8 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
// Abbrev for TYPE_CODE_STRUCT_ANON.
@@ -216,8 +224,8 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv);
// Abbrev for TYPE_CODE_STRUCT_NAME.
@@ -232,16 +240,16 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
// Abbrev for TYPE_CODE_ARRAY.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
// Emit an entry count so the reader can reserve space.
@@ -497,8 +505,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// Emit the function proto information.
for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
- // FUNCTION: [type, callingconv, isproto, paramattr,
- // linkage, alignment, section, visibility, gc, unnamed_addr]
+ // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment,
+ // section, visibility, gc, unnamed_addr]
Vals.push_back(VE.getTypeID(F->getType()));
Vals.push_back(F->getCallingConv());
Vals.push_back(F->isDeclaration());
@@ -518,6 +526,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// Emit the alias information.
for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end();
AI != E; ++AI) {
+ // ALIAS: [alias type, aliasee val#, linkage, visibility]
Vals.push_back(VE.getTypeID(AI->getType()));
Vals.push_back(VE.getValueID(AI->getAliasee()));
Vals.push_back(getEncodedLinkage(AI));
@@ -1571,6 +1580,102 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Stream.ExitBlock();
}
+// Sort the Users based on the order in which the reader parses the bitcode
+// file.
+static bool bitcodereader_order(const User *lhs, const User *rhs) {
+ // TODO: Implement. Until then treat all users as equivalent; a comparator
+ // that always returns true would break std::sort's strict weak ordering.
+ return false;
+}
+
+static void WriteUseList(const Value *V, const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+
+ // One or zero uses can't get out of order.
+ if (V->use_empty() || V->hasNUses(1))
+ return;
+
+ // Make a copy of the in-memory use-list for sorting.
+ unsigned UseListSize = std::distance(V->use_begin(), V->use_end());
+ SmallVector<const User*, 8> UseList;
+ UseList.reserve(UseListSize);
+ for (Value::const_use_iterator I = V->use_begin(), E = V->use_end();
+ I != E; ++I) {
+ const User *U = *I;
+ UseList.push_back(U);
+ }
+
+ // Sort the copy based on the order read by the BitcodeReader.
+ std::sort(UseList.begin(), UseList.end(), bitcodereader_order);
+
+ // TODO: Generate a diff between the BitcodeWriter in-memory use-list and the
+ // sorted list (i.e., the expected BitcodeReader in-memory use-list).
+
+ // TODO: Emit the USELIST_CODE_ENTRYs.
+}
+
+static void WriteFunctionUseList(const Function *F, ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ VE.incorporateFunction(*F);
+
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI)
+ WriteUseList(AI, VE, Stream);
+ for (Function::const_iterator BB = F->begin(), FE = F->end(); BB != FE;
+ ++BB) {
+ WriteUseList(BB, VE, Stream);
+ for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
+ ++II) {
+ WriteUseList(II, VE, Stream);
+ for (User::const_op_iterator OI = II->op_begin(), E = II->op_end();
+ OI != E; ++OI) {
+ if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
+ isa<InlineAsm>(*OI))
+ WriteUseList(*OI, VE, Stream);
+ }
+ }
+ }
+ VE.purgeFunction();
+}
+
+// Emit use-lists.
+static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3);
+
+ // XXX: this modifies the module, but in a way that should never change the
+ // behavior of any pass or codegen in LLVM. The problem is that GVs may
+ // contain entries in the use_list that do not exist in the Module and are
+ // not stored in the .bc file.
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I)
+ I->removeDeadConstantUsers();
+
+ // Write the global variables.
+ for (Module::const_global_iterator GI = M->global_begin(),
+ GE = M->global_end(); GI != GE; ++GI) {
+ WriteUseList(GI, VE, Stream);
+
+ // Write the global variable initializers.
+ if (GI->hasInitializer())
+ WriteUseList(GI->getInitializer(), VE, Stream);
+ }
+
+ // Write the functions.
+ for (Module::const_iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) {
+ WriteUseList(FI, VE, Stream);
+ if (!FI->isDeclaration())
+ WriteFunctionUseList(FI, VE, Stream);
+ }
+
+ // Write the aliases.
+ for (Module::const_alias_iterator AI = M->alias_begin(), AE = M->alias_end();
+ AI != AE; ++AI) {
+ WriteUseList(AI, VE, Stream);
+ WriteUseList(AI->getAliasee(), VE, Stream);
+ }
+
+ Stream.ExitBlock();
+}
/// WriteModule - Emit the specified module to the bitstream.
static void WriteModule(const Module *M, BitstreamWriter &Stream) {
@@ -1616,6 +1721,10 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
// Emit names for globals/functions etc.
WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
+ // Emit use-lists.
+ if (EnablePreserveUseListOrdering)
+ WriteModuleUseLists(M, VE, Stream);
+
Stream.ExitBlock();
}
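
[Editor's sketch, not part of the patch] The use-list emission is still a stub (the sort predicate and the ENTRY payloads are TODOs) and is gated off by default behind the hidden flag added above. Any tool that links LLVMBitWriter and parses its command line picks it up; a hypothetical driver snippet:

    #include "llvm/Bitcode/ReaderWriter.h"
    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"

    void emit(llvm::Module *M, llvm::raw_ostream &OS, int argc, char **argv) {
      // Passing -enable-bc-uselist-preserve on the tool's command line
      // enables the experimental USELIST_BLOCK emission.
      llvm::cl::ParseCommandLineOptions(argc, argv);
      llvm::WriteBitcodeToFile(M, OS);
    }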
diff --git a/lib/Bitcode/Writer/CMakeLists.txt b/lib/Bitcode/Writer/CMakeLists.txt
index 3cf9056..f097b09 100644
--- a/lib/Bitcode/Writer/CMakeLists.txt
+++ b/lib/Bitcode/Writer/CMakeLists.txt
@@ -4,8 +4,3 @@ add_llvm_library(LLVMBitWriter
BitcodeWriterPass.cpp
ValueEnumerator.cpp
)
-
-add_llvm_library_dependencies(LLVMBitWriter
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Bitcode/Writer/LLVMBuild.txt b/lib/Bitcode/Writer/LLVMBuild.txt
index 39ff04e..7d9e1de 100644
--- a/lib/Bitcode/Writer/LLVMBuild.txt
+++ b/lib/Bitcode/Writer/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = BitWriter
parent = Bitcode
required_libraries = Core Support
-
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 9ae9905..1c4d670 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -19,6 +19,8 @@
#include "llvm/Module.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -107,7 +109,6 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
OptimizeConstants(FirstConstant, Values.size());
}
-
unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
InstructionMapType::const_iterator I = InstructionMap.find(Inst);
assert(I != InstructionMap.end() && "Instruction is not mapped!");
@@ -130,6 +131,43 @@ unsigned ValueEnumerator::getValueID(const Value *V) const {
return I->second-1;
}
+void ValueEnumerator::dump() const {
+ print(dbgs(), ValueMap, "Default");
+ dbgs() << '\n';
+ print(dbgs(), MDValueMap, "MetaData");
+ dbgs() << '\n';
+}
+
+void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
+ const char *Name) const {
+
+ OS << "Map Name: " << Name << "\n";
+ OS << "Size: " << Map.size() << "\n";
+ for (ValueMapType::const_iterator I = Map.begin(),
+ E = Map.end(); I != E; ++I) {
+
+ const Value *V = I->first;
+ if (V->hasName())
+ OS << "Value: " << V->getName();
+ else
+ OS << "Value: [null]\n";
+ V->dump();
+
+ OS << " Uses(" << std::distance(V->use_begin(),V->use_end()) << "):";
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ if (UI != V->use_begin())
+ OS << ",";
+ if ((*UI)->hasName())
+ OS << " " << (*UI)->getName();
+ else
+ OS << " [null]";
+
+ }
+ OS << "\n\n";
+ }
+}
+
// Optimize constant ordering.
namespace {
struct CstSortPredicate {
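
The dump()/print() additions above are pure debugging aids; a typical ad-hoc use looks like this sketch (illustrative only, assuming a valid Module pointer):

    #include "ValueEnumerator.h"  // writer-local header, as in the diff above
    #include "llvm/Module.h"

    void debugEnumeration(const llvm::Module *M) {
      llvm::ValueEnumerator VE(M); // enumerate exactly as the bitcode writer would
      VE.dump();                   // prints the Default and MetaData maps to dbgs()
    }
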
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index b6fc920..a6ca536 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -32,6 +32,7 @@ class NamedMDNode;
class AttrListPtr;
class ValueSymbolTable;
class MDSymbolTable;
+class raw_ostream;
class ValueEnumerator {
public:
@@ -83,6 +84,9 @@ private:
public:
ValueEnumerator(const Module *M);
+ void dump() const;
+ void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const;
+
unsigned getValueID(const Value *V) const;
unsigned getTypeID(Type *T) const {
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 25842a7..6cf4571 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -148,7 +148,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
assert(State == NULL);
State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
- bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+ bool IsReturnBlock = (!BB->empty() && BB->back().isReturn());
std::vector<unsigned> &KillIndices = State->GetKillIndices();
std::vector<unsigned> &DefIndices = State->GetDefIndices();
@@ -384,7 +384,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
- if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
TII->isPredicated(MI)) {
DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
State->UnionGroups(Reg, 0);
@@ -451,8 +451,8 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// instruction which may not be executed. The second R6 def may or may not
// re-define R6 so it's not safe to change it since the last R6 use cannot be
// changed.
- bool Special = MI->getDesc().isCall() ||
- MI->getDesc().hasExtraSrcRegAllocReq() ||
+ bool Special = MI->isCall() ||
+ MI->hasExtraSrcRegAllocReq() ||
TII->isPredicated(MI);
// Scan the register uses for this instruction and update
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index fafc010..0c84be5 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -1,4 +1,4 @@
-//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities --*- C++ ------*-===//
+//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
@@ -149,33 +150,40 @@ llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
/// consideration of global floating-point math flags.
///
ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
- ISD::CondCode FPC, FOC;
switch (Pred) {
- case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
- case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
- case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
- case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
- case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
- case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
- case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break;
- case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break;
- case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break;
- case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
- case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
- case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
- case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break;
- case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break;
- case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
- case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break;
- default:
- llvm_unreachable("Invalid FCmp predicate opcode!");
- FOC = FPC = ISD::SETFALSE;
- break;
+ case FCmpInst::FCMP_FALSE: return ISD::SETFALSE;
+ case FCmpInst::FCMP_OEQ: return ISD::SETOEQ;
+ case FCmpInst::FCMP_OGT: return ISD::SETOGT;
+ case FCmpInst::FCMP_OGE: return ISD::SETOGE;
+ case FCmpInst::FCMP_OLT: return ISD::SETOLT;
+ case FCmpInst::FCMP_OLE: return ISD::SETOLE;
+ case FCmpInst::FCMP_ONE: return ISD::SETONE;
+ case FCmpInst::FCMP_ORD: return ISD::SETO;
+ case FCmpInst::FCMP_UNO: return ISD::SETUO;
+ case FCmpInst::FCMP_UEQ: return ISD::SETUEQ;
+ case FCmpInst::FCMP_UGT: return ISD::SETUGT;
+ case FCmpInst::FCMP_UGE: return ISD::SETUGE;
+ case FCmpInst::FCMP_ULT: return ISD::SETULT;
+ case FCmpInst::FCMP_ULE: return ISD::SETULE;
+ case FCmpInst::FCMP_UNE: return ISD::SETUNE;
+ case FCmpInst::FCMP_TRUE: return ISD::SETTRUE;
+ default: break;
+ }
+ llvm_unreachable("Invalid FCmp predicate opcode!");
+ return ISD::SETFALSE;
+}
+
+ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ;
+ case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE;
+ case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT;
+ case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE;
+ case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT;
+ case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE;
+ default: break;
}
- if (NoNaNsFPMath)
- return FOC;
- else
- return FPC;
+ return CC;
}
/// getICmpCondCode - Return the ISD condition code corresponding to
@@ -221,12 +229,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
// longjmp on x86), it can end up causing miscompilation that has not
// been fully understood.
if (!Ret &&
- (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false;
+ (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
+ !isa<UnreachableInst>(Term))) return false;
// If I will have a chain, make sure no other instruction that will have a
// chain interposes between I and the return.
if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
- !I->isSafeToSpeculativelyExecute())
+ !isSafeToSpeculativelyExecute(I))
for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
--BBI) {
if (&*BBI == I)
@@ -235,7 +244,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
if (isa<DbgInfoIntrinsic>(BBI))
continue;
if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
- !BBI->isSafeToSpeculativelyExecute())
+ !isSafeToSpeculativelyExecute(BBI))
return false;
}
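
The rewrite splits the old FOC/FPC pairing in two: getFCmpCondCode now always returns the NaN-precise code, and callers that can assume no NaNs strip the ordering afterwards with getFCmpCodeWithoutNaN. A sketch of the intended call pattern (illustrative; reading NoNaNsFPMath out of TargetOptions is an assumption consistent with the other Options.* migrations in this patch):

    ISD::CondCode Cond = getFCmpCondCode(FCmpInst::FCMP_OLT); // ISD::SETOLT
    if (TM.Options.NoNaNsFPMath)          // assumed TargetOptions field
      Cond = getFCmpCodeWithoutNaN(Cond); // collapses to ISD::SETLT
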
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 711b796..0c4d0d5 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1308,7 +1308,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List) {
}
// Emit the function pointers in reverse priority order.
- switch (MAI->getStructorOutputOrder()) {
+ switch (getObjFileLowering().getStructorOutputOrder()) {
case Structors::None:
break;
case Structors::PriorityOrder:
@@ -1659,6 +1659,28 @@ static void EmitGlobalConstantVector(const ConstantVector *CV,
AP.OutStreamer.EmitZeros(Padding, AddrSpace);
}
+static void LowerVectorConstant(const Constant *CV, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ // Look through bitcasts
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV))
+ if (CE->getOpcode() == Instruction::BitCast)
+ CV = CE->getOperand(0);
+
+ if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
+ return EmitGlobalConstantVector(V, AddrSpace, AP);
+
+ // If we get here, we're stuck; report the problem to the user.
+ // FIXME: Are there any other useful tricks for vectors?
+ {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported vector expression in static initializer: ";
+ WriteAsOperand(OS, CV, /*PrintType=*/false,
+ !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+}
+
static void EmitGlobalConstantStruct(const ConstantStruct *CS,
unsigned AddrSpace, AsmPrinter &AP) {
// Print the fields in successive locations. Pad to align if needed!
@@ -1813,8 +1835,8 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
return;
}
- if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
- return EmitGlobalConstantVector(V, AddrSpace, AP);
+ if (CV->getType()->isVectorTy())
+ return LowerVectorConstant(CV, AddrSpace, AP);
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// through the streamer with EmitValue.
@@ -1987,7 +2009,7 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB,
void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
// Emit an alignment directive for this block, if needed.
if (unsigned Align = MBB->getAlignment())
- EmitAlignment(Log2_32(Align));
+ EmitAlignment(Align);
// If the block has its address taken, emit any labels that were used to
// reference the block. It is possible that there is more than one label
@@ -2082,7 +2104,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
MachineInstr &MI = *II;
// If it is not a simple branch, we are in a table somewhere.
- if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch())
+ if (!MI.isBranch() || MI.isIndirectBranch())
return false;
// If we are the operands of one of the branches, this is not
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index f6ce17d..58fe2ed 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -12,13 +12,3 @@ add_llvm_library(LLVMAsmPrinter
OcamlGCPrinter.cpp
Win64Exception.cpp
)
-
-add_llvm_library_dependencies(LLVMAsmPrinter
- LLVMAnalysis
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMMCParser
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 237998a..8cb5156 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -791,13 +791,13 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DISubprogram SP(Element);
ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element));
if (SP.isProtected())
- addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
else if (SP.isPrivate())
- addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
else
- addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (SP.isExplicit())
addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1);
@@ -988,7 +988,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
unsigned VK = SP.getVirtuality();
if (VK) {
- addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
+ addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
DIEBlock *Block = getDIEBlock();
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
@@ -1398,17 +1398,17 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
if (DT.isProtected())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
else if (DT.isPrivate())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
// Otherwise C++ member and base classes are considered public.
else
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (DT.isVirtual())
- addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
dwarf::DW_VIRTUALITY_virtual);
// Objective-C properties.
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index dc46a58..a3db96a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -442,6 +442,10 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID());
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+ // Add name to the name table; we do this here because we're guaranteed
+ // to have concrete versions of our DW_TAG_inlined_subroutine nodes.
+ addSubprogramNames(TheCU, InlinedSP, ScopeDIE);
+
return ScopeDIE;
}
@@ -1414,7 +1418,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);
- if (!DisableFramePointerElim(*MF))
+ if (!MF->getTarget().Options.DisableFramePointerElim(*MF))
TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
dwarf::DW_FORM_flag, 1);
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index e0f2e85..bf7f7ee 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -184,7 +184,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
/// CallToNoUnwindFunction - Return `true' if this is a call to a function
/// marked `nounwind'. Return `false' otherwise.
bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
- assert(MI->getDesc().isCall() && "This should be a call instruction!");
+ assert(MI->isCall() && "This should be a call instruction!");
bool MarkedNoUnwind = false;
bool SawFunc = false;
@@ -243,7 +243,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
MI != E; ++MI) {
if (!MI->isLabel()) {
- if (MI->getDesc().isCall())
+ if (MI->isCall())
SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
continue;
}
diff --git a/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/lib/CodeGen/AsmPrinter/LLVMBuild.txt
index 0f2059f..20b1f7b 100644
--- a/lib/CodeGen/AsmPrinter/LLVMBuild.txt
+++ b/lib/CodeGen/AsmPrinter/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = AsmPrinter
parent = Libraries
required_libraries = Analysis CodeGen Core MC MCParser Support Target
-
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 5dec368..89894c3 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -432,10 +432,9 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
for (; I != E; ++I) {
if (I->isDebugValue())
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isCall())
+ if (I->isCall())
Time += 10;
- else if (MCID.mayLoad() || MCID.mayStore())
+ else if (I->mayLoad() || I->mayStore())
Time += 2;
else
++Time;
@@ -502,7 +501,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB,
break;
}
--I;
- if (!I->getDesc().isTerminator()) break;
+ if (!I->isTerminator()) break;
++NumTerms;
}
return NumTerms;
@@ -550,8 +549,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
// heuristics.
unsigned EffectiveTailLen = CommonTailLen;
if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
- !MBB1->back().getDesc().isBarrier() &&
- !MBB2->back().getDesc().isBarrier())
+ !MBB1->back().isBarrier() &&
+ !MBB2->back().isBarrier())
++EffectiveTailLen;
// Check if the common tail is long enough to be worthwhile.
@@ -927,8 +926,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(IBB, PredBB);
// Reinsert an unconditional branch if needed.
- // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks.
- PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
+ // The 1 below can occur as a result of removing blocks in
+ // TryTailMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
if (MergePotentials.size() == 1 &&
MergePotentials.begin()->getBlock() != PredBB)
FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
@@ -983,7 +983,7 @@ static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
if (!MBBI->isDebugValue())
break;
}
- return (MBBI->getDesc().isBranch());
+ return (MBBI->isBranch());
}
/// IsBetterFallthrough - Return true if it would be clearly better to
@@ -1011,7 +1011,7 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
MachineBasicBlock::iterator MBB2I = --MBB2->end();
while (MBB2I->isDebugValue())
--MBB2I;
- return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
+ return MBB2I->isCall() && !MBB1I->isCall();
}
/// OptimizeBlock - Analyze and optimize control flow related to the specified
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index c8d4dcf..7aee3bb 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_library(LLVMCodeGen
CodePlacementOpt.cpp
CriticalAntiDepBreaker.cpp
DeadMachineInstructionElim.cpp
+ DFAPacketizer.cpp
DwarfEHPrepare.cpp
EdgeBundles.cpp
ELFCodeEmitter.cpp
@@ -46,6 +47,7 @@ add_llvm_library(LLVMCodeGen
MachineFunctionPass.cpp
MachineFunctionPrinterPass.cpp
MachineInstr.cpp
+ MachineInstrBundle.cpp
MachineLICM.cpp
MachineLoopInfo.cpp
MachineLoopRanges.cpp
@@ -87,27 +89,18 @@ add_llvm_library(LLVMCodeGen
Spiller.cpp
SpillPlacement.cpp
SplitKit.cpp
- Splitter.cpp
StackProtector.cpp
StackSlotColoring.cpp
StrongPHIElimination.cpp
TailDuplication.cpp
+ TargetFrameLoweringImpl.cpp
TargetInstrInfoImpl.cpp
TargetLoweringObjectFileImpl.cpp
+ TargetOptionsImpl.cpp
TwoAddressInstructionPass.cpp
UnreachableBlockElim.cpp
VirtRegMap.cpp
)
-add_llvm_library_dependencies(LLVMCodeGen
- LLVMAnalysis
- LLVMCore
- LLVMMC
- LLVMScalarOpts
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
-
add_subdirectory(SelectionDAG)
add_subdirectory(AsmPrinter)
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 3112c22..48b71d9 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -45,7 +45,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeRegisterCoalescerPass(Registry);
initializeRenderMachineFunctionPass(Registry);
initializeSlotIndexesPass(Registry);
- initializeLoopSplitterPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeStrongPHIEliminationPass(Registry);
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 84c4d59..128143e 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -54,7 +54,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// Clear "do not change" set.
KeepRegs.clear();
- bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+ bool IsReturnBlock = (!BB->empty() && BB->back().isReturn());
// Determine the live-out physregs for this block.
if (IsReturnBlock) {
@@ -193,8 +193,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
// instruction which may not be executed. The second R6 def may or may not
// re-define R6 so it's not safe to change it since the last R6 use cannot be
// changed.
- bool Special = MI->getDesc().isCall() ||
- MI->getDesc().hasExtraSrcRegAllocReq() ||
+ bool Special = MI->isCall() ||
+ MI->hasExtraSrcRegAllocReq() ||
TII->isPredicated(MI);
// Scan the register operands for this instruction and update
@@ -572,7 +572,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
- if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
TII->isPredicated(MI))
// If this instruction's defs have special allocation requirement, don't
// break this anti-dependency.
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
new file mode 100644
index 0000000..16276bd
--- /dev/null
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -0,0 +1,98 @@
+//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This class implements a deterministic finite automaton (DFA) based
+// packetizing mechanism for VLIW architectures. It provides APIs to
+// determine whether there exists a legal mapping of instructions to
+// functional unit assignments in a packet. The DFA is auto-generated from
+// the target's Schedule.td file.
+//
+// A DFA consists of 3 major elements: states, inputs, and transitions. For
+// the packetizing mechanism, the input is the set of instruction classes for
+// a target. The state models all possible combinations of functional unit
+// consumption for a given set of instructions in a packet. A transition
+// models the addition of an instruction to a packet. In the DFA constructed
+// by this class, if an instruction can be added to a packet, then a valid
+// transition exists from the corresponding state. Invalid transitions
+// indicate that the instruction cannot be added to the current packet.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCInstrItineraries.h"
+using namespace llvm;
+
+DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
+ const unsigned *SET):
+ InstrItins(I), CurrentState(0), DFAStateInputTable(SIT),
+ DFAStateEntryTable(SET) {}
+
+
+//
+// ReadTable - Read the DFA transition table and update CachedTable.
+//
+// Format of the transition tables:
+// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
+// transitions
+// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable
+// for the ith state
+//
+void DFAPacketizer::ReadTable(unsigned int state) {
+ unsigned ThisState = DFAStateEntryTable[state];
+ unsigned NextStateInTable = DFAStateEntryTable[state+1];
+ // Early exit in case CachedTable already contains this
+ // state's transitions.
+ if (CachedTable.count(UnsignPair(state,
+ DFAStateInputTable[ThisState][0])))
+ return;
+
+ for (unsigned i = ThisState; i < NextStateInTable; i++)
+ CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] =
+ DFAStateInputTable[i][1];
+}
+
+
+// canReserveResources - Check if the resources occupied by an MCInstrDesc
+// are available in the current state.
+bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
+ unsigned InsnClass = MID->getSchedClass();
+ const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
+ unsigned FuncUnits = IS->getUnits();
+ UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ ReadTable(CurrentState);
+ return (CachedTable.count(StateTrans) != 0);
+}
+
+
+// reserveResources - Reserve the resources occupied by an MCInstrDesc and
+// change the current state to reflect that change.
+void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
+ unsigned InsnClass = MID->getSchedClass();
+ const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
+ unsigned FuncUnits = IS->getUnits();
+ UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ ReadTable(CurrentState);
+ assert(CachedTable.count(StateTrans) != 0);
+ CurrentState = CachedTable[StateTrans];
+}
+
+
+// canReserveResources - Check if the resources occupied by a machine
+// instruction are available in the current state.
+bool DFAPacketizer::canReserveResources(llvm::MachineInstr *MI) {
+ const llvm::MCInstrDesc &MID = MI->getDesc();
+ return canReserveResources(&MID);
+}
+
+// reserveResources - Reserve the resources occupied by a machine
+// instruction and change the current state to reflect that change.
+void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) {
+ const llvm::MCInstrDesc &MID = MI->getDesc();
+ reserveResources(&MID);
+}
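
A sketch of how a VLIW target would drive the new packetizer (illustrative only; clearResources() as a way to reset CurrentState for a fresh packet is assumed from the accompanying header, and how a finished packet is closed is target-specific):

    void packetizeBlock(llvm::DFAPacketizer &DFA, llvm::MachineBasicBlock &MBB) {
      DFA.clearResources();                // assumed: start with an empty packet
      for (llvm::MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E; ++I) {
        if (!DFA.canReserveResources(I)) { // no legal FU assignment remains
          // ... end the current packet here (target-specific) ...
          DFA.clearResources();            // then begin a new one
        }
        DFA.reserveResources(I);           // commit I to the current packet
      }
    }
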
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 6de6c0c..ba135e1 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -102,7 +102,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
LivePhysRegs = ReservedRegs;
// Also add any explicit live-out physregs for this block.
- if (!MBB->empty() && MBB->back().getDesc().isReturn())
+ if (!MBB->empty() && MBB->back().isReturn())
for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
unsigned Reg = *LOI;
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index 300f037..4ec75cd 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -454,7 +454,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
assert(!MI->isDebugValue() && "Won't process debug values");
const MCInstrDesc &MCID = MI->getDesc();
for (unsigned i = 0,
- e = MCID.isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
+ e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index a67140e..b5f107d 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -62,8 +62,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *MI = MBBI++;
// If MI is a pseudo, expand it.
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.usesCustomInsertionHook()) {
+ if (MI->usesCustomInsertionHook()) {
Changed = true;
MachineBasicBlock *NewMBB =
TLI->EmitInstrWithCustomInserter(MI, MBB);
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index e2a14a8..3d23db0 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -207,7 +207,7 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
++mi;
// Only expand pseudos.
- if (!MI->getDesc().isPseudo())
+ if (!MI->isPseudo())
continue;
// Give targets a chance to expand even standard pseudos.
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 9349797..e2c7132 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -386,7 +386,7 @@ void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
BBE = MF.end(); BBI != BBE; ++BBI)
for (MachineBasicBlock::iterator MI = BBI->begin(),
ME = BBI->end(); MI != ME; ++MI)
- if (MI->getDesc().isCall())
+ if (MI->isCall())
VisitCallPoint(MI);
}
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index d888939..bd31fdf 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -573,12 +573,12 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
// blocks, move the end iterators up past any branch instructions.
while (TIE != TIB) {
--TIE;
- if (!TIE->getDesc().isBranch())
+ if (!TIE->isBranch())
break;
}
while (FIE != FIB) {
--FIE;
- if (!FIE->getDesc().isBranch())
+ if (!FIE->isBranch())
break;
}
@@ -651,12 +651,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
if (I->isDebugValue())
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isNotDuplicable())
+ if (I->isNotDuplicable())
BBI.CannotBeCopied = true;
bool isPredicated = TII->isPredicated(I);
- bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch();
+ bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch();
if (!isCondBr) {
if (!isPredicated) {
@@ -1395,9 +1394,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
E = FromBBI.BB->end(); I != E; ++I) {
- const MCInstrDesc &MCID = I->getDesc();
// Do not copy the end of the block branches.
- if (IgnoreBr && MCID.isBranch())
+ if (IgnoreBr && I->isBranch())
break;
MachineInstr *MI = MF.CloneMachineInstr(I);
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 59907d9..9bf810e 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -759,7 +759,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
// Find all spills and copies of VNI.
for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg);
MachineInstr *MI = UI.skipInstruction();) {
- if (!MI->isCopy() && !MI->getDesc().mayStore())
+ if (!MI->isCopy() && !MI->mayStore())
continue;
SlotIndex Idx = LIS.getInstructionIndex(MI);
if (LI->getVNInfoAt(Idx) != VNI)
@@ -878,7 +878,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
// Before rematerializing into a register for a single instruction, try to
// fold a load into the instruction. That avoids allocating a new register.
- if (RM.OrigMI->getDesc().canFoldAsLoad() &&
+ if (RM.OrigMI->canFoldAsLoad() &&
foldMemoryOperand(MI, Ops, RM.OrigMI)) {
Edit->markRematerialized(RM.ParentVNI);
++NumFoldedLoads;
@@ -957,7 +957,7 @@ void InlineSpiller::reMaterializeAll() {
if (DeadDefs.empty())
return;
DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
- Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+ Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII, RegsToSpill);
// Get rid of deleted and empty intervals.
for (unsigned i = RegsToSpill.size(); i != 0; --i) {
@@ -1240,7 +1240,7 @@ void InlineSpiller::spillAll() {
// Hoisted spills may cause dead code.
if (!DeadDefs.empty()) {
DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
- Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+ Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII, RegsToSpill);
}
// Finally delete the SnippetCopies.
diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt
index 2eebb08..fee0347 100644
--- a/lib/CodeGen/LLVMBuild.txt
+++ b/lib/CodeGen/LLVMBuild.txt
@@ -15,9 +15,11 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = AsmPrinter SelectionDAG
+
[component_0]
type = Library
name = CodeGen
parent = Libraries
required_libraries = Analysis Core MC Scalar Support Target TransformUtils
-
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 03b5693..62227fd 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -41,10 +41,6 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-namespace llvm {
- bool EnableFastISel;
-}
-
static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
cl::desc("Disable Post Regalloc"));
static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
@@ -114,9 +110,10 @@ EnableFastISelOption("fast-isel", cl::Hidden,
LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
StringRef CPU, StringRef FS,
+ TargetOptions Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : TargetMachine(T, Triple, CPU, FS) {
+ : TargetMachine(T, Triple, CPU, FS, Options) {
CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL);
AsmInfo = T.createMCAsmInfo(Triple);
// TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
@@ -275,14 +272,15 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
return false; // success!
}
-static void printNoVerify(PassManagerBase &PM, const char *Banner) {
- if (PrintMachineCode)
+void LLVMTargetMachine::printNoVerify(PassManagerBase &PM,
+ const char *Banner) const {
+ if (Options.PrintMachineCode)
PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
}
-static void printAndVerify(PassManagerBase &PM,
- const char *Banner) {
- if (PrintMachineCode)
+void LLVMTargetMachine::printAndVerify(PassManagerBase &PM,
+ const char *Banner) const {
+ if (Options.PrintMachineCode)
PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
if (VerifyMachineCode)
@@ -380,7 +378,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
if (EnableFastISelOption == cl::BOU_TRUE ||
(getOptLevel() == CodeGenOpt::None &&
EnableFastISelOption != cl::BOU_FALSE))
- EnableFastISel = true;
+ Options.EnableFastISel = true;
// Ask the target for an isel.
if (addInstSelector(PM))
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index eb54baa7..c35302a 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -920,8 +920,8 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
}
// Don't insert anything after the first terminator, though.
- return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() :
- llvm::next(MachineBasicBlock::iterator(MI));
+ return MI->isTerminator() ? MBB->getFirstTerminator() :
+ llvm::next(MachineBasicBlock::iterator(MI));
}
DebugLoc UserValue::findDebugLoc() {
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index edcfebe..1e58173 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -794,7 +794,7 @@ LiveIntervals::getLastSplitPoint(const LiveInterval &li,
MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin();
while (I != B) {
--I;
- if (I->getDesc().isCall())
+ if (I->isCall())
return I;
}
// The block contains no calls that can throw, so use the first terminator.
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 2f283b2..a470877 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -129,7 +129,7 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM,
}
// If only cheap remats were requested, bail out early.
- if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove())
+ if (cheapAsAMove && !RM.OrigMI->isAsCheapAsAMove())
return false;
// Verify that all used registers are available with the same values.
@@ -174,7 +174,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
if (MO.isDef()) {
if (DefMI && DefMI != MI)
return false;
- if (!MI->getDesc().canFoldAsLoad())
+ if (!MI->canFoldAsLoad())
return false;
DefMI = MI;
} else if (!MO.isUndef()) {
@@ -210,7 +210,8 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
LiveIntervals &LIS, VirtRegMap &VRM,
- const TargetInstrInfo &TII) {
+ const TargetInstrInfo &TII,
+ ArrayRef<unsigned> RegsBeingSpilled) {
SetVector<LiveInterval*,
SmallVector<LiveInterval*, 8>,
SmallPtrSet<LiveInterval*, 8> > ToShrink;
@@ -290,6 +291,21 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
delegate_->LRE_WillShrinkVirtReg(LI->reg);
if (!LIS.shrinkToUses(LI, &Dead))
continue;
+
+ // Don't create new intervals for a register being spilled.
+ // The new intervals would have to be spilled anyway, so it's not worth it.
+ // Also, they currently aren't spilled, so creating them and not spilling
+ // them results in incorrect code.
+ bool BeingSpilled = false;
+ for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) {
+ if (LI->reg == RegsBeingSpilled[i]) {
+ BeingSpilled = true;
+ break;
+ }
+ }
+
+ if (BeingSpilled) continue;
+
// LI may have been separated, create new intervals.
LI->RenumberValues(LIS);
diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h
index 9b0a671..057d9bb 100644
--- a/lib/CodeGen/LiveRangeEdit.h
+++ b/lib/CodeGen/LiveRangeEdit.h
@@ -191,9 +191,14 @@ public:
/// eliminateDeadDefs - Try to delete machine instructions that are now dead
/// (allDefsAreDead returns true). This may cause live intervals to be trimmed
/// and further dead defs to be eliminated.
+ /// RegsBeingSpilled lists registers currently being spilled by the register
+ /// allocator. These registers should not be split into new intervals
/// as those new intervals are currently not guaranteed to be spilled.
void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
LiveIntervals&, VirtRegMap&,
- const TargetInstrInfo&);
+ const TargetInstrInfo&,
+ ArrayRef<unsigned> RegsBeingSpilled
+ = ArrayRef<unsigned>());
/// calculateRegClassAndHint - Recompute register class and hint for each new
/// register.
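
Because RegsBeingSpilled defaults to an empty ArrayRef, existing call sites compile unchanged; the spiller threads its in-flight set through, mirroring the InlineSpiller change above (sketch):

    // Inside a spiller: RegsToSpill holds the vregs currently being spilled.
    Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII, RegsToSpill);

    // Elsewhere, the old four-argument form still works via the default:
    Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
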
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 2ca90f9..7477d91 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -590,8 +590,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// them. The tail callee need not take the same registers as input
// that it produces as output, and there are dependencies for its input
// registers elsewhere.
- if (!MBB->empty() && MBB->back().getDesc().isReturn()
- && !MBB->back().getDesc().isCall()) {
+ if (!MBB->empty() && MBB->back().isReturn()
+ && !MBB->back().isCall()) {
MachineInstr *Ret = &MBB->back();
for (MachineRegisterInfo::liveout_iterator
@@ -754,7 +754,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
const unsigned NumNew = BB->getNumber();
// All registers used by PHI nodes in SuccBB must be live through BB.
- for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(),
+ for (MachineBasicBlock::iterator BBI = SuccBB->begin(),
BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI)
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
if (BBI->getOperand(i+1).getMBB() == BB)
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index b9d1ef7..6734916 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -73,7 +73,8 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
// Make sure the instructions have their operands in the reginfo lists.
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I)
+ for (MachineBasicBlock::instr_iterator
+ I = N->instr_begin(), E = N->instr_end(); I != E; ++I)
I->AddRegOperandsToUseLists(RegInfo);
LeakDetector::removeGarbageObject(N);
@@ -120,8 +121,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
/// lists.
void ilist_traits<MachineInstr>::
transferNodesFromList(ilist_traits<MachineInstr> &fromList,
- MachineBasicBlock::iterator first,
- MachineBasicBlock::iterator last) {
+ ilist_iterator<MachineInstr> first,
+ ilist_iterator<MachineInstr> last) {
assert(Parent->getParent() == fromList.Parent->getParent() &&
"MachineInstr parent mismatch!");
@@ -140,9 +141,10 @@ void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
}
MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
- iterator I = begin();
+ instr_iterator I = instr_begin();
while (I != end() && I->isPHI())
++I;
+ assert(!I->isInsideBundle() && "First non-phi MI cannot be inside a bundle!");
return I;
}
@@ -150,23 +152,63 @@ MachineBasicBlock::iterator
MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue()))
++I;
+ // FIXME: This needs to change if we wish to bundle labels / dbg_values
+ // inside the bundle.
+ assert(!I->isInsideBundle() &&
+ "First non-phi / non-label instruction is inside a bundle!");
return I;
}
MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
iterator I = end();
- while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue()))
+ while (I != begin() && ((--I)->isTerminator() || I->isDebugValue()))
; /*noop */
- while (I != end() && !I->getDesc().isTerminator())
+ while (I != end() && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::const_iterator
+MachineBasicBlock::getFirstTerminator() const {
+ const_iterator I = end();
+ while (I != begin() && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != end() && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {
+ instr_iterator I = instr_end();
+ while (I != instr_begin() && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != instr_end() && !I->isTerminator())
++I;
return I;
}
MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
- iterator B = begin(), I = end();
+ // Skip over end-of-block dbg_value instructions.
+ instr_iterator B = instr_begin(), I = instr_end();
while (I != B) {
--I;
- if (I->isDebugValue())
+ // Return instruction that starts a bundle.
+ if (I->isDebugValue() || I->isInsideBundle())
+ continue;
+ return I;
+ }
+ // The block is all debug values.
+ return end();
+}
+
+MachineBasicBlock::const_iterator
+MachineBasicBlock::getLastNonDebugInstr() const {
+ // Skip over end-of-block dbg_value instructions.
+ const_instr_iterator B = instr_begin(), I = instr_end();
+ while (I != B) {
+ --I;
+ // Return instruction that starts a bundle.
+ if (I->isDebugValue() || I->isInsideBundle())
continue;
return I;
}
@@ -203,8 +245,6 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
return;
}
- if (Alignment) { OS << "Alignment " << Alignment << "\n"; }
-
if (Indexes)
OS << Indexes->getMBBStartIdx(this) << '\t';
@@ -218,6 +258,12 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
}
if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
+ if (Alignment) {
+ OS << Comma << "Align " << Alignment << " (" << (1u << Alignment)
+ << " bytes)";
+ Comma = ", ";
+ }
+
OS << '\n';
const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
@@ -237,13 +283,15 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
OS << '\n';
}
- for (const_iterator I = begin(); I != end(); ++I) {
+ for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) {
if (Indexes) {
if (Indexes->hasIndex(I))
OS << Indexes->getInstructionIndex(I);
OS << '\t';
}
OS << '\t';
+ if (I->isInsideBundle())
+ OS << " * ";
I->print(OS, &getParent()->getTarget());
}
@@ -449,8 +497,8 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
fromMBB->removeSuccessor(Succ);
// Fix up any PHI nodes in the successor.
- for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end();
- MI != ME && MI->isPHI(); ++MI)
+ for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(),
+ ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI)
for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
MachineOperand &MO = MI->getOperand(i);
if (MO.getMBB() == fromMBB)
@@ -492,8 +540,8 @@ bool MachineBasicBlock::canFallThrough() {
// Barrier is predicated and thus no longer an actual control barrier. This
// is over-conservative though, because if an instruction isn't actually
// predicated we could still treat it like a barrier.
- return empty() || !back().getDesc().isBarrier() ||
- back().getDesc().isPredicable();
+ return empty() || !back().isBarrier() ||
+ back().isPredicable();
}
// If there is no branch, control always falls through.
@@ -552,7 +600,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// Collect a list of virtual registers killed by the terminators.
SmallVector<unsigned, 4> KilledRegs;
if (LV)
- for (iterator I = getFirstTerminator(), E = end(); I != E; ++I) {
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I) {
MachineInstr *MI = I;
for (MachineInstr::mop_iterator OI = MI->operands_begin(),
OE = MI->operands_end(); OI != OE; ++OI) {
@@ -579,7 +628,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
}
// Fix PHI nodes in Succ so they refer to NMBB instead of this
- for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end();
+ for (MachineBasicBlock::instr_iterator
+ i = Succ->instr_begin(),e = Succ->instr_end();
i != e && i->isPHI(); ++i)
for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
if (i->getOperand(ni+1).getMBB() == this)
@@ -595,7 +645,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// Restore kills of virtual registers that were killed by the terminators.
while (!KilledRegs.empty()) {
unsigned Reg = KilledRegs.pop_back_val();
- for (iterator I = end(), E = begin(); I != E;) {
+ for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
if (!(--I)->addRegisterKilled(Reg, NULL, /* addIfNotFound= */ false))
continue;
LV->getVarInfo(Reg).Kills.push_back(I);
@@ -664,6 +714,41 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
return NMBB;
}
+MachineBasicBlock::iterator
+MachineBasicBlock::erase(MachineBasicBlock::iterator I) {
+ if (I->isBundle()) {
+ MachineBasicBlock::iterator E = llvm::next(I);
+ return Insts.erase(I.getInstrIterator(), E.getInstrIterator());
+ }
+
+ return Insts.erase(I.getInstrIterator());
+}
+
+MachineInstr *MachineBasicBlock::remove(MachineInstr *I) {
+ if (I->isBundle()) {
+ MachineBasicBlock::instr_iterator MII = I; ++MII;
+ while (MII != end() && MII->isInsideBundle()) {
+ MachineInstr *MI = &*MII++;
+ Insts.remove(MI);
+ }
+ }
+
+ return Insts.remove(I);
+}
+
+void MachineBasicBlock::splice(MachineBasicBlock::iterator where,
+ MachineBasicBlock *Other,
+ MachineBasicBlock::iterator From) {
+ if (From->isBundle()) {
+ MachineBasicBlock::iterator To = llvm::next(From);
+ Insts.splice(where.getInstrIterator(), Other->Insts,
+ From.getInstrIterator(), To.getInstrIterator());
+ return;
+ }
+
+ Insts.splice(where.getInstrIterator(), Other->Insts, From.getInstrIterator());
+}
+
/// removeFromParent - This method unlinks 'this' from the containing function,
/// and returns it, but does not delete it.
MachineBasicBlock *MachineBasicBlock::removeFromParent() {
@@ -687,10 +772,10 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
MachineBasicBlock *New) {
assert(Old != New && "Cannot replace self with self!");
- MachineBasicBlock::iterator I = end();
- while (I != begin()) {
+ MachineBasicBlock::instr_iterator I = instr_end();
+ while (I != instr_begin()) {
--I;
- if (!I->getDesc().isTerminator()) break;
+ if (!I->isTerminator()) break;
// Scan the operands of this machine instruction, replacing any uses of Old
// with New.
@@ -769,17 +854,17 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
/// any DBG_VALUE instructions. Return UnknownLoc if there is none.
DebugLoc
-MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) {
+MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
DebugLoc DL;
- MachineBasicBlock::iterator E = end();
- if (MBBI != E) {
- // Skip debug declarations, we don't want a DebugLoc from them.
- MachineBasicBlock::iterator MBBI2 = MBBI;
- while (MBBI2 != E && MBBI2->isDebugValue())
- MBBI2++;
- if (MBBI2 != E)
- DL = MBBI2->getDebugLoc();
- }
+ instr_iterator E = instr_end();
+ if (MBBI == E)
+ return DL;
+
+ // Skip debug declarations; we don't want a DebugLoc from them.
+ while (MBBI != E && MBBI->isDebugValue())
+ MBBI++;
+ if (MBBI != E)
+ DL = MBBI->getDebugLoc();
return DL;
}
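
With bundles in play, begin()/end() advance one whole bundle at a time, while instr_begin()/instr_end() visit every MachineInstr. Passes that must see bundled instructions should use the instr_ variants, e.g. (illustrative sketch):

    // Count every real instruction, including those packed inside bundles.
    unsigned countInstrs(const llvm::MachineBasicBlock &MBB) {
      unsigned N = 0;
      for (llvm::MachineBasicBlock::const_instr_iterator I = MBB.instr_begin(),
             E = MBB.instr_end(); I != E; ++I)
        if (!I->isDebugValue())
          ++N;
      return N;
    }
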
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 55d804b..638d895 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -36,10 +36,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -56,22 +53,6 @@ STATISTIC(UncondBranchTakenFreq,
"Potential frequency of taking unconditional branches");
namespace {
-/// \brief A structure for storing a weighted edge.
-///
-/// This stores an edge and its weight, computed as the product of the
-/// frequency that the starting block is entered with the probability of
-/// a particular exit block.
-struct WeightedEdge {
- BlockFrequency EdgeFrequency;
- MachineBasicBlock *From, *To;
-
- bool operator<(const WeightedEdge &RHS) const {
- return EdgeFrequency < RHS.EdgeFrequency;
- }
-};
-}
-
-namespace {
class BlockChain;
/// \brief Type for our function-wide basic block -> block chain mapping.
typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
@@ -222,6 +203,9 @@ class MachineBlockPlacement : public MachineFunctionPass {
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
const BlockFilterSet *BlockFilter = 0);
+ MachineBasicBlock *findBestLoopTop(MachineFunction &F,
+ MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
void buildLoopChains(MachineFunction &F, MachineLoop &L);
void buildCFGChains(MachineFunction &F);
void AlignLoops(MachineFunction &F);
@@ -546,12 +530,134 @@ void MachineBlockPlacement::buildChain(
markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
Chain.merge(BestSucc, &SuccChain);
BB = *llvm::prior(Chain.end());
- };
+ }
DEBUG(dbgs() << "Finished forming chain for header block "
<< getBlockNum(*Chain.begin()) << "\n");
}
+/// \brief Find the best loop top block for layout.
+///
+/// This routine implements the logic to analyze the loop looking for the best
+/// block to lay out at the top of the loop. Typically this is done to maximize
+/// fallthrough opportunities.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(MachineFunction &F,
+ MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ BlockFrequency BestExitEdgeFreq;
+ MachineBasicBlock *ExitingBB = 0;
+ MachineBasicBlock *LoopingBB = 0;
+ // If there are exits to outer loops, loop rotation can severely limit
+ // fallthrough opportunities unless it selects such an exit. Keep a set of
+ // blocks where rotating to exit with that block will reach an outer loop.
+ SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
+
+ DEBUG(dbgs() << "Finding best loop exit for: "
+ << getBlockName(L.getHeader()) << "\n");
+ for (MachineLoop::block_iterator I = L.block_begin(),
+ E = L.block_end();
+ I != E; ++I) {
+ BlockChain &Chain = *BlockToChain[*I];
+ // Ensure that this block is at the end of a chain; otherwise it could be
+ // mid-way through an inner loop or a successor of an analyzable branch.
+ if (*I != *llvm::prior(Chain.end()))
+ continue;
+
+ // Now walk the successors. We need to establish whether this has a viable
+ // exiting successor and whether it has a viable non-exiting successor.
+ // We store the old exiting state and restore it if a viable looping
+ // successor isn't found.
+ MachineBasicBlock *OldExitingBB = ExitingBB;
+ BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
+ // We also compute and store the best looping successor for use in layout.
+ MachineBasicBlock *BestLoopSucc = 0;
+ // FIXME: Due to the performance of the probability and weight routines in
+ // the MBPI analysis, we use the internal weights. This is only valid
+ // because it is purely a ranking function; we don't care about anything
+ // but the relative values.
+ uint32_t BestLoopSuccWeight = 0;
+ // FIXME: We also manually compute the probabilities to avoid quadratic
+ // behavior.
+ uint32_t WeightScale = 0;
+ uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale);
+ for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(),
+ SE = (*I)->succ_end();
+ SI != SE; ++SI) {
+ if ((*SI)->isLandingPad())
+ continue;
+ if (*SI == *I)
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ // Don't split chains, either this chain or the successor's chain.
+ if (&Chain == &SuccChain || *SI != *SuccChain.begin()) {
+ DEBUG(dbgs() << " " << (LoopBlockSet.count(*SI) ? "looping: "
+ : "exiting: ")
+ << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (chain conflict)\n");
+ continue;
+ }
+
+ uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI);
+ if (LoopBlockSet.count(*SI)) {
+ DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (" << SuccWeight << ")\n");
+ if (BestLoopSucc && BestLoopSuccWeight >= SuccWeight)
+ continue;
+
+ BestLoopSucc = *SI;
+ BestLoopSuccWeight = SuccWeight;
+ continue;
+ }
+
+ BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+ BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
+ DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (" << ExitEdgeFreq << ")\n");
+ // Note that we slightly bias this toward an existing layout successor to
+ // retain incoming order in the absence of better information.
+ // FIXME: Should we bias this more strongly? It's pretty weak.
+ if (!ExitingBB || ExitEdgeFreq > BestExitEdgeFreq ||
+ ((*I)->isLayoutSuccessor(*SI) &&
+ !(ExitEdgeFreq < BestExitEdgeFreq))) {
+ BestExitEdgeFreq = ExitEdgeFreq;
+ ExitingBB = *I;
+ }
+
+ if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI))
+ if (ExitLoop->contains(&L))
+ BlocksExitingToOuterLoop.insert(*I);
+ }
+
+ // Restore the old exiting state, no viable looping successor was found.
+ if (!BestLoopSucc) {
+ ExitingBB = OldExitingBB;
+ BestExitEdgeFreq = OldBestExitEdgeFreq;
+ continue;
+ }
+
+ // If this was the best exiting block thus far, also record the looping block.
+ if (ExitingBB == *I)
+ LoopingBB = BestLoopSucc;
+ }
+ // Without a candidate exiting block or with only a single block in the
+ // loop, just use the loop header to lay out the loop.
+ if (!ExitingBB || L.getNumBlocks() == 1)
+ return L.getHeader();
+
+ // Also, if we have exit blocks which lead to outer loops but didn't select
+ // one of them as the exiting block we are rotating toward, disable loop
+ // rotation altogether.
+ if (!BlocksExitingToOuterLoop.empty() &&
+ !BlocksExitingToOuterLoop.count(ExitingBB))
+ return L.getHeader();
+
+ assert(LoopingBB && "All successors of a loop block are exit blocks!");
+ DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n");
+ DEBUG(dbgs() << " Best top block: " << getBlockName(LoopingBB) << "\n");
+ return LoopingBB;
+}
+
/// \brief Forms basic block chains from the natural loop structures.
///
/// These chains are designed to preserve the existing *structure* of the code
@@ -567,17 +673,21 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
- BlockChain &LoopChain = *BlockToChain[L.getHeader()];
+
+ MachineBasicBlock *LayoutTop = findBestLoopTop(F, L, LoopBlockSet);
+ BlockChain &LoopChain = *BlockToChain[LayoutTop];
// FIXME: This is a really lame way of walking the chains in the loop: we
// walk the blocks, and use a set to prevent visiting a particular chain
// twice.
SmallPtrSet<BlockChain *, 4> UpdatedPreds;
+ assert(LoopChain.LoopPredecessors == 0);
+ UpdatedPreds.insert(&LoopChain);
for (MachineLoop::block_iterator BI = L.block_begin(),
BE = L.block_end();
BI != BE; ++BI) {
BlockChain &Chain = *BlockToChain[*BI];
- if (!UpdatedPreds.insert(&Chain) || BI == L.block_begin())
+ if (!UpdatedPreds.insert(&Chain))
continue;
assert(Chain.LoopPredecessors == 0);
@@ -597,7 +707,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
BlockWorkList.push_back(*Chain.begin());
}
- buildChain(*L.block_begin(), LoopChain, BlockWorkList, &LoopBlockSet);
+ buildChain(LayoutTop, LoopChain, BlockWorkList, &LoopBlockSet);
DEBUG({
// Crash at the end so we get all of the debugging output first.
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 7eda8c1..8c02cd7 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -260,12 +260,11 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
return false;
// Ignore stuff that we obviously can't move.
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() ||
+ if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
MI->hasUnmodeledSideEffects())
return false;
- if (MCID.mayLoad()) {
+ if (MI->mayLoad()) {
// Okay, this instruction does a load. As a refinement, we allow the target
// to decide whether the loaded value is actually a constant. If so, we can
// actually use it as a load.
@@ -287,7 +286,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
// an immediate predecessor. We don't want to increase register pressure and
// end up causing other computation to be spilled.
- if (MI->getDesc().isAsCheapAsAMove()) {
+ if (MI->isAsCheapAsAMove()) {
MachineBasicBlock *CSBB = CSMI->getParent();
MachineBasicBlock *BB = MI->getParent();
if (CSBB != BB && !CSBB->isSuccessor(BB))
@@ -376,7 +375,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Commute commutable instructions.
bool Commuted = false;
- if (!FoundCSE && MI->getDesc().isCommutable()) {
+ if (!FoundCSE && MI->isCommutable()) {
MachineInstr *NewMI = TII->commuteInstruction(MI);
if (NewMI) {
Commuted = true;
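
The MachineCSE hunk above is one instance of a refactor that recurs through the rest of this commit: flag queries such as mayStore() and isCall() move from the static MCInstrDesc onto MachineInstr itself, so that a bundle-aware implementation can sit behind the same call sites. A simplified sketch of the forwarding idiom (stand-in types, not the real LLVM headers):

    #include <cstdint>

    // Simplified stand-ins for the MCInstrDesc / MachineInstr split.
    enum Flag { MayLoad, MayStore, Call, Terminator };

    struct InstrDesc {
      uint32_t Flags; // one bit per Flag
      bool hasFlag(Flag F) const { return (Flags >> F) & 1u; }
    };

    struct Instr {
      const InstrDesc *Desc;
      // Queries live on the instruction itself, so call sites no longer
      // spell out MI->getDesc().mayLoad(), and a bundle-aware version
      // can be layered underneath without touching them.
      bool mayLoad() const      { return Desc->hasFlag(MayLoad); }
      bool mayStore() const     { return Desc->hasFlag(MayStore); }
      bool isCall() const       { return Desc->hasFlag(Call); }
      bool isTerminator() const { return Desc->hasFlag(Terminator); }
    };
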
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index b0ef9d4..ec5a1cd 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -178,6 +178,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
IsKill = isKill;
IsDead = isDead;
IsUndef = isUndef;
+ IsInternalRead = false;
IsEarlyClobber = false;
IsDebug = isDebug;
SubReg = 0;
@@ -240,7 +241,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << PrintReg(getReg(), TRI, getSubReg());
if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
- isEarlyClobber()) {
+ isInternalRead() || isEarlyClobber()) {
OS << '<';
bool NeedComma = false;
if (isDef()) {
@@ -256,14 +257,26 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
NeedComma = true;
}
- if (isKill() || isDead() || isUndef()) {
+ if (isKill() || isDead() || isUndef() || isInternalRead()) {
if (NeedComma) OS << ',';
- if (isKill()) OS << "kill";
- if (isDead()) OS << "dead";
+ NeedComma = false;
+ if (isKill()) {
+ OS << "kill";
+ NeedComma = true;
+ }
+ if (isDead()) {
+ OS << "dead";
+ NeedComma = true;
+ }
if (isUndef()) {
- if (isKill() || isDead())
- OS << ',';
+ if (NeedComma) OS << ',';
OS << "undef";
+ NeedComma = true;
+ }
+ if (isInternalRead()) {
+ if (NeedComma) OS << ',';
+ OS << "internal";
+ NeedComma = true;
}
}
OS << '>';
@@ -735,6 +748,27 @@ void MachineInstr::addMemOperand(MachineFunction &MF,
MemRefsEnd = NewMemRefsEnd;
}
+bool
+MachineInstr::hasProperty(unsigned MCFlag, QueryType Type) const {
+ if (Type == IgnoreBundle || !isBundle())
+ return getDesc().getFlags() & (1 << MCFlag);
+
+ const MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::const_instr_iterator MII = *this; ++MII;
+ while (MII != MBB->end() && MII->isInsideBundle()) {
+ if (MII->getDesc().getFlags() & (1 << MCFlag)) {
+ if (Type == AnyInBundle)
+ return true;
+ } else {
+ if (Type == AllInBundle)
+ return false;
+ }
+ ++MII;
+ }
+
+ return Type == AllInBundle;
+}
+
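hasProperty() is what gives those forwarded queries their bundle semantics: IgnoreBundle reads only the queried instruction's own flags, AnyInBundle ORs the flag across the bundled instructions, and AllInBundle ANDs it. The same aggregation restated over a plain vector of per-member flags (a hypothetical helper, not the real API):

    #include <vector>

    enum QueryType { IgnoreBundle, AnyInBundle, AllInBundle };

    bool aggregate(bool HeaderHasFlag, QueryType Type,
                   const std::vector<bool> &MemberHasFlag) {
      if (Type == IgnoreBundle)
        return HeaderHasFlag;   // look at the BUNDLE node only
      for (bool B : MemberHasFlag) {
        if (B && Type == AnyInBundle)
          return true;          // one member having it is enough
        if (!B && Type == AllInBundle)
          return false;         // one member missing it disqualifies
      }
      // Any: nothing matched -> false; All: nothing failed -> true.
      return Type == AllInBundle;
    }
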
bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
MICheckType Check) const {
// If opcodes or number of operands are not the same then the two
@@ -743,6 +777,19 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
Other->getNumOperands() != getNumOperands())
return false;
+ if (isBundle()) {
+ // Both instructions are bundles, compare MIs inside the bundle.
+ MachineBasicBlock::const_instr_iterator I1 = *this;
+ MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
+ MachineBasicBlock::const_instr_iterator I2 = *Other;
+ MachineBasicBlock::const_instr_iterator E2 = Other->getParent()->instr_end();
+ while (++I1 != E1 && I1->isInsideBundle()) {
+ ++I2;
+ if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check))
+ return false;
+ }
+ }
+
// Check operands to make sure they match.
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
@@ -789,6 +836,18 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
/// block, and returns it, but does not delete it.
MachineInstr *MachineInstr::removeFromParent() {
assert(getParent() && "Not embedded in a basic block!");
+
+ // If it's a bundle then remove the MIs inside the bundle as well.
+ if (isBundle()) {
+ MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::instr_iterator MII = *this; ++MII;
+ MachineBasicBlock::instr_iterator E = MBB->instr_end();
+ while (MII != E && MII->isInsideBundle()) {
+ MachineInstr *MI = &*MII;
+ ++MII;
+ MBB->remove(MI);
+ }
+ }
getParent()->remove(this);
return this;
}
@@ -798,6 +857,17 @@ MachineInstr *MachineInstr::removeFromParent() {
/// block, and deletes it.
void MachineInstr::eraseFromParent() {
assert(getParent() && "Not embedded in a basic block!");
+ // If it's a bundle then remove the MIs inside the bundle as well.
+ if (isBundle()) {
+ MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::instr_iterator MII = *this; ++MII;
+ MachineBasicBlock::instr_iterator E = MBB->instr_end();
+ while (MII != E && MII->isInsideBundle()) {
+ MachineInstr *MI = &*MII;
+ ++MII;
+ MBB->erase(MI);
+ }
+ }
getParent()->erase(this);
}
@@ -887,6 +957,20 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
return NULL;
}
+/// getBundleSize - Return the number of instructions inside the MI bundle.
+unsigned MachineInstr::getBundleSize() const {
+ assert(isBundle() && "Expecting a bundle");
+
+ MachineBasicBlock::const_instr_iterator I = *this;
+ unsigned Size = 0;
+ while ((++I)->isInsideBundle()) {
+ ++Size;
+ }
+ assert(Size > 1 && "Malformed bundle");
+
+ return Size;
+}
+
/// findRegisterUseOperandIdx() - Returns the operand index that is a use of
/// the specific register, or -1 if it is not found. It further tightens
/// the search criteria to a use that kills the register if isKill is true.
@@ -1118,6 +1202,8 @@ void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
/// copyPredicates - Copies predicate operand(s) from MI.
void MachineInstr::copyPredicates(const MachineInstr *MI) {
+ assert(!isBundle() && "MachineInstr::copyPredicates() can't handle bundles");
+
const MCInstrDesc &MCID = MI->getDesc();
if (!MCID.isPredicable())
return;
@@ -1159,13 +1245,13 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
AliasAnalysis *AA,
bool &SawStore) const {
// Ignore stuff that we obviously can't move.
- if (MCID->mayStore() || MCID->isCall()) {
+ if (mayStore() || isCall()) {
SawStore = true;
return false;
}
if (isLabel() || isDebugValue() ||
- MCID->isTerminator() || hasUnmodeledSideEffects())
+ isTerminator() || hasUnmodeledSideEffects())
return false;
// See if this instruction does a load. If so, we have to guarantee that the
@@ -1173,7 +1259,7 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
// destination. The check for isInvariantLoad gives the target the chance to
// classify the load as always returning a constant, e.g. a constant pool
// load.
- if (MCID->mayLoad() && !isInvariantLoad(AA))
+ if (mayLoad() && !isInvariantLoad(AA))
// Otherwise, this is a real load. If there is a store between the load and
// end of block, or if the load is volatile, we can't move it.
return !SawStore && !hasVolatileMemoryRef();
@@ -1213,9 +1299,9 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
/// have no volatile memory references.
bool MachineInstr::hasVolatileMemoryRef() const {
// An instruction known never to access memory won't have a volatile access.
- if (!MCID->mayStore() &&
- !MCID->mayLoad() &&
- !MCID->isCall() &&
+ if (!mayStore() &&
+ !mayLoad() &&
+ !isCall() &&
!hasUnmodeledSideEffects())
return false;
@@ -1239,7 +1325,7 @@ bool MachineInstr::hasVolatileMemoryRef() const {
/// *all* loads the instruction does are invariant (if it does multiple loads).
bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
// If the instruction doesn't load at all, it isn't an invariant load.
- if (!MCID->mayLoad())
+ if (!mayLoad())
return false;
// If the instruction has lost its memoperands, conservatively assume that
@@ -1292,7 +1378,7 @@ unsigned MachineInstr::isConstantValuePHI() const {
}
bool MachineInstr::hasUnmodeledSideEffects() const {
- if (getDesc().hasUnmodeledSideEffects())
+ if (hasProperty(MCID::UnmodeledSideEffects))
return true;
if (isInlineAsm()) {
unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
@@ -1420,7 +1506,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
// call instructions much less noisy on targets where calls clobber lots
// of registers. Don't rely on MO.isDead() because we may be called before
// LiveVariables is run, or we may be looking at a non-allocatable reg.
- if (MF && getDesc().isCall() &&
+ if (MF && isCall() &&
MO.isReg() && MO.isImplicit() && MO.isDef()) {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
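
With the two changes above, removeFromParent() and eraseFromParent() on a BUNDLE header now take the bundled instructions with them. A hedged usage sketch (a fragment, not a complete program; assumes nothing else holds iterators into the bundle):

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    // Erase the first bundle in MBB, members included. Walk with
    // instr_iterator so bundled instructions are visible at all.
    static void eraseFirstBundle(MachineBasicBlock &MBB) {
      for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
             E = MBB.instr_end(); I != E; ++I) {
        if (I->isBundle()) {
          MachineInstr *MI = &*I;
          MI->eraseFromParent(); // also erases every bundled member
          return;
        }
      }
    }
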
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
new file mode 100644
index 0000000..b766d08
--- /dev/null
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -0,0 +1,180 @@
+//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+namespace {
+ class UnpackMachineBundles : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification
+ UnpackMachineBundles() : MachineFunctionPass(ID) {
+ initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ };
+} // end anonymous namespace
+
+char UnpackMachineBundles::ID = 0;
+INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundle",
+ "Unpack machine instruction bundles", false, false)
+
+FunctionPass *llvm::createUnpackMachineBundlesPass() {
+ return new UnpackMachineBundles();
+}
+
+bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(),
+ MIE = MBB->instr_end(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ // Remove the BUNDLE instruction and the InsideBundle flags from bundled
+ // instructions.
+ if (MI->isBundle()) {
+ while (++MII != MIE && MII->isInsideBundle()) {
+ MII->setIsInsideBundle(false);
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (MO.isReg() && MO.isInternalRead())
+ MO.setIsInternalRead(false);
+ }
+ }
+ MI->eraseFromParent();
+
+ Changed = true;
+ continue;
+ }
+
+ ++MII;
+ }
+ }
+
+ return Changed;
+}
+
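A target that forms bundles during scheduling would queue this pass near the end of its codegen pipeline, flattening bundles before any pass that cannot cope with them. A one-line hedged sketch, assuming a legacy PassManagerBase named PM set up elsewhere:

    PM.add(createUnpackMachineBundlesPass());
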
+/// FinalizeBundle - Finalize a machine instruction bundle which includes
+/// a sequence of instructions starting from FirstMI to LastMI (inclusive).
+/// This routine adds a BUNDLE instruction to represent the bundle; it adds
+/// IsInternalRead markers to MachineOperands which are defined inside the
+/// bundle, and it copies externally visible defs and uses to the BUNDLE
+/// instruction.
+void llvm::FinalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI,
+ MachineBasicBlock::instr_iterator LastMI) {
+ const TargetMachine &TM = MBB.getParent()->getTarget();
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(),
+ TII->get(TargetOpcode::BUNDLE));
+
+ SmallVector<unsigned, 8> LocalDefs;
+ SmallSet<unsigned, 8> LocalDefSet;
+ SmallSet<unsigned, 8> DeadDefSet;
+ SmallSet<unsigned, 8> KilledDefSet;
+ SmallVector<unsigned, 8> ExternUses;
+ SmallSet<unsigned, 8> ExternUseSet;
+ SmallSet<unsigned, 8> KilledUseSet;
+ SmallSet<unsigned, 8> UndefUseSet;
+ SmallVector<MachineOperand*, 4> Defs;
+ do {
+ for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = FirstMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(&MO);
+ continue;
+ }
+
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (LocalDefSet.count(Reg)) {
+ MO.setIsInternalRead();
+ if (MO.isKill())
+ // Internal def is now killed.
+ KilledDefSet.insert(Reg);
+ } else {
+ if (ExternUseSet.insert(Reg)) {
+ ExternUses.push_back(Reg);
+ if (MO.isUndef())
+ UndefUseSet.insert(Reg);
+ }
+ if (MO.isKill())
+ // External use is now killed.
+ KilledUseSet.insert(Reg);
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ MachineOperand &MO = *Defs[i];
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (LocalDefSet.insert(Reg)) {
+ LocalDefs.push_back(Reg);
+ if (MO.isDead()) {
+ DeadDefSet.insert(Reg);
+ }
+ } else {
+ // Re-defined inside the bundle, it's no longer killed.
+ KilledDefSet.erase(Reg);
+ if (!MO.isDead())
+ // Previously defined but dead.
+ DeadDefSet.erase(Reg);
+ }
+
+ if (!MO.isDead()) {
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (LocalDefSet.insert(SubReg))
+ LocalDefs.push_back(SubReg);
+ }
+ }
+ }
+
+ FirstMI->setIsInsideBundle();
+ Defs.clear();
+ } while (FirstMI++ != LastMI);
+
+ SmallSet<unsigned, 8> Added;
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ if (Added.insert(Reg)) {
+ // If it's not live beyond the end of the bundle, mark it dead.
+ bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg);
+ MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) |
+ getImplRegState(true));
+ }
+ }
+
+ for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) {
+ unsigned Reg = ExternUses[i];
+ bool isKill = KilledUseSet.count(Reg);
+ bool isUndef = UndefUseSet.count(Reg);
+ MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) |
+ getImplRegState(true));
+ }
+}
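
Taken together with the pass above, the intended flow is: a target packetizer chooses a run of instructions, calls FinalizeBundle on the inclusive range, and UnpackMachineBundles (or emission) later flattens the result. A minimal hedged sketch that bundles the first two instructions of a block (error checking omitted; assumes the block holds at least two unbundled instructions):

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstrBundle.h"
    using namespace llvm;

    static void bundleFirstTwo(MachineBasicBlock &MBB) {
      MachineBasicBlock::instr_iterator First = MBB.instr_begin();
      MachineBasicBlock::instr_iterator Last = First;
      ++Last;
      // Inserts a BUNDLE header before First, marks [First, Last] as
      // inside the bundle, and copies external defs/uses to the header.
      FinalizeBundle(MBB, First, Last);
    }
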
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index e5e8c51..764429d 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -765,7 +765,7 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
/// isLoadFromGOTOrConstantPool - Return true if this machine instruction
/// loads from the global offset table or the constant pool.
static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) {
- assert (MI.getDesc().mayLoad() && "Expected MI that loads!");
+ assert (MI.mayLoad() && "Expected MI that loads!");
for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
E = MI.memoperands_end(); I != E; ++I) {
if (const Value *V = (*I)->getValue()) {
@@ -792,7 +792,7 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
// from constant memory are not safe to speculate all the time, for example
// indexed load from a jump table.
// Stores and side effects are already checked by isSafeToMove.
- if (I.getDesc().mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
+ if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
!IsGuaranteedToExecute(I.getParent()))
return false;
@@ -921,7 +921,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
/// the operand latency between its def and a use is one or less.
bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
- if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike())
+ if (MI.isAsCheapAsAMove() || MI.isCopyLike())
return true;
if (!InstrItins || InstrItins->isEmpty())
return false;
@@ -1105,7 +1105,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
// Don't unfold simple loads.
- if (MI->getDesc().canFoldAsLoad())
+ if (MI->canFoldAsLoad())
return 0;
// If not, we may be able to unfold a load and hoist that.
@@ -1141,8 +1141,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
assert(NewMIs.size() == 2 &&
"Unfolded a load into multiple instructions!");
MachineBasicBlock *MBB = MI->getParent();
- MBB->insert(MI, NewMIs[0]);
- MBB->insert(MI, NewMIs[1]);
+ MachineBasicBlock::iterator Pos = MI;
+ MBB->insert(Pos, NewMIs[0]);
+ MBB->insert(Pos, NewMIs[1]);
// If unfolding produced a load that wasn't loop-invariant or profitable to
// hoist, discard the new instructions and bail.
if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 84d6df2..8cb6112 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -81,7 +81,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
if (BB->empty())
return 0;
- MachineBasicBlock::iterator I = BB->front();
+ MachineBasicBlock::iterator I = BB->begin();
if (!I->isPHI())
return 0;
@@ -182,7 +182,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
return DupPHI;
// Otherwise, we do need a PHI: insert one now.
- MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
Loc, VRC, MRI, TII);
@@ -311,7 +311,7 @@ public:
/// Add it into the specified block and return the register.
static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
MachineSSAUpdater *Updater) {
- MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
Updater->VRC, Updater->MRI,
Updater->TII);
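
The three hunks above are all the same fix: front() returns a reference to the first instruction, whereas the code wants begin(), an iterator usable as an insertion position; the old spelling only worked through an implicit conversion. The distinction in miniature with std::list:

    #include <list>

    void demo(std::list<int> &L) { // assumes L is non-empty
      int &First = L.front();                   // an element, not a position
      std::list<int>::iterator Pos = L.begin(); // a position
      L.insert(Pos, 41); // inserts before the old first element
      (void)First;
    }
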
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 29cfb49..e47360d 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -90,6 +90,12 @@ namespace {
bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
MachineBasicBlock *DefMBB,
bool &BreakPHIEdge, bool &LocalUse) const;
+ MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB,
+ bool &BreakPHIEdge);
+ bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo);
+
bool PerformTrivialForwardCoalescing(MachineInstr *MI,
MachineBasicBlock *MBB);
};
@@ -147,14 +153,10 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Only makes sense for vregs");
+ // Ignore debug uses because debug info doesn't affect the code.
if (MRI->use_nodbg_empty(Reg))
return true;
- // Ignoring debug uses is necessary so debug info doesn't affect the code.
- // This may leave a referencing dbg_value in the original block, before
- // the definition of the vreg. Dwarf generator handles this although the
- // user might not get the right info at runtime.
-
// BreakPHIEdge is true if all the uses are in the successor MBB being sunk
// into and they are all PHI nodes. In this case, machine-sink must break
// the critical edge first. e.g.
@@ -291,7 +293,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
if (!CEBCandidates.insert(std::make_pair(From, To)))
return true;
- if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove())
+ if (!MI->isCopy() && !MI->isAsCheapAsAMove())
return true;
// MI is cheap, we probably don't want to break the critical edge for it.
@@ -401,35 +403,76 @@ static void collectDebugValues(MachineInstr *MI,
}
}
-/// SinkInstruction - Determine whether it is safe to sink the specified machine
-/// instruction out of its current block into a successor.
-bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
- // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
- // be close to the source to make it easier to coalesce.
- if (AvoidsSinking(MI, MRI))
+/// isPostDominatedBy - Return true if A is post dominated by B.
+static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) {
+
+ // FIXME - Use real post dominator.
+ if (A->succ_size() != 2)
+ return false;
+ MachineBasicBlock::succ_iterator I = A->succ_begin();
+ if (B == *I)
+ ++I;
+ MachineBasicBlock *OtherSuccBlock = *I;
+ if (OtherSuccBlock->succ_size() != 1 ||
+ *(OtherSuccBlock->succ_begin()) != B)
return false;
- // Check if it's safe to move the instruction.
- if (!MI->isSafeToMove(TII, AA, SawStore))
+ return true;
+}
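
As the FIXME says, this is not real post-dominance: it only recognizes the triangle where A's two successors are B and some other block whose sole successor is B, so every path out of A funnels into B. The same check over a bare adjacency list (a hypothetical standalone form):

    #include <vector>

    // True if every path leaving A immediately funnels into B, i.e.
    // A -> {B, C} and C -> {B}. A conservative, shape-based test.
    bool isPostDominatedBy(int A, int B,
                           const std::vector<std::vector<int>> &Succ) {
      if (Succ[A].size() != 2)
        return false;
      int Other = (Succ[A][0] == B) ? Succ[A][1] : Succ[A][0];
      return Succ[Other].size() == 1 && Succ[Other][0] == B;
    }
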
+
+/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
+bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo) {
+ assert (MI && "Invalid MachineInstr!");
+ assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
+
+ if (MBB == SuccToSinkTo)
return false;
- // FIXME: This should include support for sinking instructions within the
- // block they are currently in to shorten the live ranges. We often get
- // instructions sunk into the top of a large block, but it would be better to
- // also sink them down before their first use in the block. This xform has to
- // be careful not to *increase* register pressure though, e.g. sinking
- // "x = y + z" down if it kills y and z would increase the live ranges of y
- // and z and only shrink the live range of x.
+ // It is profitable if SuccToSinkTo does not post-dominate the current block.
+ if (!isPostDominatedBy(MBB, SuccToSinkTo))
+ return true;
+
+ // Check whether the only uses in the post-dominated block are PHI nodes.
+ bool NonPHIUse = false;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (UseBlock == SuccToSinkTo && !UseInst->isPHI())
+ NonPHIUse = true;
+ }
+ if (!NonPHIUse)
+ return true;
+
+ // Even if SuccToSinkTo post-dominates, sinking may still be profitable if MI
+ // can be sunk further into another block in a later round.
+ bool BreakPHIEdge = false;
+ // FIXME - If finding a successor is compile-time expensive, cache the results.
+ if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge))
+ return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2);
+
+ // If SuccToSinkTo is the final destination and it post-dominates the current
+ // block, then it is not profitable to sink MI into SuccToSinkTo.
+ return false;
+}
+
+/// FindSuccToSinkTo - Find a successor to sink this instruction to.
+MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool &BreakPHIEdge) {
+
+ assert (MI && "Invalid MachineInstr!");
+ assert (MBB && "Invalid MachineBasicBlock!");
// Loop over all the operands of the specified instruction. If there is
// anything we can't handle, bail out.
- MachineBasicBlock *ParentBlock = MI->getParent();
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
MachineBasicBlock *SuccToSinkTo = 0;
-
- bool BreakPHIEdge = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue; // Ignore non-register operands.
@@ -443,23 +486,23 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
if (!MRI->def_empty(Reg))
- return false;
+ return NULL;
if (AllocatableSet.test(Reg))
- return false;
+ return NULL;
// Check for a def among the register's aliases too.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
if (!MRI->def_empty(AliasReg))
- return false;
+ return NULL;
if (AllocatableSet.test(AliasReg))
- return false;
+ return NULL;
}
} else if (!MO.isDead()) {
// A def that isn't dead. We can't move it.
- return false;
+ return NULL;
}
} else {
// Virtual register uses are always safe to sink.
@@ -467,7 +510,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If it's not safe to move defs of the register class, then abort.
if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
- return false;
+ return NULL;
// FIXME: This picks a successor to sink into based on having one
// successor that dominates all the uses. However, there are cases where
@@ -488,48 +531,79 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If a previous operand picked a block to sink to, then this operand
// must be sinkable to the same block.
bool LocalUse = false;
- if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock,
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
BreakPHIEdge, LocalUse))
- return false;
+ return NULL;
continue;
}
// Otherwise, we should look at all the successors and decide which one
// we should sink to.
- for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
- E = ParentBlock->succ_end(); SI != E; ++SI) {
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBlock = *SI;
bool LocalUse = false;
- if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock,
+ if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
BreakPHIEdge, LocalUse)) {
- SuccToSinkTo = *SI;
+ SuccToSinkTo = SuccBlock;
break;
}
if (LocalUse)
// Def is used locally; it's never safe to move this def.
- return false;
+ return NULL;
}
// If we couldn't find a block to sink to, ignore this instruction.
if (SuccToSinkTo == 0)
- return false;
+ return NULL;
+ else if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo))
+ return NULL;
}
}
- // If there are no outputs, it must have side-effects.
- if (SuccToSinkTo == 0)
- return false;
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (MBB == SuccToSinkTo)
+ return NULL;
// It's not safe to sink instructions to EH landing pad. Control flow into
// landing pad is implicitly defined.
- if (SuccToSinkTo->isLandingPad())
+ if (SuccToSinkTo && SuccToSinkTo->isLandingPad())
+ return NULL;
+
+ return SuccToSinkTo;
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+ // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
+ // be close to the source to make it easier to coalesce.
+ if (AvoidsSinking(MI, MRI))
return false;
- // It is not possible to sink an instruction into its own block. This can
- // happen with loops.
- if (MI->getParent() == SuccToSinkTo)
+ // Check if it's safe to move the instruction.
+ if (!MI->isSafeToMove(TII, AA, SawStore))
return false;
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ bool BreakPHIEdge = false;
+ MachineBasicBlock *ParentBlock = MI->getParent();
+ MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge);
+
+ // If there are no outputs, it must have side-effects.
+ if (SuccToSinkTo == 0)
+ return false;
+
+
// If the instruction to move defines a dead physical register which is live
// when leaving the basic block, don't move it because it could turn into a
// "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index f231e3c..0a2c2f8 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -279,13 +279,17 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
MFI!=MFE; ++MFI) {
visitMachineBasicBlockBefore(MFI);
- for (MachineBasicBlock::const_iterator MBBI = MFI->begin(),
- MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
+ MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
if (MBBI->getParent() != MFI) {
report("Bad instruction parent pointer", MFI);
*OS << "Instruction: " << *MBBI;
continue;
}
+ // Skip BUNDLE instruction for now. FIXME: We should add code to verify
+ // BUNDLE instructions specifically.
+ if (MBBI->isBundle())
+ continue;
visitMachineInstrBefore(MBBI);
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
visitMachineOperand(&MBBI->getOperand(I), I);
@@ -435,7 +439,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB exits via unconditional fall-through but its successor "
"differs from its CFG successor!", MBB);
}
- if (!MBB->empty() && MBB->back().getDesc().isBarrier() &&
+ if (!MBB->empty() && MBB->back().isBarrier() &&
!TII->isPredicated(&MBB->back())) {
report("MBB exits via unconditional fall-through but ends with a "
"barrier instruction!", MBB);
@@ -456,10 +460,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via unconditional branch but doesn't contain "
"any instructions!", MBB);
- } else if (!MBB->back().getDesc().isBarrier()) {
+ } else if (!MBB->back().isBarrier()) {
report("MBB exits via unconditional branch but doesn't end with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().getDesc().isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via unconditional branch but the branch isn't a "
"terminator instruction!", MBB);
}
@@ -479,10 +483,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via conditional branch/fall-through but doesn't "
"contain any instructions!", MBB);
- } else if (MBB->back().getDesc().isBarrier()) {
+ } else if (MBB->back().isBarrier()) {
report("MBB exits via conditional branch/fall-through but ends with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().getDesc().isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via conditional branch/fall-through but the branch "
"isn't a terminator instruction!", MBB);
}
@@ -499,10 +503,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via conditional branch/branch but doesn't "
"contain any instructions!", MBB);
- } else if (!MBB->back().getDesc().isBarrier()) {
+ } else if (!MBB->back().isBarrier()) {
report("MBB exits via conditional branch/branch but doesn't end with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().getDesc().isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via conditional branch/branch but the branch "
"isn't a terminator instruction!", MBB);
}
@@ -555,9 +559,9 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// Check the MachineMemOperands for basic consistency.
for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
E = MI->memoperands_end(); I != E; ++I) {
- if ((*I)->isLoad() && !MCID.mayLoad())
+ if ((*I)->isLoad() && !MI->mayLoad())
report("Missing mayLoad flag", MI);
- if ((*I)->isStore() && !MCID.mayStore())
+ if ((*I)->isStore() && !MI->mayStore())
report("Missing mayStore flag", MI);
}
@@ -575,7 +579,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
// Ensure non-terminators don't follow terminators.
- if (MCID.isTerminator()) {
+ if (MI->isTerminator()) {
if (!FirstTerminator)
FirstTerminator = MI;
} else if (FirstTerminator) {
@@ -606,7 +610,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// Don't check if it's the last operand in a variadic instruction. See,
// e.g., LDM_RET in the arm back end.
if (MO->isReg() &&
- !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) {
+ !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) {
if (MO->isDef() && !MCOI.isOptionalDef())
report("Explicit operand marked as def", MO, MONum);
if (MO->isImplicit())
@@ -614,7 +618,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
} else {
// ARM adds %reg0 operands to indicate predicates. We'll allow that.
- if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg())
+ if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg())
report("Extra explicit operand on non-variadic instruction", MO, MONum);
}
@@ -800,11 +804,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
LiveInts && !LiveInts->isNotInMIMap(MI)) {
LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
SlotIndex Idx = LiveInts->getInstructionIndex(MI);
- if (MCID.mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) {
+ if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) {
report("Instruction loads from dead spill slot", MO, MONum);
*OS << "Live stack: " << LI << '\n';
}
- if (MCID.mayStore() && !LI.liveAt(Idx.getRegSlot())) {
+ if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) {
report("Instruction stores to dead spill slot", MO, MONum);
*OS << "Live stack: " << LI << '\n';
}
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 6994aa5..0e52496 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -410,7 +410,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
return false; // Quick exit for basic blocks without PHIs.
bool Changed = false;
- for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
+ for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
BBI != BBE && BBI->isPHI(); ++BBI) {
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
unsigned Reg = BBI->getOperand(i).getReg();
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index bbc7ce2..2a5652a 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -292,7 +292,7 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
assert(Def && Src && "Malformed bitcast instruction!");
MachineInstr *DefMI = MRI->getVRegDef(Src);
- if (!DefMI || !DefMI->getDesc().isBitcast())
+ if (!DefMI || !DefMI->isBitcast())
return false;
unsigned SrcSrc = 0;
@@ -353,7 +353,7 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isMoveImmediate())
+ if (!MI->isMoveImmediate())
return false;
if (MCID.getNumDefs() != 1)
return false;
@@ -428,9 +428,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- const MCInstrDesc &MCID = MI->getDesc();
-
- if (MCID.isBitcast()) {
+ if (MI->isBitcast()) {
if (OptimizeBitcastInstr(MI, MBB)) {
// MI is deleted.
LocalMIs.erase(MI);
@@ -438,7 +436,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
MII = First ? I->begin() : llvm::next(PMII);
continue;
}
- } else if (MCID.isCompare()) {
+ } else if (MI->isCompare()) {
if (OptimizeCmpInstr(MI, MBB)) {
// MI is deleted.
LocalMIs.erase(MI);
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 7205ed6..fa832c8 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -212,7 +212,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
RegClassInfo.runOnMachineFunction(Fn);
// Check for explicit enable/disable of post-ra scheduling.
- TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = TargetSubtargetInfo::ANTIDEP_NONE;
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
+ TargetSubtargetInfo::ANTIDEP_NONE;
SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
if (EnablePostRAScheduler.getPosition() > 0) {
if (!EnablePostRAScheduler)
@@ -271,6 +272,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
}
I = MI;
--Count;
+ if (MI->isBundle())
+ Count -= MI->getBundleSize();
}
assert(Count == 0 && "Instruction count mismatch!");
assert((MBB->begin() == Current || CurrentCount != 0) &&
@@ -364,7 +367,7 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
KillIndices[i] = ~0u;
// Determine the live-out physregs for this block.
- if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ if (!BB->empty() && BB->back().isReturn()) {
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 32c9325..b4fd1cb 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -332,7 +332,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Skip over all terminator instructions, which are part of the return
// sequence.
MachineBasicBlock::iterator I2 = I;
- while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
I = I2;
bool AtStart = I == MBB->begin();
@@ -426,11 +426,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Skip over all terminator instructions, which are part of the
// return sequence.
- if (! I->getDesc().isTerminator()) {
+ if (! I->isTerminator()) {
++I;
} else {
MachineBasicBlock::iterator I2 = I;
- while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
I = I2;
}
}
@@ -698,7 +698,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// Add epilogue to restore the callee-save registers in each exiting block
for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().getDesc().isReturn())
+ if (!I->empty() && I->back().isReturn())
TFI.emitEpilogue(Fn, *I);
}
@@ -706,7 +706,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// we've been asked for it. This, when linked with a runtime with support
// for segmented stacks (libgcc is one), will result in allocating stack
// space in small chunks instead of one large contiguous block.
- if (EnableSegmentedStacks)
+ if (Fn.getTarget().Options.EnableSegmentedStacks)
TFI.adjustForSegmentedStacks(Fn);
}
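
The EnableSegmentedStacks change is one piece of a broader move in this merge: codegen flags migrate from global cl::opt booleans into a per-target TargetOptions struct (the DAGCombiner hunks below do the same for UnsafeFPMath). An abbreviated sketch of the shape, illustrative only:

    // The real struct carries many more fields than shown here.
    struct TargetOptions {
      unsigned UnsafeFPMath : 1;
      unsigned EnableSegmentedStacks : 1;
    };

    struct TargetMachine {
      TargetOptions Options;
    };

    // Call sites change from a process-wide flag:
    //     if (EnableSegmentedStacks) ...
    // to a per-target-machine query:
    //     if (Fn.getTarget().Options.EnableSegmentedStacks) ...
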
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 4664a3c..c2656c5 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -748,8 +748,8 @@ void RAFast::AllocateBasicBlock() {
// and return are tail calls; do not do this for them. The tail callee need
// not take the same registers as input that it produces as output, and there
// are dependencies for its input registers elsewhere.
- if (!MBB->empty() && MBB->back().getDesc().isReturn() &&
- !MBB->back().getDesc().isCall()) {
+ if (!MBB->empty() && MBB->back().isReturn() &&
+ !MBB->back().isCall()) {
MachineInstr *Ret = &MBB->back();
for (MachineRegisterInfo::liveout_iterator
@@ -968,7 +968,7 @@ void RAFast::AllocateBasicBlock() {
}
unsigned DefOpEnd = MI->getNumOperands();
- if (MCID.isCall()) {
+ if (MI->isCall()) {
// Spill all virtregs before a call. This serves two purposes: 1. If an
// exception is thrown, the landing pad is going to expect to find
// registers in their spill slots, and 2. we don't have to wade through
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 845ee12..a053ccc 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -34,13 +34,14 @@
#include "LiveRangeEdit.h"
#include "RenderMachineFunction.h"
#include "Spiller.h"
-#include "Splitter.h"
#include "VirtRegMap.h"
#include "RegisterCoalescer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/RegAllocPBQP.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -68,11 +69,6 @@ pbqpCoalescing("pbqp-coalescing",
cl::desc("Attempt coalescing during PBQP register allocation."),
cl::init(false), cl::Hidden);
-static cl::opt<bool>
-pbqpPreSplitting("pbqp-pre-splitting",
- cl::desc("Pre-split before PBQP register allocation."),
- cl::init(false), cl::Hidden);
-
namespace {
///
@@ -93,7 +89,6 @@ public:
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
- initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
}
@@ -444,6 +439,9 @@ void PBQPBuilderWithCoalescing::addVirtRegCoalesce(
void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesCFG();
+ au.addRequired<AliasAnalysis>();
+ au.addPreserved<AliasAnalysis>();
au.addRequired<SlotIndexes>();
au.addPreserved<SlotIndexes>();
au.addRequired<LiveIntervals>();
@@ -454,10 +452,10 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
au.addRequired<CalculateSpillWeights>();
au.addRequired<LiveStacks>();
au.addPreserved<LiveStacks>();
+ au.addRequired<MachineDominatorTree>();
+ au.addPreserved<MachineDominatorTree>();
au.addRequired<MachineLoopInfo>();
au.addPreserved<MachineLoopInfo>();
- if (pbqpPreSplitting)
- au.addRequired<LoopSplitter>();
au.addRequired<VirtRegMap>();
au.addRequired<RenderMachineFunction>();
MachineFunctionPass::getAnalysisUsage(au);
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 22d6a3b..cd181cd 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -651,8 +651,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
if (!DefMI)
return false;
- const MCInstrDesc &MCID = DefMI->getDesc();
- if (!MCID.isCommutable())
+ if (!DefMI->isCommutable())
return false;
// If DefMI is a two-address instruction then commuting it will change the
// destination register.
@@ -718,7 +717,8 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
return false;
if (NewMI != DefMI) {
LIS->ReplaceMachineInstrInMaps(DefMI, NewMI);
- MBB->insert(DefMI, NewMI);
+ MachineBasicBlock::iterator Pos = DefMI;
+ MBB->insert(Pos, NewMI);
MBB->erase(DefMI);
}
unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
@@ -809,14 +809,14 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
if (!DefMI)
return false;
assert(DefMI && "Defining instruction disappeared");
- const MCInstrDesc &MCID = DefMI->getDesc();
- if (!MCID.isAsCheapAsAMove())
+ if (!DefMI->isAsCheapAsAMove())
return false;
if (!TII->isTriviallyReMaterializable(DefMI, AA))
return false;
bool SawStore = false;
if (!DefMI->isSafeToMove(TII, AA, SawStore))
return false;
+ const MCInstrDesc &MCID = DefMI->getDesc();
if (MCID.getNumDefs() != 1)
return false;
if (!DefMI->isImplicitDef()) {
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 34b8ab0..4418f40 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -157,7 +157,7 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() {
MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
ExitSU.setInstr(ExitMI);
bool AllDepKnown = ExitMI &&
- (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier());
+ (ExitMI->isCall() || ExitMI->isBarrier());
if (ExitMI && AllDepKnown) {
// If it's a call or a barrier, add dependencies on the defs and uses of
// the instruction.
@@ -238,13 +238,12 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
continue;
}
- const MCInstrDesc &MCID = MI->getDesc();
- assert(!MCID.isTerminator() && !MI->isLabel() &&
+ assert(!MI->isTerminator() && !MI->isLabel() &&
"Cannot schedule terminators or labels!");
// Create the SUnit for this MI.
SUnit *SU = NewSUnit(MI);
- SU->isCall = MCID.isCall();
- SU->isCommutable = MCID.isCommutable();
+ SU->isCall = MI->isCall();
+ SU->isCommutable = MI->isCommutable();
// Assign the Latency field of SU using target-provided information.
if (UnitLatencies)
@@ -278,8 +277,15 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
continue;
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
- !DefSU->getInstr()->registerDefIsDead(Reg)))
- DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
+ !DefSU->getInstr()->registerDefIsDead(Reg))) {
+ if (Kind == SDep::Anti)
+ DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/Reg));
+ else {
+ unsigned AOLat = TII->getOutputLatency(InstrItins, MI, j,
+ DefSU->getInstr());
+ DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/Reg));
+ }
+ }
}
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
std::vector<SUnit *> &MemDefList = Defs[*Alias];
@@ -315,7 +321,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
if (RegUseIndex >= 0 &&
- (UseMCID.mayLoad() || UseMCID.mayStore()) &&
+ (UseMI->mayLoad() || UseMI->mayStore()) &&
(unsigned)RegUseIndex < UseMCID.getNumOperands() &&
UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
LDataLatency += SpecialAddressLatency;
@@ -419,9 +425,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
unsigned TrueMemOrderLatency = 0;
- if (MCID.isCall() || MI->hasUnmodeledSideEffects() ||
+ if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasVolatileMemoryRef() &&
- (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+ (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
// Be conservative with these and add dependencies on all memory
// references, even those that are known to not alias.
for (std::map<const Value *, SUnit *>::iterator I =
@@ -460,7 +466,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
PendingLoads.clear();
AliasMemDefs.clear();
AliasMemUses.clear();
- } else if (MCID.mayStore()) {
+ } else if (MI->mayStore()) {
bool MayAlias = true;
TrueMemOrderLatency = STORE_LOAD_LATENCY;
if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
@@ -516,7 +522,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
/*Reg=*/0, /*isNormalMemory=*/false,
/*isMustAlias=*/false,
/*isArtificial=*/true));
- } else if (MCID.mayLoad()) {
+ } else if (MI->mayLoad()) {
bool MayAlias = true;
TrueMemOrderLatency = 0;
if (MI->isInvariantLoad(AA)) {
@@ -576,7 +582,7 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
// Simplistic target-independent heuristic: assume that loads take
// extra time.
- if (SU->getInstr()->getDesc().mayLoad())
+ if (SU->getInstr()->mayLoad())
SU->Latency += 2;
} else {
SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
@@ -658,39 +664,33 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
- // For MachineInstr-based scheduling, we're rescheduling the instructions in
- // the block, so start by removing them from the block.
- while (Begin != InsertPos) {
- MachineBasicBlock::iterator I = Begin;
- ++Begin;
- BB->remove(I);
- }
+ Begin = InsertPos;
// If first instruction was a DBG_VALUE then put it back.
if (FirstDbgValue)
- BB->insert(InsertPos, FirstDbgValue);
+ BB->splice(InsertPos, BB, FirstDbgValue);
// Then re-insert them according to the given schedule.
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
- BB->insert(InsertPos, SU->getInstr());
+ BB->splice(InsertPos, BB, SU->getInstr());
else
// Null SUnit* is a noop.
EmitNoop();
- }
- // Update the Begin iterator, as the first instruction in the block
- // may have been scheduled later.
- if (!Sequence.empty())
- Begin = Sequence[0]->getInstr();
+ // Update the Begin iterator, as the first instruction in the block
+ // may have been scheduled later.
+ if (i == 0)
+ Begin = prior(InsertPos);
+ }
// Reinsert any remaining debug_values.
for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
MachineInstr *DbgValue = P.first;
- MachineInstr *OrigPrivMI = P.second;
- BB->insertAfter(OrigPrivMI, DbgValue);
+ MachineBasicBlock::iterator OrigPrivMI = P.second;
+ BB->splice(++OrigPrivMI, BB, DbgValue);
}
DbgValues.clear();
FirstDbgValue = NULL;
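
EmitSchedule now reorders by splicing instructions into place instead of removing and re-inserting them; splice relinks the node, so pointers and iterators to the instruction stay valid and nothing is copied. The std::list analogue:

    #include <list>

    // Move the element at I to the end of L without copying it;
    // iterators and references to *I remain valid afterwards.
    void moveToEnd(std::list<int> &L, std::list<int>::iterator I) {
      L.splice(L.end(), L, I);
    }
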
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index ff4184f..6023326 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -20,13 +20,3 @@ add_llvm_library(LLVMSelectionDAG
TargetLowering.cpp
TargetSelectionDAGInfo.cpp
)
-
-add_llvm_library_dependencies(LLVMSelectionDAG
- LLVMAnalysis
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d8208a4..80cf0a8 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -180,7 +180,9 @@ namespace {
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitCTLZ(SDNode *N);
+ SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTTZ(SDNode *N);
+ SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTPOP(SDNode *N);
SDValue visitSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
@@ -361,6 +363,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
/// specified expression for the same cost as the expression itself, or 2 if we
/// can compute the negated form more cheaply than the expression itself.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+ const TargetOptions *Options,
unsigned Depth = 0) {
// No compile time optimizations on this type.
if (Op.getValueType() == MVT::ppcf128)
@@ -383,34 +386,39 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
return LegalOperations ? 0 : 1;
case ISD::FADD:
// FIXME: determine better conditions for this xform.
- if (!UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath) return 0;
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Options,
+ Depth + 1))
return V;
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, Options,
+ Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath) return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
return 1;
case ISD::FMUL:
case ISD::FDIV:
- if (HonorSignDependentRoundingFPMath()) return 0;
+ if (Options->HonorSignDependentRoundingFPMath()) return 0;
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Options,
+ Depth + 1))
return V;
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, Options,
+ Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FSIN:
- return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(0), LegalOperations, Options,
+ Depth + 1);
}
}
@@ -434,10 +442,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
case ISD::FADD:
// FIXME: determine better conditions for this xform.
- assert(UnsafeFPMath);
+ assert(DAG.getTarget().Options.UnsafeFPMath);
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ &DAG.getTarget().Options, Depth+1))
return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
@@ -449,7 +458,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
Op.getOperand(0));
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- assert(UnsafeFPMath);
+ assert(DAG.getTarget().Options.UnsafeFPMath);
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
@@ -462,10 +471,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
case ISD::FMUL:
case ISD::FDIV:
- assert(!HonorSignDependentRoundingFPMath());
+ assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath());
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ &DAG.getTarget().Options, Depth+1))
return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
@@ -1070,7 +1080,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SRA: return visitSRA(N);
case ISD::SRL: return visitSRL(N);
case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
case ISD::CTPOP: return visitCTPOP(N);
case ISD::SELECT: return visitSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
@@ -1769,7 +1781,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
N0, N1);
}
// fold (sdiv X, pow2) -> simple ops after legalize
- if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() &&
+ if (N1C && !N1C->isNullValue() &&
(N1C->getAPIntValue().isPowerOf2() ||
(-N1C->getAPIntValue()).isPowerOf2())) {
// If dividing by powers of two is cheap, then don't perform the following
@@ -3709,6 +3721,16 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz_zero_undef c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -3719,6 +3741,16 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz_zero_undef c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
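For reference, the _ZERO_UNDEF variants differ from CTLZ/CTTZ only at zero: there the result is undefined, which lets targets lower them to instructions like x86's BSF/BSR without a zero guard. A hypothetical scalar reference for the defined flavor:

    #include <cstdint>

    // cttz with defined zero behavior: cttz32(0) == 32.
    unsigned cttz32(uint32_t X) {
      if (X == 0)
        return 32;
      unsigned N = 0;
      while ((X & 1) == 0) {
        X >>= 1;
        ++N;
      }
      return N;
    }
    // CTTZ_ZERO_UNDEF: identical, except X == 0 may yield any value.
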
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -5254,20 +5286,22 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
// fold (fadd A, 0) -> A
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N1CFP->getValueAPF().isZero())
return N0;
// fold (fadd A, (fneg B)) -> (fsub A, B)
- if (isNegatibleForFree(N1, LegalOperations) == 2)
+ if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// fold (fadd (fneg A), B) -> (fsub B, A)
- if (isNegatibleForFree(N0, LegalOperations) == 2)
+ if (isNegatibleForFree(N0, LegalOperations, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations));
// If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
- if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
- N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(1)))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
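The UnsafeFPMath guard on the (fadd A, 0) -> A fold is again about signed zeros; a quick standalone check shows why the fold is not an identity in strict IEEE mode:

    #include <cassert>
    #include <cmath>

    int main() {
      double A = -0.0;
      assert(std::signbit(A));         // A is -0.0
      assert(!std::signbit(A + 0.0));  // but A + 0.0 is +0.0
      return 0;
    }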
@@ -5292,17 +5326,19 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0CFP && N1CFP && VT != MVT::ppcf128)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
// fold (fsub A, 0) -> A
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N1CFP->getValueAPF().isZero())
return N0;
// fold (fsub 0, B) -> -B
- if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
- if (isNegatibleForFree(N1, LegalOperations))
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations))
+ if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
@@ -5329,10 +5365,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
// fold (fmul A, 0) -> 0
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N1CFP->getValueAPF().isZero())
return N1;
// fold (fmul A, 0) -> 0, vector edition.
- if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ ISD::isBuildVectorAllZeros(N1.getNode()))
return N1;
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
@@ -5343,8 +5381,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations,
+ &DAG.getTarget().Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations,
+ &DAG.getTarget().Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
@@ -5355,7 +5395,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
// If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
- if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N0.getOpcode() == ISD::FMUL &&
N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
@@ -5383,8 +5424,10 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations,
+ &DAG.getTarget().Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations,
+ &DAG.getTarget().Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
@@ -5637,7 +5680,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (isNegatibleForFree(N0, LegalOperations))
+ if (isNegatibleForFree(N0, LegalOperations, &DAG.getTarget().Options))
return GetNegatedExpression(N0, DAG, LegalOperations);
// Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
@@ -7162,19 +7205,23 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
return SDValue();
- // Combine:
- // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
- // Into:
- // indicies are equal => V1
- // otherwise => (extract_subvec V1, ExtIdx)
- //
- SDValue InsIdx = N->getOperand(1);
- SDValue ExtIdx = V->getOperand(2);
-
- if (InsIdx == ExtIdx)
- return V->getOperand(1);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
- V->getOperand(0), N->getOperand(1));
+ // Only handle cases where both indexes are constants with the same type.
+ ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
+
+ if (InsIdx && ExtIdx &&
+ InsIdx->getValueType(0).getSizeInBits() <= 64 &&
+ ExtIdx->getValueType(0).getSizeInBits() <= 64) {
+ // Combine:
+ // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+ // Into:
+ // indices are equal => V2 (the inserted subvector)
+ // otherwise => (extract_subvec V1, ExtIdx)
+ if (InsIdx->getZExtValue() == ExtIdx->getZExtValue())
+ return V->getOperand(1);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
+ V->getOperand(0), N->getOperand(1));
+ }
}
return SDValue();
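The combine's shape can be mimicked with plain arrays (hypothetical values; an 8-wide vector and a 4-wide subvector): writing a slice at index 4 and reading a slice back from the same index yields the written slice, while any other index reads through to the original vector.

    #include <algorithm>
    #include <array>
    #include <cassert>

    int main() {
      std::array<int, 8> V1{0, 1, 2, 3, 4, 5, 6, 7};
      std::array<int, 4> V2{9, 9, 9, 9};
      std::array<int, 8> Ins = V1;                       // insert_subvec V1, V2, 4
      std::copy(V2.begin(), V2.end(), Ins.begin() + 4);
      // extract_subvec Ins, 4 gives V2 back (indices equal)
      assert(std::equal(V2.begin(), V2.end(), Ins.begin() + 4));
      // extract_subvec Ins, 0 reads untouched elements of V1
      assert(std::equal(V1.begin(), V1.begin() + 4, Ins.begin()));
      return 0;
    }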
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index cff37c2..b4946ec 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -62,8 +62,11 @@
#include "llvm/ADT/Statistic.h"
using namespace llvm;
-STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by target-independent selector");
-STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by target-specific selector");
+STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
+ "target-independent selector");
+STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
+ "target-specific selector");
+STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
/// startNewBlock - Set the current block to which generated machine
/// instructions will be appended, and clear the local CSE map.
@@ -307,6 +310,18 @@ void FastISel::recomputeInsertPt() {
++FuncInfo.InsertPt;
}
+void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+ assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!");
+ while (I != E) {
+ MachineInstr *Dead = &*I;
+ ++I;
+ Dead->eraseFromParent();
+ ++NumFastIselDead;
+ }
+ recomputeInsertPt();
+}
+
FastISel::SavePoint FastISel::enterLocalValueArea() {
MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
DebugLoc OldDL = DL;
@@ -792,19 +807,33 @@ FastISel::SelectInstruction(const Instruction *I) {
DL = I->getDebugLoc();
+ MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
+
// First, try doing target-independent selection.
if (SelectOperator(I, I->getOpcode())) {
++NumFastIselSuccessIndependent;
DL = DebugLoc();
return true;
}
+ // Remove dead code. However, ignore call instructions since we've flushed
+ // the local value map and recomputed the insert point.
+ if (!isa<CallInst>(I)) {
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+ }
// Next, try calling the target to attempt to handle the instruction.
+ SavedInsertPt = FuncInfo.InsertPt;
if (TargetSelectInstruction(I)) {
++NumFastIselSuccessTarget;
DL = DebugLoc();
return true;
}
+ // Check for dead code and remove as necessary.
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
DL = DebugLoc();
return false;
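The save/try/erase pattern added here is easier to see in miniature. A sketch over a std::list standing in for the machine basic block (names are illustrative, not the LLVM API):

    #include <cassert>
    #include <iterator>
    #include <list>

    using InstList = std::list<int>;

    // Erase everything the failed selection attempt emitted between the
    // current insert point I and the saved insert point E.
    static void removeDeadCode(InstList &Block, InstList::iterator I,
                               InstList::iterator E) {
      while (I != E)
        I = Block.erase(I);  // erase returns the next iterator
    }

    int main() {
      InstList Block{1, 2, 3, 4};
      InstList::iterator Saved = std::next(Block.begin(), 3);  // before 4
      removeDeadCode(Block, std::next(Block.begin()), Saved);  // drop 2, 3
      assert(Block.size() == 2 && Block.front() == 1 && Block.back() == 4);
      return 0;
    }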
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 2ff66f8..cb6fd53 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -294,7 +294,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
const TargetRegisterClass *DstRC = 0;
if (IIOpNum < II->getNumOperands())
DstRC = TII->getRegClass(*II, IIOpNum, TRI);
- assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
+ assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
"Don't have operand info for this instruction!");
if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
diff --git a/lib/CodeGen/SelectionDAG/LLVMBuild.txt b/lib/CodeGen/SelectionDAG/LLVMBuild.txt
index 10a849f..81d2e00 100644
--- a/lib/CodeGen/SelectionDAG/LLVMBuild.txt
+++ b/lib/CodeGen/SelectionDAG/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = SelectionDAG
parent = CodeGen
required_libraries = Analysis CodeGen Core MC Support Target TransformUtils
-
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 156cc70..75f5761 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -785,7 +785,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::FRAME_TO_ARGS_OFFSET:
case ISD::EH_SJLJ_SETJMP:
case ISD::EH_SJLJ_LONGJMP:
- case ISD::EH_SJLJ_DISPATCHSETUP:
// These operations lie about being legal: when they claim to be legal,
// they should actually be expanded.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -2383,6 +2382,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
return Op;
}
+ case ISD::CTLZ_ZERO_UNDEF:
+ // This trivially expands to CTLZ.
+ return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
case ISD::CTLZ: {
// for now, we do this:
// x = x | (x >> 1);
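The expansion sketched in that comment smears the highest set bit down through all lower positions, then counts what survives the complement; a standalone 32-bit version:

    #include <cassert>
    #include <cstdint>

    static unsigned popcount32(uint32_t x) {
      unsigned n = 0;
      for (; x; x &= x - 1) ++n;  // clear lowest set bit per iteration
      return n;
    }

    static unsigned ctlz32(uint32_t x) {
      x |= x >> 1; x |= x >> 2; x |= x >> 4;  // smear the top set bit
      x |= x >> 8; x |= x >> 16;              // down to bit 0
      return popcount32(~x);                  // leading zeros survive the NOT
    }

    int main() {
      assert(ctlz32(1) == 31 && ctlz32(0x80000000u) == 0 && ctlz32(0) == 32);
      return 0;
    }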
@@ -2404,6 +2406,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
Op = DAG.getNOT(dl, Op, VT);
return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
+ case ISD::CTTZ_ZERO_UNDEF:
+ // This trivially expands to CTTZ.
+ return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op);
case ISD::CTTZ: {
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
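And the CTTZ expansion named in this comment, checked standalone: ~x & (x - 1) isolates exactly the bit positions below the lowest set bit.

    #include <cassert>
    #include <cstdint>

    static unsigned popcount32(uint32_t x) {
      unsigned n = 0;
      for (; x; x &= x - 1) ++n;
      return n;
    }

    static unsigned cttz32(uint32_t x) {
      return popcount32(~x & (x - 1));  // ones exactly below the lowest set bit
    }

    int main() {
      assert(cttz32(8) == 3 && cttz32(1) == 0 && cttz32(0) == 32);
      return 0;
    }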
@@ -2518,7 +2523,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
switch (Node->getOpcode()) {
case ISD::CTPOP:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
Results.push_back(Tmp1);
break;
@@ -2538,7 +2545,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::PREFETCH:
case ISD::VAEND:
case ISD::EH_SJLJ_LONGJMP:
- case ISD::EH_SJLJ_DISPATCHSETUP:
// If the target didn't expand these, there's nothing to do, so just
// preserve the chain and be done.
Results.push_back(Node->getOperand(0));
@@ -3421,20 +3427,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
SDValue Tmp1, Tmp2, Tmp3;
switch (Node->getOpcode()) {
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
// Zero extend the argument.
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
- // Perform the larger operation.
+ // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
+ // already the correct result.
Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
if (Node->getOpcode() == ISD::CTTZ) {
- //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
+ // FIXME: This should set a bit in the zero extended value instead.
Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT),
Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
ISD::SETEQ);
Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
- } else if (Node->getOpcode() == ISD::CTLZ) {
+ } else if (Node->getOpcode() == ISD::CTLZ ||
+ Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
// Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
DAG.getConstant(NVT.getSizeInBits() -
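The CTLZ leg of this promotion is plain arithmetic: count in the wider type, then discard the extra leading zeros introduced by the zero extension. A 16-bit-via-32-bit sketch:

    #include <cassert>
    #include <cstdint>

    static unsigned ctlz32(uint32_t x) {  // defined for x == 0 as 32
      unsigned n = 32;
      for (; x; x >>= 1) --n;
      return n;
    }

    static unsigned ctlz16_via32(uint16_t v) {
      return ctlz32(v) - (32 - 16);  // subtract the widening's extra zeros
    }

    int main() {
      assert(ctlz16_via32(0x8000) == 0 && ctlz16_via32(1) == 15 &&
             ctlz16_via32(0) == 16);
      return 0;
    }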
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index fd24238..1c02c4f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -56,8 +56,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
case ISD::CONVERT_RNDSAT:
Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
case ISD::EXTRACT_VECTOR_ELT:
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
@@ -216,7 +218,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
case TargetLowering::TypeLegal:
break;
case TargetLowering::TypePromoteInteger:
- if (NOutVT.bitsEq(NInVT))
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector())
// The input promotes to the same size. Convert the promoted value.
return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
break;
@@ -311,7 +313,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
- Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op);
+ Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
// Subtract off the extra leading bits in the bigger type.
return DAG.getNode(ISD::SUB, dl, NVT, Op,
DAG.getConstant(NVT.getSizeInBits() -
@@ -329,13 +331,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
DebugLoc dl = N->getDebugLoc();
- // The count is the same in the promoted type except if the original
- // value was zero. This can be handled by setting the bit just off
- // the top of the original type.
- APInt TopBit(NVT.getSizeInBits(), 0);
- TopBit.setBit(OVT.getSizeInBits());
- Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
- return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
+ if (N->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ APInt TopBit(NVT.getSizeInBits(), 0);
+ TopBit.setBit(OVT.getSizeInBits());
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
+ }
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op);
}
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
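The TopBit trick kept here for plain CTTZ is worth checking numerically: OR-ing in the bit just above the original width makes a zero input count 16 trailing zeros (the old width) rather than 32.

    #include <cassert>
    #include <cstdint>

    static unsigned popcount32(uint32_t x) {
      unsigned n = 0;
      for (; x; x &= x - 1) ++n;
      return n;
    }

    static unsigned cttz16_via32(uint16_t v) {
      uint32_t Wide = uint32_t(v) | (1u << 16);  // the TopBit OR above
      return popcount32(~Wide & (Wide - 1));     // 32-bit cttz expansion
    }

    int main() {
      assert(cttz16_via32(0) == 16 && cttz16_via32(8) == 3);
      return 0;
    }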
@@ -1097,8 +1101,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
@@ -1701,8 +1707,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
DAG.getConstant(0, NVT), ISD::SETNE);
- SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo);
- SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi);
+ SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi);
Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ,
DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
@@ -1731,8 +1737,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
DAG.getConstant(0, NVT), ISD::SETNE);
- SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo);
- SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi);
+ SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi);
Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ,
DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
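The split expansion selects between the two half-width counts, and the ZERO_UNDEF refinement is safe because each leg is only consumed when its half is known nonzero; the Lo count, for instance, is only used when Lo != 0. In miniature:

    #include <cassert>
    #include <cstdint>

    static unsigned cttz32(uint32_t x) {  // any 32-bit count works here
      unsigned n = 0;
      for (; x && !(x & 1); x >>= 1) ++n;
      return x ? n : 32;
    }

    static unsigned cttz64(uint32_t Lo, uint32_t Hi) {
      // The Lo leg may be zero-undef: it is only selected when Lo != 0.
      return Lo ? cttz32(Lo) : 32 + cttz32(Hi);
    }

    int main() {
      assert(cttz64(8, 0) == 3 && cttz64(0, 1) == 32 && cttz64(0, 0) == 64);
      return 0;
    }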
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 4e02b90..4696c0d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -185,8 +185,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
- case ISD::CTTZ:
case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP:
case ISD::SELECT:
case ISD::VSELECT:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ad83565..7ca0d1e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -441,8 +441,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ANY_EXTEND:
case ISD::CONVERT_RNDSAT:
case ISD::CTLZ:
- case ISD::CTPOP:
case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index cd0da37..80162d7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -102,17 +102,6 @@ static cl::opt<unsigned> AvgIPC(
"sched-avg-ipc", cl::Hidden, cl::init(1),
cl::desc("Average inst/cycle whan no target itinerary exists."));
-#ifndef NDEBUG
-namespace {
- // For sched=list-ilp, Count the number of times each factor comes into play.
- enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
- FactStatic, FactOther, NumFactors };
-}
-static const char *FactorName[NumFactors] =
-{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
-static int FactorCount[NumFactors];
-#endif //!NDEBUG
-
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -157,6 +146,10 @@ private:
/// and similar queries.
ScheduleDAGTopologicalSort Topo;
+ // Hack to keep track of the inverse of FindCallSeqStart without more crazy
+ // DAG crawling.
+ DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
+
public:
ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
SchedulingPriorityQueue *availqueue,
@@ -308,11 +301,6 @@ void ScheduleDAGRRList::Schedule() {
DEBUG(dbgs()
<< "********** List Scheduling BB#" << BB->getNumber()
<< " '" << BB->getName() << "' **********\n");
-#ifndef NDEBUG
- for (int i = 0; i < NumFactors; ++i) {
- FactorCount[i] = 0;
- }
-#endif //!NDEBUG
CurCycle = 0;
IssueCount = 0;
@@ -322,6 +310,7 @@ void ScheduleDAGRRList::Schedule() {
// to track the virtual resource of a calling sequence.
LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
+ CallSeqEndForStart.clear();
// Build the scheduling graph.
BuildSchedGraph(NULL);
@@ -337,11 +326,6 @@ void ScheduleDAGRRList::Schedule() {
// Execute the actual scheduling loop.
ListScheduleBottomUp();
-#ifndef NDEBUG
- for (int i = 0; i < NumFactors; ++i) {
- DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
- }
-#endif // !NDEBUG
AvailableQueue->releaseState();
}
@@ -545,6 +529,8 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
SUnit *Def = &SUnits[N->getNodeId()];
+ CallSeqEndForStart[Def] = SU;
+
++NumLiveRegs;
LiveRegDefs[CallResource] = Def;
LiveRegGens[CallResource] = SU;
@@ -811,7 +797,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
++NumLiveRegs;
LiveRegDefs[CallResource] = SU;
- LiveRegGens[CallResource] = NULL;
+ LiveRegGens[CallResource] = CallSeqEndForStart[SU];
}
}
@@ -832,12 +818,11 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
+ if (!LiveRegDefs[I->getReg()])
+ ++NumLiveRegs;
// This becomes the nearest def. Note that an earlier def may still be
// pending if this is a two-address node.
LiveRegDefs[I->getReg()] = SU;
- if (!LiveRegDefs[I->getReg()]) {
- ++NumLiveRegs;
- }
if (LiveRegGens[I->getReg()] == NULL ||
I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
LiveRegGens[I->getReg()] = I->getSUnit();
@@ -2296,28 +2281,20 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
// If scheduling either one of the node will cause a pipeline stall, sort
// them according to their height.
if (LStall) {
- if (!RStall) {
- DEBUG(++FactorCount[FactStall]);
+ if (!RStall)
return 1;
- }
- if (LHeight != RHeight) {
- DEBUG(++FactorCount[FactStall]);
+ if (LHeight != RHeight)
return LHeight > RHeight ? 1 : -1;
- }
- } else if (RStall) {
- DEBUG(++FactorCount[FactStall]);
+ } else if (RStall)
return -1;
- }
// If either node is scheduling for latency, sort them by height/depth
// and latency.
if (!checkPref || (left->SchedulingPref == Sched::ILP ||
right->SchedulingPref == Sched::ILP)) {
if (DisableSchedCycles) {
- if (LHeight != RHeight) {
- DEBUG(++FactorCount[FactHeight]);
+ if (LHeight != RHeight)
return LHeight > RHeight ? 1 : -1;
- }
}
else {
// If neither instruction stalls (!LStall && !RStall) then
@@ -2326,17 +2303,14 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
int LDepth = left->getDepth() - LPenalty;
int RDepth = right->getDepth() - RPenalty;
if (LDepth != RDepth) {
- DEBUG(++FactorCount[FactDepth]);
DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
<< ") depth " << LDepth << " vs SU (" << right->NodeNum
<< ") depth " << RDepth << "\n");
return LDepth < RDepth ? 1 : -1;
}
}
- if (left->Latency != right->Latency) {
- DEBUG(++FactorCount[FactOther]);
+ if (left->Latency != right->Latency)
return left->Latency > right->Latency ? 1 : -1;
- }
}
return 0;
}
@@ -2350,7 +2324,6 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
bool LHasPhysReg = left->hasPhysRegDefs;
bool RHasPhysReg = right->hasPhysRegDefs;
if (LHasPhysReg != RHasPhysReg) {
- DEBUG(++FactorCount[FactRegUses]);
#ifndef NDEBUG
const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
#endif
@@ -2376,10 +2349,8 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
}
- if (LPriority != RPriority) {
- DEBUG(++FactorCount[FactStatic]);
+ if (LPriority != RPriority)
return LPriority > RPriority;
- }
// If one or both of the nodes are calls and their sethi-ullman numbers are
// the same, then keep source order.
@@ -2412,18 +2383,14 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
// This creates more short live intervals.
unsigned LDist = closestSucc(left);
unsigned RDist = closestSucc(right);
- if (LDist != RDist) {
- DEBUG(++FactorCount[FactOther]);
+ if (LDist != RDist)
return LDist < RDist;
- }
// How many registers becomes live when the node is scheduled.
unsigned LScratch = calcMaxScratches(left);
unsigned RScratch = calcMaxScratches(right);
- if (LScratch != RScratch) {
- DEBUG(++FactorCount[FactOther]);
+ if (LScratch != RScratch)
return LScratch > RScratch;
- }
// Comparing latency against a call makes little sense unless the node
// is register pressure-neutral.
@@ -2438,20 +2405,15 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
return result > 0;
}
else {
- if (left->getHeight() != right->getHeight()) {
- DEBUG(++FactorCount[FactHeight]);
+ if (left->getHeight() != right->getHeight())
return left->getHeight() > right->getHeight();
- }
- if (left->getDepth() != right->getDepth()) {
- DEBUG(++FactorCount[FactDepth]);
+ if (left->getDepth() != right->getDepth())
return left->getDepth() < right->getDepth();
- }
}
assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
- DEBUG(++FactorCount[FactOther]);
return (left->NodeQueueId > right->NodeQueueId);
}
@@ -2511,13 +2473,11 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
if (LHigh && !RHigh) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
<< right->NodeNum << ")\n");
return true;
}
else if (!LHigh && RHigh) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
<< left->NodeNum << ")\n");
return false;
@@ -2581,7 +2541,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
}
if (!DisableSchedRegPressure && LPDiff != RPDiff) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
<< " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
return LPDiff > RPDiff;
@@ -2590,7 +2549,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
bool LReduce = canEnableCoalescing(left);
bool RReduce = canEnableCoalescing(right);
- DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
if (LReduce && !RReduce) return false;
if (RReduce && !LReduce) return true;
}
@@ -2598,17 +2556,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
<< " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
- DEBUG(++FactorCount[FactRegUses]);
return LLiveUses < RLiveUses;
}
if (!DisableSchedStalls) {
bool LStall = BUHasStall(left, left->getHeight(), SPQ);
bool RStall = BUHasStall(right, right->getHeight(), SPQ);
- if (LStall != RStall) {
- DEBUG(++FactorCount[FactHeight]);
+ if (LStall != RStall)
return left->getHeight() > right->getHeight();
- }
}
if (!DisableSchedCriticalPath) {
@@ -2617,17 +2572,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
<< left->getDepth() << " != SU(" << right->NodeNum << "): "
<< right->getDepth() << "\n");
- DEBUG(++FactorCount[FactDepth]);
return left->getDepth() < right->getDepth();
}
}
if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
int spread = (int)left->getHeight() - (int)right->getHeight();
- if (std::abs(spread) > MaxReorderWindow) {
- DEBUG(++FactorCount[FactHeight]);
+ if (std::abs(spread) > MaxReorderWindow)
return left->getHeight() > right->getHeight();
- }
}
return BURRSort(left, right, SPQ);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 497c286..dd626e2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -840,9 +840,9 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
}
// EntryNode could meaningfully have debug info if we can find it...
-SelectionDAG::SelectionDAG(const TargetMachine &tm)
+SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
: TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
- EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
+ OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), Ordering(0) {
AllNodes.push_back(&EntryNode);
Ordering = new SDNodeOrdering();
@@ -1856,7 +1856,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP: {
unsigned LowBits = Log2_32(BitWidth)+1;
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
@@ -2334,7 +2336,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
// If we're told that NaNs won't happen, assume they won't.
- if (NoNaNsFPMath)
+ if (getTarget().Options.NoNaNsFPMath)
return true;
// If the value is a constant, we can obviously see if it is a NaN or not.
@@ -2429,8 +2431,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
case ISD::CTPOP:
return getConstant(Val.countPopulation(), VT);
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
return getConstant(Val.countLeadingZeros(), VT);
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
return getConstant(Val.countTrailingZeros(), VT);
}
}
@@ -2607,7 +2611,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
break;
case ISD::FNEG:
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
- if (UnsafeFPMath && OpOpcode == ISD::FSUB)
+ if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
Operand.getNode()->getOperand(0));
if (OpOpcode == ISD::FNEG) // --X -> X
@@ -2742,7 +2746,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
- if (UnsafeFPMath) {
+ if (getTarget().Options.UnsafeFPMath) {
if (Opcode == ISD::FADD) {
// 0+x --> x
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
@@ -3065,7 +3069,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
- if (UnsafeFPMath)
+ if (getTarget().Options.UnsafeFPMath)
return N2;
break;
case ISD::MUL:
@@ -4914,6 +4918,20 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
return N;
}
+/// UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away
+/// the line number information on the merged node since it is not possible to
+/// preserve the information that the operation is associated with multiple
+/// lines. This makes the debugger work better at -O0, where there is a higher
+/// probability of other instructions being associated with that line.
+///
+SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) {
+ DebugLoc NLoc = N->getDebugLoc();
+ if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && (OLoc != NLoc)) {
+ N->setDebugLoc(DebugLoc());
+ }
+ return N;
+}
+
/// MorphNodeTo - This *mutates* the specified node to have the specified
/// return type, opcode, and operands.
///
@@ -4935,7 +4953,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
- return ON;
+ return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc());
}
if (!RemoveNodeFromCSEMaps(N))
@@ -5139,8 +5157,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
- return cast<MachineSDNode>(E);
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ return cast<MachineSDNode>(UpdadeDebugLocOnMergedSDNode(E, DL));
+ }
}
// Allocate a new MachineSDNode.
@@ -5943,7 +5962,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EH_RETURN: return "EH_RETURN";
case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
- case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP";
case ISD::ConstantPool: return "ConstantPool";
case ISD::ExternalSymbol: return "ExternalSymbol";
case ISD::BlockAddress: return "BlockAddress";
@@ -6112,10 +6130,12 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::TRAP: return "trap";
// Bit manipulation
- case ISD::BSWAP: return "bswap";
- case ISD::CTPOP: return "ctpop";
- case ISD::CTTZ: return "cttz";
- case ISD::CTLZ: return "ctlz";
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
+ case ISD::CTLZ: return "ctlz";
+ case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
// Trampolines
case ISD::INIT_TRAMPOLINE: return "init_trampoline";
@@ -6146,6 +6166,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SETLT: return "setlt";
case ISD::SETLE: return "setle";
case ISD::SETNE: return "setne";
+
+ case ISD::SETTRUE: return "settrue";
+ case ISD::SETTRUE2: return "settrue2";
+ case ISD::SETFALSE: return "setfalse";
+ case ISD::SETFALSE2: return "setfalse2";
}
}
}
@@ -6554,20 +6579,15 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
const GlobalValue *GV;
int64_t GVOffset = 0;
if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
- // If GV has specified alignment, then use it. Otherwise, use the preferred
- // alignment.
- unsigned Align = GV->getAlignment();
- if (!Align) {
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
- if (GVar->hasInitializer()) {
- const TargetData *TD = TLI.getTargetData();
- Align = TD->getPreferredAlignment(GVar);
- }
- }
- if (!Align)
- Align = TLI.getTargetData()->getABITypeAlignment(GV->getType());
- }
- return MinAlign(Align, GVOffset);
+ unsigned PtrWidth = TLI.getPointerTy().getSizeInBits();
+ APInt AllOnes = APInt::getAllOnesValue(PtrWidth);
+ APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
+ llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), AllOnes,
+ KnownZero, KnownOne, TLI.getTargetData());
+ unsigned AlignBits = KnownZero.countTrailingOnes();
+ unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
+ if (Align)
+ return MinAlign(Align, GVOffset);
}
// If this is a direct reference to a stack slot, use information about the
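The replacement logic derives alignment from known-zero low bits rather than from a declared alignment: if the low N bits of an address are provably zero, the address is 2^N-aligned. A numeric check:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Addr = 0x1040;  // low 6 bits are zero
      unsigned AlignBits = 0;
      while (AlignBits < 64 && !(Addr >> AlignBits & 1))
        ++AlignBits;                       // count trailing zero bits
      unsigned Align = 1u << AlignBits;    // 1 << 6 == 64-byte alignment
      assert(AlignBits == 6 && Align == 64 && Addr % Align == 0);
      return 0;
    }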
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8d02350..68c9514 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -47,6 +47,7 @@
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -812,9 +813,11 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
}
}
-void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li) {
AA = &aa;
GFI = gfi;
+ LibInfo = li;
TD = DAG.getTarget().getTargetData();
LPadToCallSiteMap.clear();
}
@@ -1335,6 +1338,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
Condition = getICmpCondCode(IC->getPredicate());
} else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
Condition = getFCmpCondCode(FC->getPredicate());
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
} else {
Condition = ISD::SETEQ; // silence warning.
llvm_unreachable("Unknown compare instruction");
@@ -2002,7 +2007,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
}
static inline bool areJTsAllowed(const TargetLowering &TLI) {
- return !DisableJumpTables &&
+ return !TLI.getTargetMachine().Options.DisableJumpTables &&
(TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
}
@@ -2625,6 +2630,8 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
}
@@ -3095,7 +3102,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
unsigned Amt = ElementSize.logBase2();
IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
N.getValueType(), IdxN,
- DAG.getConstant(Amt, TLI.getPointerTy()));
+ DAG.getConstant(Amt, IdxN.getValueType()));
} else {
SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
@@ -4775,11 +4782,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getRoot(), getValue(I.getArgOperand(0))));
return 0;
}
- case Intrinsic::eh_sjlj_dispatch_setup: {
- DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
- getRoot(), getValue(I.getArgOperand(0))));
- return 0;
- }
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
@@ -4946,14 +4948,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
- setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg));
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
+ dl, Ty, Arg));
return 0;
}
case Intrinsic::ctlz: {
SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
- setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg));
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
+ dl, Ty, Arg));
return 0;
}
case Intrinsic::ctpop: {
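A plausible source for the new i1 flag (an assumption about the frontend, not something this diff shows): GCC-style builtins leave the zero case undefined, so a frontend can pass true and reach the *_ZERO_UNDEF path, while lowerings that need a defined answer at zero pass false.

    // __builtin_ctz(0) is undefined, so a frontend may legally emit
    // llvm.cttz(x, i1 true) for it and let the target pick any result
    // for zero (e.g. whatever x86 BSF leaves in its destination).
    unsigned tz(unsigned x) { return __builtin_ctz(x); }  // caller ensures x != 0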
@@ -5064,7 +5070,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::trap: {
- StringRef TrapFuncName = getTrapFunctionName();
+ StringRef TrapFuncName = TM.Options.getTrapFunctionName();
if (TrapFuncName.empty()) {
DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
return 0;
@@ -5226,7 +5232,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// If there's a possibility that fast-isel has already selected some amount
// of the current basic block, don't emit a tail call.
- if (isTailCall && EnableFastISel)
+ if (isTailCall && TM.Options.EnableFastISel)
isTailCall = false;
std::pair<SDValue,SDValue> Result =
@@ -5510,7 +5516,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// can't be a library call.
if (!F->hasLocalLinkage() && F->hasName()) {
StringRef Name = F->getName();
- if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") {
+ if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") ||
+ (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") ||
+ (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) {
if (I.getNumArgOperands() == 2 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5521,7 +5529,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
LHS.getValueType(), LHS, RHS));
return;
}
- } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
+ } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") ||
+ (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") ||
+ (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType()) {
@@ -5530,7 +5540,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
+ } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") ||
+ (LibInfo->has(LibFunc::sinf) && Name == "sinf") ||
+ (LibInfo->has(LibFunc::sinl) && Name == "sinl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5540,7 +5552,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
+ } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") ||
+ (LibInfo->has(LibFunc::cosf) && Name == "cosf") ||
+ (LibInfo->has(LibFunc::cosl) && Name == "cosl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5550,7 +5564,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
+ } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") ||
+ (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") ||
+ (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5560,6 +5576,83 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
+ } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") ||
+ (LibInfo->has(LibFunc::floorf) && Name == "floorf") ||
+ (LibInfo->has(LibFunc::floorl) && Name == "floorl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") ||
+ (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") ||
+ (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") ||
+ (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") ||
+ (LibInfo->has(LibFunc::ceill) && Name == "ceill")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") ||
+ (LibInfo->has(LibFunc::rintf) && Name == "rintf") ||
+ (LibInfo->has(LibFunc::rintl) && Name == "rintl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") ||
+ (LibInfo->has(LibFunc::truncf) && Name == "truncf") ||
+ (LibInfo->has(LibFunc::truncl) && Name == "truncl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") ||
+ (LibInfo->has(LibFunc::log2f) && Name == "log2f") ||
+ (LibInfo->has(LibFunc::log2l) && Name == "log2l")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") ||
+ (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") ||
+ (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
} else if (Name == "memcmp") {
if (visitMemCmpCall(I))
return;
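These LibInfo->has(...) guards make the name match conditional on the library function actually being available; under -fno-builtin the query reports false and the call is left as a real library call. A stub illustrating the gate (names hypothetical, not the LLVM API):

    #include <set>
    #include <string>

    struct TLIStub {
      std::set<std::string> Available;  // populated from target and -fno-builtin*
      bool has(const std::string &F) const { return Available.count(F) != 0; }
    };

    // Only recognized-and-available names get folded to ISD nodes.
    static bool shouldFoldCall(const TLIStub &TLI, const std::string &Name) {
      return TLI.has(Name);  // e.g. false for "sin" under -fno-builtin-sin
    }

    int main() {
      TLIStub TLI;
      TLI.Available.insert("sqrt");
      return (shouldFoldCall(TLI, "sqrt") && !shouldFoldCall(TLI, "sin")) ? 0 : 1;
    }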
@@ -6516,10 +6609,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
/// entry block, return true. This includes arguments used by switches, since
/// the switch may expand into multiple basic blocks.
-static bool isOnlyUsedInEntryBlock(const Argument *A) {
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
// With FastISel active, we may be splitting blocks, so force creation
// of virtual registers for all non-dead arguments.
- if (EnableFastISel)
+ if (FastISel)
return A->use_empty();
const BasicBlock *Entry = A->getParent()->begin();
@@ -6709,7 +6802,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
SDB->getCurDebugLoc());
SDB->setValue(I, Res);
- if (!EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
if (FrameIndexSDNode *FI =
@@ -6719,7 +6812,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// If this argument is live outside of the entry block, insert a copy from
// wherever we got it to the vreg that other BB's will reference it as.
- if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
// If we can, though, try to skip creating an unnecessary vreg.
// FIXME: This isn't very clean... it would be nice to make this more
// general. It's also subtly incompatible with the hacks FastISel
@@ -6730,7 +6823,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
continue;
}
}
- if (!isOnlyUsedInEntryBlock(I)) {
+ if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) {
FuncInfo->InitializeRegForValue(I);
SDB->CopyToExportRegsIfNeeded(I);
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 0a21ca3..5147b6c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -67,6 +67,7 @@ class SIToFPInst;
class StoreInst;
class SwitchInst;
class TargetData;
+class TargetLibraryInfo;
class TargetLowering;
class TruncInst;
class UIToFPInst;
@@ -294,6 +295,7 @@ public:
SelectionDAG &DAG;
const TargetData *TD;
AliasAnalysis *AA;
+ const TargetLibraryInfo *LibInfo;
/// SwitchCases - Vector of CaseBlock structures used to communicate
/// SwitchInst code generation information.
@@ -338,7 +340,8 @@ public:
HasTailCall(false), Context(dag.getContext()) {
}
- void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+ void init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li);
/// clear - Clear out the current SelectionDAG and the associated
/// state and prepare this SelectionDAGBuilder object to be used
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 8cecc17..3c95059 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -41,6 +41,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -61,6 +62,81 @@ STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+#ifndef NDEBUG
+static cl::opt<bool>
+EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
+ cl::desc("Enable extra verbose messages in the \"fast\" "
+ "instruction selector"));
+ // Terminators
+STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
+STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
+STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch");
+STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr");
+STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke");
+STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume");
+STATISTIC(NumFastIselFailUnwind,"Fast isel fails on Unwind");
+STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable");
+
+ // Standard binary operators...
+STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add");
+STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd");
+STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub");
+STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub");
+STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul");
+STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul");
+STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv");
+STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv");
+STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv");
+STATISTIC(NumFastIselFailURem,"Fast isel fails on URem");
+STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem");
+STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem");
+
+ // Logical operators...
+STATISTIC(NumFastIselFailAnd,"Fast isel fails on And");
+STATISTIC(NumFastIselFailOr,"Fast isel fails on Or");
+STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor");
+
+ // Memory instructions...
+STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca");
+STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load");
+STATISTIC(NumFastIselFailStore,"Fast isel fails on Store");
+STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg");
+STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRMW");
+STATISTIC(NumFastIselFailFence,"Fast isel fails on Fence");
+STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr");
+
+ // Convert instructions...
+STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc");
+STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt");
+STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt");
+STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc");
+STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt");
+STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI");
+STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI");
+STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP");
+STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP");
+STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr");
+STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt");
+STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast");
+
+ // Other instructions...
+STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp");
+STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp");
+STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI");
+STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select");
+STATISTIC(NumFastIselFailCall,"Fast isel fails on Call");
+STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl");
+STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr");
+STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr");
+STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg");
+STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement");
+STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement");
+STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector");
+STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue");
+STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue");
+STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad");
+#endif
+
static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
cl::desc("Enable verbose messages in the \"fast\" "
@@ -177,7 +253,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- assert(!MI->getDesc().hasPostISelHook() &&
+ assert(!MI->hasPostISelHook() &&
"If a target marks an instruction with 'hasPostISelHook', "
"it must implement TargetLowering::AdjustInstrPostInstrSelection!");
}
@@ -190,7 +266,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
- CurDAG(new SelectionDAG(tm)),
+ CurDAG(new SelectionDAG(tm, OL)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
GFI(),
OptLevel(OL),
@@ -198,6 +274,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
}
SelectionDAGISel::~SelectionDAGISel() {
@@ -211,6 +288,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AliasAnalysis>();
AU.addRequired<GCModuleInfo>();
AU.addPreserved<GCModuleInfo>();
+ AU.addRequired<TargetLibraryInfo>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -256,9 +334,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Do some sanity-checking on the command-line options.
- assert((!EnableFastISelVerbose || EnableFastISel) &&
+ assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
"-fast-isel-verbose requires -fast-isel");
- assert((!EnableFastISelAbort || EnableFastISel) &&
+ assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
"-fast-isel-abort requires -fast-isel");
const Function &Fn = *mf.getFunction();
@@ -268,6 +346,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
RegInfo = &MF->getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
+ LibInfo = &getAnalysis<TargetLibraryInfo>();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
@@ -282,7 +361,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
else
FuncInfo->BPI = 0;
- SDB->init(GFI, *AA);
+ SDB->init(GFI, *AA, LibInfo);
SelectAllBasicBlocks(Fn);
@@ -346,7 +425,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TII.get(TargetOpcode::DBG_VALUE))
.addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug)
.addImm(Offset).addMetadata(Variable);
- EntryMBB->insertAfter(CopyUseMI, NewMI);
+ MachineBasicBlock::iterator Pos = CopyUseMI;
+ EntryMBB->insertAfter(Pos, NewMI);
}
}
}
@@ -820,10 +900,88 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
}
+#ifndef NDEBUG
+static void collectFailStats(const Instruction *I) {
+ switch (I->getOpcode()) {
+ default: assert (0 && "<Invalid operator> ");
+
+ // Terminators
+ case Instruction::Ret: NumFastIselFailRet++; return;
+ case Instruction::Br: NumFastIselFailBr++; return;
+ case Instruction::Switch: NumFastIselFailSwitch++; return;
+ case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return;
+ case Instruction::Invoke: NumFastIselFailInvoke++; return;
+ case Instruction::Resume: NumFastIselFailResume++; return;
+ case Instruction::Unwind: NumFastIselFailUnwind++; return;
+ case Instruction::Unreachable: NumFastIselFailUnreachable++; return;
+
+ // Standard binary operators...
+ case Instruction::Add: NumFastIselFailAdd++; return;
+ case Instruction::FAdd: NumFastIselFailFAdd++; return;
+ case Instruction::Sub: NumFastIselFailSub++; return;
+ case Instruction::FSub: NumFastIselFailFSub++; return;
+ case Instruction::Mul: NumFastIselFailMul++; return;
+ case Instruction::FMul: NumFastIselFailFMul++; return;
+ case Instruction::UDiv: NumFastIselFailUDiv++; return;
+ case Instruction::SDiv: NumFastIselFailSDiv++; return;
+ case Instruction::FDiv: NumFastIselFailFDiv++; return;
+ case Instruction::URem: NumFastIselFailURem++; return;
+ case Instruction::SRem: NumFastIselFailSRem++; return;
+ case Instruction::FRem: NumFastIselFailFRem++; return;
+
+ // Logical operators...
+ case Instruction::And: NumFastIselFailAnd++; return;
+ case Instruction::Or: NumFastIselFailOr++; return;
+ case Instruction::Xor: NumFastIselFailXor++; return;
+
+ // Memory instructions...
+ case Instruction::Alloca: NumFastIselFailAlloca++; return;
+ case Instruction::Load: NumFastIselFailLoad++; return;
+ case Instruction::Store: NumFastIselFailStore++; return;
+ case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return;
+ case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return;
+ case Instruction::Fence: NumFastIselFailFence++; return;
+ case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return;
+
+ // Convert instructions...
+ case Instruction::Trunc: NumFastIselFailTrunc++; return;
+ case Instruction::ZExt: NumFastIselFailZExt++; return;
+ case Instruction::SExt: NumFastIselFailSExt++; return;
+ case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return;
+ case Instruction::FPExt: NumFastIselFailFPExt++; return;
+ case Instruction::FPToUI: NumFastIselFailFPToUI++; return;
+ case Instruction::FPToSI: NumFastIselFailFPToSI++; return;
+ case Instruction::UIToFP: NumFastIselFailUIToFP++; return;
+ case Instruction::SIToFP: NumFastIselFailSIToFP++; return;
+ case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return;
+ case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return;
+ case Instruction::BitCast: NumFastIselFailBitCast++; return;
+
+ // Other instructions...
+ case Instruction::ICmp: NumFastIselFailICmp++; return;
+ case Instruction::FCmp: NumFastIselFailFCmp++; return;
+ case Instruction::PHI: NumFastIselFailPHI++; return;
+ case Instruction::Select: NumFastIselFailSelect++; return;
+ case Instruction::Call: NumFastIselFailCall++; return;
+ case Instruction::Shl: NumFastIselFailShl++; return;
+ case Instruction::LShr: NumFastIselFailLShr++; return;
+ case Instruction::AShr: NumFastIselFailAShr++; return;
+ case Instruction::VAArg: NumFastIselFailVAArg++; return;
+ case Instruction::ExtractElement: NumFastIselFailExtractElement++; return;
+ case Instruction::InsertElement: NumFastIselFailInsertElement++; return;
+ case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return;
+ case Instruction::ExtractValue: NumFastIselFailExtractValue++; return;
+ case Instruction::InsertValue: NumFastIselFailInsertValue++; return;
+ case Instruction::LandingPad: NumFastIselFailLandingPad++; return;
+ }
+ return;
+}
+#endif
+
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
- if (EnableFastISel)
+ if (TM.Options.EnableFastISel)
FastIS = TLI.createFastISel(*FuncInfo);
// Iterate over all basic blocks in the function.
@@ -931,6 +1089,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
continue;
}
+#ifndef NDEBUG
+ if (EnableFastISelVerbose2)
+ collectFailStats(Inst);
+#endif
+
// Then handle certain instructions as single-LLVM-Instruction blocks.
if (isa<CallInst>(Inst)) {
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index d7bad43..9ced1ac 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -577,16 +577,26 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
// These library functions default to expand.
- setOperationAction(ISD::FLOG , MVT::f64, Expand);
- setOperationAction(ISD::FLOG2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG10,MVT::f64, Expand);
- setOperationAction(ISD::FEXP , MVT::f64, Expand);
- setOperationAction(ISD::FEXP2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG , MVT::f32, Expand);
- setOperationAction(ISD::FLOG2, MVT::f32, Expand);
- setOperationAction(ISD::FLOG10,MVT::f32, Expand);
- setOperationAction(ISD::FEXP , MVT::f32, Expand);
- setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG , MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
@@ -1473,9 +1483,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
SDValue InnerOp = InOp.getNode()->getOperand(0);
EVT InnerVT = InnerOp.getValueType();
- if ((APInt::getHighBitsSet(BitWidth,
- BitWidth - InnerVT.getSizeInBits()) &
- DemandedMask) == 0 &&
+ unsigned InnerBits = InnerVT.getSizeInBits();
+ if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
@@ -1545,7 +1554,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (DemandedMask == 1)
+ if (NewMask == 1)
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1)));
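
The rewritten guard above keys on NewMask (the bits actually demanded) rather than the full DemandedMask; when only bit 0 is demanded, an arithmetic shift right can be replaced by a logical one. A standalone C++ check of that equivalence (not LLVM code, and relying on '>>' being arithmetic for negative ints, as it is on mainstream compilers):

// Standalone illustration: bit 0 of (x >> s) is bit s of x whether the
// shift is arithmetic or logical, for any shift amount below the width.
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x : {-7, -1, 0, 1, 42})
    for (unsigned s = 0; s < 31; ++s) {
      uint32_t Sra = (uint32_t)(x >> s);   // arithmetic shift right
      uint32_t Srl = (uint32_t)x >> s;     // logical shift right
      assert((Sra & 1u) == (Srl & 1u));    // the demanded bit agrees
    }
  return 0;
}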
@@ -1783,7 +1792,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::BITCAST:
// If this is an FP->Int bitcast and if the sign bit is the only
// thing demanded, turn this into a FGETSIGN.
- if (!Op.getValueType().isVector() &&
+ if (!TLO.LegalOperations() &&
+ !Op.getValueType().isVector() &&
!Op.getOperand(0).getValueType().isVector() &&
NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
Op.getOperand(0).getValueType().isFloatingPoint()) {
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
index 13f269e..70fcf55 100644
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -124,7 +124,7 @@ MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
}
bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
- return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
+ return (MBB && !MBB->empty() && MBB->back().isReturn());
}
// Initialize shrink wrapping DFA sets, called before iterations.
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index c865192..8e2f74f 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -69,6 +69,8 @@ namespace {
private:
bool setupEntryBlockAndCallSites(Function &F);
+ void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal);
Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads);
void lowerIncomingArguments(Function &F);
void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes);
@@ -138,6 +140,38 @@ static void MarkBlocksLiveIn(BasicBlock *BB,
MarkBlocksLiveIn(*PI, LiveBBs);
}
+/// substituteLPadValues - Substitute the values returned by the landingpad
+/// instruction with those returned by the personality function.
+void SjLjEHPass::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal) {
+ SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end());
+ while (!UseWorkList.empty()) {
+ Value *Val = UseWorkList.pop_back_val();
+ ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val);
+ if (!EVI) continue;
+ if (EVI->getNumIndices() != 1) continue;
+ if (*EVI->idx_begin() == 0)
+ EVI->replaceAllUsesWith(ExnVal);
+ else if (*EVI->idx_begin() == 1)
+ EVI->replaceAllUsesWith(SelVal);
+ if (EVI->getNumUses() == 0)
+ EVI->eraseFromParent();
+ }
+
+ if (LPI->getNumUses() == 0) return;
+
+ // There are still some uses of LPI. Construct an aggregate with the exception
+ // values and replace the LPI with that aggregate.
+ Type *LPadType = LPI->getType();
+ Value *LPadVal = UndefValue::get(LPadType);
+ IRBuilder<>
+ Builder(llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal))));
+ LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
+ LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
+
+ LPI->replaceAllUsesWith(LPadVal);
+}
+
/// setupFunctionContext - Allocate the function context on the stack and fill
/// it with all of the data that we know at this point.
Value *SjLjEHPass::
@@ -189,12 +223,7 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext()));
Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
- Type *LPadType = LPI->getType();
- Value *LPadVal = UndefValue::get(LPadType);
- LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
- LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
-
- LPI->replaceAllUsesWith(LPadVal);
+ substituteLPadValues(LPI, ExnVal, SelVal);
}
// Personality function
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 751d604..c086073 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -80,7 +80,7 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin();
I != E;) {
--I;
- if (I->getDesc().isCall()) {
+ if (I->isCall()) {
LSP.second = LIS.getInstructionIndex(I);
break;
}
diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp
deleted file mode 100644
index 16cf9b8..0000000
--- a/lib/CodeGen/Splitter.cpp
+++ /dev/null
@@ -1,827 +0,0 @@
-//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "loopsplitter"
-
-#include "Splitter.h"
-
-#include "llvm/Module.h"
-#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-using namespace llvm;
-
-char LoopSplitter::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting",
- "Split virtual regists across loop boundaries.", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(LoopSplitter, "loop-splitting",
- "Split virtual regists across loop boundaries.", false, false)
-
-namespace llvm {
-
- class StartSlotComparator {
- public:
- StartSlotComparator(LiveIntervals &lis) : lis(lis) {}
- bool operator()(const MachineBasicBlock *mbb1,
- const MachineBasicBlock *mbb2) const {
- return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2);
- }
- private:
- LiveIntervals &lis;
- };
-
- class LoopSplit {
- public:
- LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop)
- : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) {
- assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
- "Cannot split physical registers.");
- }
-
- LiveInterval& getLI() const { return li; }
-
- MachineLoop& getLoop() const { return loop; }
-
- bool isValid() const { return valid; }
-
- bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); }
-
- void invalidate() { valid = false; }
-
- void splitIncoming() { inSplit = true; }
-
- void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); }
-
- void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); }
-
- void apply() {
- assert(valid && "Attempt to apply invalid split.");
- applyIncoming();
- applyOutgoing();
- copyRanges();
- renameInside();
- }
-
- private:
- LoopSplitter &ls;
- LiveInterval &li;
- MachineLoop &loop;
- bool valid, inSplit;
- std::set<MachineLoop::Edge> outSplits;
- std::vector<MachineInstr*> loopInstrs;
-
- LiveInterval *newLI;
- std::map<VNInfo*, VNInfo*> vniMap;
-
- LiveInterval* getNewLI() {
- if (newLI == 0) {
- const TargetRegisterClass *trc = ls.mri->getRegClass(li.reg);
- unsigned vreg = ls.mri->createVirtualRegister(trc);
- newLI = &ls.lis->getOrCreateInterval(vreg);
- }
- return newLI;
- }
-
- VNInfo* getNewVNI(VNInfo *oldVNI) {
- VNInfo *newVNI = vniMap[oldVNI];
-
- if (newVNI == 0) {
- newVNI = getNewLI()->createValueCopy(oldVNI,
- ls.lis->getVNInfoAllocator());
- vniMap[oldVNI] = newVNI;
- }
-
- return newVNI;
- }
-
- void applyIncoming() {
- if (!inSplit) {
- return;
- }
-
- MachineBasicBlock *preHeader = loop.getLoopPreheader();
- if (preHeader == 0) {
- assert(ls.canInsertPreHeader(loop) &&
- "Can't insert required preheader.");
- preHeader = &ls.insertPreHeader(loop);
- }
-
- LiveRange *preHeaderRange =
- ls.lis->findExitingRange(li, preHeader);
- assert(preHeaderRange != 0 && "Range not live into preheader.");
-
- // Insert the new copy.
- MachineInstr *copy = BuildMI(*preHeader,
- preHeader->getFirstTerminator(),
- DebugLoc(),
- ls.tii->get(TargetOpcode::COPY))
- .addReg(getNewLI()->reg, RegState::Define)
- .addReg(li.reg, RegState::Kill);
-
- ls.lis->InsertMachineInstrInMaps(copy);
-
- SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getRegSlot();
-
- VNInfo *newVal = getNewVNI(preHeaderRange->valno);
- newVal->def = copyDefIdx;
- newVal->setCopy(copy);
- li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true);
-
- getNewLI()->addRange(LiveRange(copyDefIdx,
- ls.lis->getMBBEndIdx(preHeader),
- newVal));
- }
-
- void applyOutgoing() {
-
- for (std::set<MachineLoop::Edge>::iterator osItr = outSplits.begin(),
- osEnd = outSplits.end();
- osItr != osEnd; ++osItr) {
- MachineLoop::Edge edge = *osItr;
- MachineBasicBlock *outBlock = edge.second;
- if (ls.isCriticalEdge(edge)) {
- assert(ls.canSplitEdge(edge) && "Unsplitable critical edge.");
- outBlock = &ls.splitEdge(edge, loop);
- }
- LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock);
- assert(outRange != 0 && "No exiting range?");
-
- MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(),
- DebugLoc(),
- ls.tii->get(TargetOpcode::COPY))
- .addReg(li.reg, RegState::Define)
- .addReg(getNewLI()->reg, RegState::Kill);
-
- ls.lis->InsertMachineInstrInMaps(copy);
-
- SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getRegSlot();
-
- // Blow away output range definition.
- outRange->valno->def = ls.lis->getInvalidIndex();
- li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx);
-
- SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock);
- assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *newVal =
- getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator());
-
- getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock),
- copyDefIdx, newVal));
-
- }
- }
-
- void copyRange(LiveRange &lr) {
- std::pair<bool, LoopSplitter::SlotPair> lsr =
- ls.getLoopSubRange(lr, loop);
-
- if (!lsr.first)
- return;
-
- LiveRange loopRange(lsr.second.first, lsr.second.second,
- getNewVNI(lr.valno));
-
- li.removeRange(loopRange.start, loopRange.end, true);
-
- getNewLI()->addRange(loopRange);
- }
-
- void copyRanges() {
- for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
- iEnd = loopInstrs.end();
- iItr != iEnd; ++iItr) {
- MachineInstr &instr = **iItr;
- SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr);
- if (instr.modifiesRegister(li.reg, 0)) {
- LiveRange *defRange =
- li.getLiveRangeContaining(instrIdx.getRegSlot());
- if (defRange != 0) // May have caught this already.
- copyRange(*defRange);
- }
- if (instr.readsRegister(li.reg, 0)) {
- LiveRange *useRange =
- li.getLiveRangeContaining(instrIdx.getRegSlot(true));
- if (useRange != 0) { // May have caught this already.
- copyRange(*useRange);
- }
- }
- }
-
- for (MachineLoop::block_iterator bbItr = loop.block_begin(),
- bbEnd = loop.block_end();
- bbItr != bbEnd; ++bbItr) {
- MachineBasicBlock &loopBlock = **bbItr;
- LiveRange *enteringRange =
- ls.lis->findEnteringRange(li, &loopBlock);
- if (enteringRange != 0) {
- copyRange(*enteringRange);
- }
- }
- }
-
- void renameInside() {
- for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
- iEnd = loopInstrs.end();
- iItr != iEnd; ++iItr) {
- MachineInstr &instr = **iItr;
- for (unsigned i = 0; i < instr.getNumOperands(); ++i) {
- MachineOperand &mop = instr.getOperand(i);
- if (mop.isReg() && mop.getReg() == li.reg) {
- mop.setReg(getNewLI()->reg);
- }
- }
- }
- }
-
- };
-
- void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const {
- au.addRequired<MachineDominatorTree>();
- au.addPreserved<MachineDominatorTree>();
- au.addRequired<MachineLoopInfo>();
- au.addPreserved<MachineLoopInfo>();
- au.addPreservedID(RegisterCoalescerPassID);
- au.addPreserved<CalculateSpillWeights>();
- au.addPreserved<LiveStacks>();
- au.addRequired<SlotIndexes>();
- au.addPreserved<SlotIndexes>();
- au.addRequired<LiveIntervals>();
- au.addPreserved<LiveIntervals>();
- MachineFunctionPass::getAnalysisUsage(au);
- }
-
- bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) {
-
- mf = &fn;
- mri = &mf->getRegInfo();
- tii = mf->getTarget().getInstrInfo();
- tri = mf->getTarget().getRegisterInfo();
- sis = &getAnalysis<SlotIndexes>();
- lis = &getAnalysis<LiveIntervals>();
- mli = &getAnalysis<MachineLoopInfo>();
- mdt = &getAnalysis<MachineDominatorTree>();
-
- fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
- mf->getFunction()->getName().str();
-
- dbgs() << "Splitting " << mf->getFunction()->getName() << ".";
-
- dumpOddTerminators();
-
-// dbgs() << "----------------------------------------\n";
-// lis->dump();
-// dbgs() << "----------------------------------------\n";
-
-// std::deque<MachineLoop*> loops;
-// std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
-// dbgs() << "Loops:\n";
-// while (!loops.empty()) {
-// MachineLoop &loop = *loops.front();
-// loops.pop_front();
-// std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
-
-// dumpLoopInfo(loop);
-// }
-
- //lis->dump();
- //exit(0);
-
- // Setup initial intervals.
- for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval *li = liItr->second;
-
- if (TargetRegisterInfo::isVirtualRegister(li->reg) &&
- !lis->intervalIsInOneMBB(*li)) {
- intervals.push_back(li);
- }
- }
-
- processIntervals();
-
- intervals.clear();
-
-// dbgs() << "----------------------------------------\n";
-// lis->dump();
-// dbgs() << "----------------------------------------\n";
-
- dumpOddTerminators();
-
- //exit(1);
-
- return false;
- }
-
- void LoopSplitter::releaseMemory() {
- fqn.clear();
- intervals.clear();
- loopRangeMap.clear();
- }
-
- void LoopSplitter::dumpOddTerminators() {
- for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end();
- bbItr != bbEnd; ++bbItr) {
- MachineBasicBlock *mbb = &*bbItr;
- MachineBasicBlock *a = 0, *b = 0;
- SmallVector<MachineOperand, 4> c;
- if (tii->AnalyzeBranch(*mbb, a, b, c)) {
- dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n";
- dbgs() << " Terminators:\n";
- for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end();
- iItr != iEnd; ++iItr) {
- MachineInstr *instr= &*iItr;
- dbgs() << " " << *instr << "";
- }
- dbgs() << "\n Listed successors: [ ";
- for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end();
- sItr != sEnd; ++sItr) {
- MachineBasicBlock *succMBB = *sItr;
- dbgs() << succMBB->getNumber() << " ";
- }
- dbgs() << "]\n\n";
- }
- }
- }
-
- void LoopSplitter::dumpLoopInfo(MachineLoop &loop) {
- MachineBasicBlock &headerBlock = *loop.getHeader();
- typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
- ExitEdgesList exitEdges;
- loop.getExitEdges(exitEdges);
-
- dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ ";
- for (std::vector<MachineBasicBlock*>::const_iterator
- subBlockItr = loop.getBlocks().begin(),
- subBlockEnd = loop.getBlocks().end();
- subBlockItr != subBlockEnd; ++subBlockItr) {
- MachineBasicBlock &subBlock = **subBlockItr;
- dbgs() << "BB#" << subBlock.getNumber() << " ";
- }
- dbgs() << "], Exit edges: [ ";
- for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
- exitEdgeEnd = exitEdges.end();
- exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
- MachineLoop::Edge &exitEdge = *exitEdgeItr;
- dbgs() << "(MBB#" << exitEdge.first->getNumber()
- << ", MBB#" << exitEdge.second->getNumber() << ") ";
- }
- dbgs() << "], Sub-Loop Headers: [ ";
- for (MachineLoop::iterator subLoopItr = loop.begin(),
- subLoopEnd = loop.end();
- subLoopItr != subLoopEnd; ++subLoopItr) {
- MachineLoop &subLoop = **subLoopItr;
- MachineBasicBlock &subLoopBlock = *subLoop.getHeader();
- dbgs() << "BB#" << subLoopBlock.getNumber() << " ";
- }
- dbgs() << "]\n";
- }
-
- void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) {
- mbb.updateTerminator();
-
- for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end();
- miItr != miEnd; ++miItr) {
- if (lis->isNotInMIMap(miItr)) {
- lis->InsertMachineInstrInMaps(miItr);
- }
- }
- }
-
- bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) {
- MachineBasicBlock *header = loop.getHeader();
- MachineBasicBlock *a = 0, *b = 0;
- SmallVector<MachineOperand, 4> c;
-
- for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(),
- pbEnd = header->pred_end();
- pbItr != pbEnd; ++pbItr) {
- MachineBasicBlock *predBlock = *pbItr;
- if (!!tii->AnalyzeBranch(*predBlock, a, b, c)) {
- return false;
- }
- }
-
- MachineFunction::iterator headerItr(header);
- if (headerItr == mf->begin())
- return true;
- MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr);
- assert(headerLayoutPred != 0 && "Header should have layout pred.");
-
- return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c));
- }
-
- MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) {
- assert(loop.getLoopPreheader() == 0 && "Loop already has preheader.");
-
- MachineBasicBlock &header = *loop.getHeader();
-
- // Save the preds - we'll need to update them once we insert the preheader.
- typedef std::set<MachineBasicBlock*> HeaderPreds;
- HeaderPreds headerPreds;
-
- for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
- predEnd = header.pred_end();
- predItr != predEnd; ++predItr) {
- if (!loop.contains(*predItr))
- headerPreds.insert(*predItr);
- }
-
- assert(!headerPreds.empty() && "No predecessors for header?");
-
- //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader...";
-
- MachineBasicBlock *preHeader =
- mf->CreateMachineBasicBlock(header.getBasicBlock());
-
- assert(preHeader != 0 && "Failed to create pre-header.");
-
- mf->insert(header, preHeader);
-
- for (HeaderPreds::iterator hpItr = headerPreds.begin(),
- hpEnd = headerPreds.end();
- hpItr != hpEnd; ++hpItr) {
- assert(*hpItr != 0 && "How'd a null predecessor get into this set?");
- MachineBasicBlock &hp = **hpItr;
- hp.ReplaceUsesOfBlockWith(&header, preHeader);
- }
- preHeader->addSuccessor(&header);
-
- MachineBasicBlock *oldLayoutPred =
- llvm::prior(MachineFunction::iterator(preHeader));
- if (oldLayoutPred != 0) {
- updateTerminators(*oldLayoutPred);
- }
-
- lis->InsertMBBInMaps(preHeader);
-
- if (MachineLoop *parentLoop = loop.getParentLoop()) {
- assert(parentLoop->getHeader() != loop.getHeader() &&
- "Parent loop has same header?");
- parentLoop->addBasicBlockToLoop(preHeader, mli->getBase());
-
- // Invalidate all parent loop ranges.
- while (parentLoop != 0) {
- loopRangeMap.erase(parentLoop);
- parentLoop = parentLoop->getParentLoop();
- }
- }
-
- for (LiveIntervals::iterator liItr = lis->begin(),
- liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval &li = *liItr->second;
-
- // Is this safe for physregs?
- // TargetRegisterInfo::isPhysicalRegister(li.reg) ||
- if (!lis->isLiveInToMBB(li, &header))
- continue;
-
- if (lis->isLiveInToMBB(li, preHeader)) {
- assert(lis->isLiveOutOfMBB(li, preHeader) &&
- "Range terminates in newly added preheader?");
- continue;
- }
-
- bool insertRange = false;
-
- for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(),
- predEnd = preHeader->pred_end();
- predItr != predEnd; ++predItr) {
- MachineBasicBlock *predMBB = *predItr;
- if (lis->isLiveOutOfMBB(li, predMBB)) {
- insertRange = true;
- break;
- }
- }
-
- if (!insertRange)
- continue;
-
- SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader);
- assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator());
- li.addRange(LiveRange(lis->getMBBStartIdx(preHeader),
- lis->getMBBEndIdx(preHeader),
- newVal));
- }
-
-
- //dbgs() << "Dumping SlotIndexes:\n";
- //sis->dump();
-
- //dbgs() << "done. (Added MBB#" << preHeader->getNumber() << ")\n";
-
- return *preHeader;
- }
-
- bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) {
- assert(edge.first->succ_size() > 1 && "Non-sensical edge.");
- if (edge.second->pred_size() > 1)
- return true;
- return false;
- }
-
- bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) {
- MachineFunction::iterator outBlockItr(edge.second);
- if (outBlockItr == mf->begin())
- return true;
- MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr);
- assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin.");
- MachineBasicBlock *a = 0, *b = 0;
- SmallVector<MachineOperand, 4> c;
- return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) &&
- !tii->AnalyzeBranch(*edge.first, a, b, c));
- }
-
- MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge,
- MachineLoop &loop) {
-
- MachineBasicBlock &inBlock = *edge.first;
- MachineBasicBlock &outBlock = *edge.second;
-
- assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) &&
- "Splitting non-critical edge?");
-
- //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber()
- // << " -> MBB#" << outBlock.getNumber() << ")...";
-
- MachineBasicBlock *splitBlock =
- mf->CreateMachineBasicBlock();
-
- assert(splitBlock != 0 && "Failed to create split block.");
-
- mf->insert(&outBlock, splitBlock);
-
- inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock);
- splitBlock->addSuccessor(&outBlock);
-
- MachineBasicBlock *oldLayoutPred =
- llvm::prior(MachineFunction::iterator(splitBlock));
- if (oldLayoutPred != 0) {
- updateTerminators(*oldLayoutPred);
- }
-
- lis->InsertMBBInMaps(splitBlock);
-
- loopRangeMap.erase(&loop);
-
- MachineLoop *splitParentLoop = loop.getParentLoop();
- while (splitParentLoop != 0 &&
- !splitParentLoop->contains(&outBlock)) {
- splitParentLoop = splitParentLoop->getParentLoop();
- }
-
- if (splitParentLoop != 0) {
- assert(splitParentLoop->contains(&loop) &&
- "Split-block parent doesn't contain original loop?");
- splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase());
-
- // Invalidate all parent loop ranges.
- while (splitParentLoop != 0) {
- loopRangeMap.erase(splitParentLoop);
- splitParentLoop = splitParentLoop->getParentLoop();
- }
- }
-
-
- for (LiveIntervals::iterator liItr = lis->begin(),
- liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval &li = *liItr->second;
- bool intersects = lis->isLiveOutOfMBB(li, &inBlock) &&
- lis->isLiveInToMBB(li, &outBlock);
- if (lis->isLiveInToMBB(li, splitBlock)) {
- if (!intersects) {
- li.removeRange(lis->getMBBStartIdx(splitBlock),
- lis->getMBBEndIdx(splitBlock), true);
- }
- } else if (intersects) {
- SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock);
- assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *newVal = li.getNextValue(newDefIdx, 0,
- lis->getVNInfoAllocator());
- li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock),
- lis->getMBBEndIdx(splitBlock),
- newVal));
- }
- }
-
- //dbgs() << "done. (Added MBB#" << splitBlock->getNumber() << ")\n";
-
- return *splitBlock;
- }
-
- LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) {
- typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet;
- LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop);
- if (lrItr == loopRangeMap.end()) {
- LoopMBBSet loopMBBs((StartSlotComparator(*lis)));
- std::copy(loop.block_begin(), loop.block_end(),
- std::inserter(loopMBBs, loopMBBs.begin()));
-
- assert(!loopMBBs.empty() && "No blocks in loop?");
-
- LoopRanges &loopRanges = loopRangeMap[&loop];
- assert(loopRanges.empty() && "Loop encountered but not processed?");
- SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin());
- loopRanges.push_back(
- std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()),
- lis->getInvalidIndex()));
- for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()),
- curBlockEnd = loopMBBs.end();
- curBlockItr != curBlockEnd; ++curBlockItr) {
- SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr);
- if (newStart != oldEnd) {
- loopRanges.back().second = oldEnd;
- loopRanges.push_back(std::make_pair(newStart,
- lis->getInvalidIndex()));
- }
- oldEnd = lis->getMBBEndIdx(*curBlockItr);
- }
-
- loopRanges.back().second =
- lis->getMBBEndIdx(*llvm::prior(loopMBBs.end()));
-
- return loopRanges;
- }
- return lrItr->second;
- }
-
- std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange(
- const LiveRange &lr,
- MachineLoop &loop) {
- LoopRanges &loopRanges = getLoopRanges(loop);
- LoopRanges::iterator lrItr = loopRanges.begin(),
- lrEnd = loopRanges.end();
- while (lrItr != lrEnd && lr.start >= lrItr->second) {
- ++lrItr;
- }
-
- if (lrItr == lrEnd) {
- SlotIndex invalid = lis->getInvalidIndex();
- return std::make_pair(false, SlotPair(invalid, invalid));
- }
-
- SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start);
- SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end);
-
- return std::make_pair(true, SlotPair(srStart, srEnd));
- }
-
- void LoopSplitter::dumpLoopRanges(MachineLoop &loop) {
- LoopRanges &loopRanges = getLoopRanges(loop);
- dbgs() << "For loop MBB#" << loop.getHeader()->getNumber() << ", subranges are: [ ";
- for (LoopRanges::iterator lrItr = loopRanges.begin(), lrEnd = loopRanges.end();
- lrItr != lrEnd; ++lrItr) {
- dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") ";
- }
- dbgs() << "]\n";
- }
-
- void LoopSplitter::processHeader(LoopSplit &split) {
- MachineBasicBlock &header = *split.getLoop().getHeader();
- //dbgs() << " Processing loop header BB#" << header.getNumber() << "\n";
-
- if (!lis->isLiveInToMBB(split.getLI(), &header))
- return; // Not live in, but nothing wrong so far.
-
- MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader();
- if (!preHeader) {
-
- if (!canInsertPreHeader(split.getLoop())) {
- split.invalidate();
- return; // Couldn't insert a pre-header. Bail on this interval.
- }
-
- for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
- predEnd = header.pred_end();
- predItr != predEnd; ++predItr) {
- if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) {
- split.splitIncoming();
- break;
- }
- }
- } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) {
- split.splitIncoming();
- }
- }
-
- void LoopSplitter::processLoopExits(LoopSplit &split) {
- typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
- ExitEdgesList exitEdges;
- split.getLoop().getExitEdges(exitEdges);
-
- //dbgs() << " Processing loop exits:\n";
-
- for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
- exitEdgeEnd = exitEdges.end();
- exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
- MachineLoop::Edge exitEdge = *exitEdgeItr;
-
- LiveRange *outRange =
- split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second));
-
- if (outRange != 0) {
- if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) {
- split.invalidate();
- return;
- }
-
- split.splitOutgoing(exitEdge);
- }
- }
- }
-
- void LoopSplitter::processLoopUses(LoopSplit &split) {
- std::set<MachineInstr*> processed;
-
- for (MachineRegisterInfo::reg_iterator
- rItr = mri->reg_begin(split.getLI().reg),
- rEnd = mri->reg_end();
- rItr != rEnd; ++rItr) {
- MachineInstr &instr = *rItr;
- if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) {
- split.addLoopInstr(&instr);
- processed.insert(&instr);
- }
- }
-
- //dbgs() << " Rewriting reg" << li.reg << " to reg" << newLI->reg
- // << " in blocks [ ";
- //dbgs() << "]\n";
- }
-
- bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) {
- assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
- "Attempt to split physical register.");
-
- LoopSplit split(*this, li, loop);
- processHeader(split);
- if (split.isValid())
- processLoopExits(split);
- if (split.isValid())
- processLoopUses(split);
- if (split.isValid() /* && split.isWorthwhile() */) {
- split.apply();
- DEBUG(dbgs() << "Success.\n");
- return true;
- }
- DEBUG(dbgs() << "Failed.\n");
- return false;
- }
-
- void LoopSplitter::processInterval(LiveInterval &li) {
- std::deque<MachineLoop*> loops;
- std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
-
- while (!loops.empty()) {
- MachineLoop &loop = *loops.front();
- loops.pop_front();
- DEBUG(
- dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#"
- << loop.getHeader()->getNumber() << " ";
- );
- if (!splitOverLoop(li, loop)) {
- // Couldn't split over outer loop, schedule sub-loops to be checked.
- std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
- }
- }
- }
-
- void LoopSplitter::processIntervals() {
- while (!intervals.empty()) {
- LiveInterval &li = *intervals.front();
- intervals.pop_front();
-
- assert(!lis->intervalIsInOneMBB(li) &&
- "Single interval in process worklist.");
-
- processInterval(li);
- }
- }
-
-}
diff --git a/lib/CodeGen/Splitter.h b/lib/CodeGen/Splitter.h
deleted file mode 100644
index 9fb1b8b..0000000
--- a/lib/CodeGen/Splitter.h
+++ /dev/null
@@ -1,101 +0,0 @@
-//===-- llvm/CodeGen/Splitter.h - Splitter -*- C++ -*----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_SPLITTER_H
-#define LLVM_CODEGEN_SPLITTER_H
-
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-
-#include <deque>
-#include <map>
-#include <string>
-#include <vector>
-
-namespace llvm {
-
- class LiveInterval;
- class LiveIntervals;
- struct LiveRange;
- class LoopSplit;
- class MachineDominatorTree;
- class MachineRegisterInfo;
- class SlotIndexes;
- class TargetInstrInfo;
- class VNInfo;
-
- class LoopSplitter : public MachineFunctionPass {
- friend class LoopSplit;
- public:
- static char ID;
-
- LoopSplitter() : MachineFunctionPass(ID) {
- initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &au) const;
-
- virtual bool runOnMachineFunction(MachineFunction &fn);
-
- virtual void releaseMemory();
-
-
- private:
-
- MachineFunction *mf;
- LiveIntervals *lis;
- MachineLoopInfo *mli;
- MachineRegisterInfo *mri;
- MachineDominatorTree *mdt;
- SlotIndexes *sis;
- const TargetInstrInfo *tii;
- const TargetRegisterInfo *tri;
-
- std::string fqn;
- std::deque<LiveInterval*> intervals;
-
- typedef std::pair<SlotIndex, SlotIndex> SlotPair;
- typedef std::vector<SlotPair> LoopRanges;
- typedef std::map<MachineLoop*, LoopRanges> LoopRangeMap;
- LoopRangeMap loopRangeMap;
-
- void dumpLoopInfo(MachineLoop &loop);
-
- void dumpOddTerminators();
-
- void updateTerminators(MachineBasicBlock &mbb);
-
- bool canInsertPreHeader(MachineLoop &loop);
- MachineBasicBlock& insertPreHeader(MachineLoop &loop);
-
- bool isCriticalEdge(MachineLoop::Edge &edge);
- bool canSplitEdge(MachineLoop::Edge &edge);
- MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop);
-
- LoopRanges& getLoopRanges(MachineLoop &loop);
- std::pair<bool, SlotPair> getLoopSubRange(const LiveRange &lr,
- MachineLoop &loop);
-
- void dumpLoopRanges(MachineLoop &loop);
-
- void processHeader(LoopSplit &split);
- void processLoopExits(LoopSplit &split);
- void processLoopUses(LoopSplit &split);
-
- bool splitOverLoop(LiveInterval &li, MachineLoop &loop);
-
- void processInterval(LiveInterval &li);
-
- void processIntervals();
- };
-
-}
-
-#endif
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 3a6211a..031377b 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -553,7 +553,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
bool HasIndirectbr = false;
if (!TailBB.empty())
- HasIndirectbr = TailBB.back().getDesc().isIndirectBranch();
+ HasIndirectbr = TailBB.back().isIndirectBranch();
if (HasIndirectbr && PreRegAlloc)
MaxDuplicateCount = 20;
@@ -561,22 +561,21 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
unsigned InstrCount = 0;
- for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end();
- ++I) {
+ for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) {
// Non-duplicable things shouldn't be tail-duplicated.
- if (I->getDesc().isNotDuplicable())
+ if (I->isNotDuplicable())
return false;
// Do not duplicate 'return' instructions if this is a pre-regalloc run.
// A return may expand into a lot more instructions (e.g. reload of callee
// saved registers) after PEI.
- if (PreRegAlloc && I->getDesc().isReturn())
+ if (PreRegAlloc && I->isReturn())
return false;
// Avoid duplicating calls before register allocation. Calls present a
// barrier to register allocation so duplicating them may end up increasing
// spills.
- if (PreRegAlloc && I->getDesc().isCall())
+ if (PreRegAlloc && I->isCall())
return false;
if (!I->isPHI() && !I->isDebugValue())
@@ -611,7 +610,7 @@ TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
++I;
if (I == E)
return true;
- return I->getDesc().isUnconditionalBranch();
+ return I->isUnconditionalBranch();
}
static bool
diff --git a/lib/Target/TargetFrameLowering.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 122f869..cadb878 100644
--- a/lib/Target/TargetFrameLowering.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -1,4 +1,4 @@
-//===----- TargetFrameLowering.cpp - Implement target frame interface ------==//
+//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 08e2b16..7ed9455 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -121,6 +122,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
+ assert(!MI->isBundle() &&
+ "TargetInstrInfoImpl::findCommutedOpIndices() can't handle bundles");
+
const MCInstrDesc &MCID = MI->getDesc();
if (!MCID.isCommutable())
return false;
@@ -136,11 +140,28 @@ bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
}
+bool
+TargetInstrInfoImpl::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ if (!MI->isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MI->isBranch() && !MI->isBarrier())
+ return true;
+ if (!MI->isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+
bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
bool MadeChange = false;
+
+ assert(!MI->isBundle() &&
+ "TargetInstrInfoImpl::PredicateInstruction() can't handle bundles");
+
const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return false;
for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -218,7 +239,7 @@ TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
MachineFunction &MF) const {
- assert(!Orig->getDesc().isNotDuplicable() &&
+ assert(!Orig->isNotDuplicable() &&
"Instruction cannot be duplicated");
return MF.CloneMachineInstr(Orig);
}
@@ -288,10 +309,10 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
// Add a memory operand, foldMemoryOperandImpl doesn't do that.
assert((!(Flags & MachineMemOperand::MOStore) ||
- NewMI->getDesc().mayStore()) &&
+ NewMI->mayStore()) &&
"Folded a def to a non-store!");
assert((!(Flags & MachineMemOperand::MOLoad) ||
- NewMI->getDesc().mayLoad()) &&
+ NewMI->mayLoad()) &&
"Folded a use to a non-load!");
const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
@@ -331,7 +352,7 @@ MachineInstr*
TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr* LoadMI) const {
- assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!");
+ assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!");
#ifndef NDEBUG
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
@@ -382,10 +403,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
return true;
- const MCInstrDesc &MCID = MI->getDesc();
-
// Avoid instructions obviously unsafe for remat.
- if (MCID.isNotDuplicable() || MCID.mayStore() ||
+ if (MI->isNotDuplicable() || MI->mayStore() ||
MI->hasUnmodeledSideEffects())
return false;
@@ -395,7 +414,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
return false;
// Avoid instructions which load from potentially varying memory.
- if (MCID.mayLoad() && !MI->isInvariantLoad(AA))
+ if (MI->mayLoad() && !MI->isInvariantLoad(AA))
return false;
// If any of the registers accessed are non-constant, conservatively assume
@@ -456,7 +475,7 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const{
// Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isLabel())
+ if (MI->isTerminator() || MI->isLabel())
return true;
// Don't attempt to schedule around any instruction that defines
@@ -492,3 +511,32 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
return (ScheduleHazardRecognizer *)
new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
}
+
+int
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ if (!DefNode->isMachineOpcode())
+ return -1;
+
+ unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+ unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ if (!N->isMachineOpcode())
+ return 1;
+
+ return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
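
For context, a hedged sketch of how a scheduler might consume these two new SDNode-based queries (the helper name and call site are assumptions, not part of this patch): ask for the per-operand latency first, then fall back to whole-instruction latency when no itinerary data covers the edge.

// Illustrative caller; computeEdgeLatency is a hypothetical helper.
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

static int computeEdgeLatency(const llvm::TargetInstrInfo *TII,
                              const llvm::InstrItineraryData *Itins,
                              llvm::SDNode *Def, unsigned DefIdx,
                              llvm::SDNode *Use, unsigned UseIdx) {
  int Latency = TII->getOperandLatency(Itins, Def, DefIdx, Use, UseIdx);
  if (Latency < 0)                  // no per-operand itinerary data available
    Latency = TII->getInstrLatency(Itins, Def);
  return Latency;
}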
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index c43e5b6..7fe164a 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -536,9 +536,7 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
// Add information about the stub reference to MachOMMI so that the stub
// gets emitted by the asmprinter.
MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
- MachineModuleInfoImpl::StubValueTy &StubSym =
- GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) :
- MachOMMI.getGVStubEntry(SSym);
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
if (StubSym.getPointer() == 0) {
MCSymbol *Sym = Mang->getSymbol(GV);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
new file mode 100644
index 0000000..0f59d01
--- /dev/null
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -0,0 +1,52 @@
+//===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the methods in TargetOptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+/// DisableFramePointerElim - This returns true if frame pointer elimination
+/// optimization should be disabled for the given machine function.
+bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
+ // Check to see if we should eliminate non-leaf frame pointers and then
+ // check to see if we should eliminate all frame pointers.
+ if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasCalls();
+ }
+
+ return NoFramePointerElim;
+}
+
+/// LessPreciseFPMAD - This returns true when the -enable-fp-mad option
+/// is specified on the command line. When this flag is off (the default), the
+/// code generator is not allowed to generate mad (multiply-add) if the
+/// result is "less precise" than doing those operations individually.
+bool TargetOptions::LessPreciseFPMAD() const {
+ return UnsafeFPMath || LessPreciseFPMADOption;
+}
+
+/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
+/// that the rounding mode of the FPU can change from its default.
+bool TargetOptions::HonorSignDependentRoundingFPMath() const {
+ return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+}
+
+/// getTrapFunctionName - If this returns a non-empty string, this means isel
+/// should lower Intrinsic::trap to a call to the specified function name
+/// instead of an ISD::TRAP node.
+StringRef TargetOptions::getTrapFunctionName() const {
+ return TrapFuncName;
+}
+
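
The flag interactions above are easy to tabulate. A minimal stand-in sketch (the struct below only mimics the patch's logic; it is not llvm::TargetOptions):

// Stand-in demo of the flag logic; field names match the patch.
#include <cassert>

struct Opts {
  bool NoFramePointerElim = false;
  bool NoFramePointerElimNonLeaf = false;
  bool UnsafeFPMath = false;
  bool LessPreciseFPMADOption = false;

  bool disableFPElim(bool hasCalls) const {
    if (NoFramePointerElimNonLeaf && !NoFramePointerElim)
      return hasCalls;               // keep the frame pointer only in non-leaf functions
    return NoFramePointerElim;
  }
  bool lessPreciseFPMAD() const { return UnsafeFPMath || LessPreciseFPMADOption; }
};

int main() {
  Opts O;
  O.NoFramePointerElimNonLeaf = true;
  assert(O.disableFPElim(/*hasCalls=*/true));    // non-leaf: FP elimination disabled
  assert(!O.disableFPElim(/*hasCalls=*/false));  // leaf: FP may be eliminated
  O.UnsafeFPMath = true;
  assert(O.lessPreciseFPMAD());                  // unsafe math implies FPMAD
  return 0;
}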
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index a2e8134..6a63335 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -242,7 +242,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
// appropriate location, we can try to sink the current instruction
// past it.
if (!KillMI || KillMI->getParent() != MBB || KillMI == MI ||
- KillMI->getDesc().isTerminator())
+ KillMI->isTerminator())
return false;
// If any of the definitions are used by another instruction between the
@@ -498,8 +498,7 @@ MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB,
MachineInstr *UseMI = &*UI;
if (UseMI == MI || UseMI->getParent() != MBB)
continue;
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
- if (DI != DistanceMap.end())
+ if (DistanceMap.count(UseMI))
continue;
if (!UI.getOperand().isKill())
return 0;
@@ -817,10 +816,9 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
static bool isSafeToDelete(MachineInstr *MI,
const TargetInstrInfo *TII,
SmallVector<unsigned, 4> &Kills) {
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayStore() || MCID.isCall())
+ if (MI->mayStore() || MI->isCall())
return false;
- if (MCID.isTerminator() || MI->hasUnmodeledSideEffects())
+ if (MI->isTerminator() || MI->hasUnmodeledSideEffects())
return false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -918,9 +916,8 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
// Don't mess with copies, they may be coalesced later.
return false;
- const MCInstrDesc &MCID = KillMI->getDesc();
- if (MCID.hasUnmodeledSideEffects() || MCID.isCall() || MCID.isBranch() ||
- MCID.isTerminator())
+ if (KillMI->hasUnmodeledSideEffects() || KillMI->isCall() ||
+ KillMI->isBranch() || KillMI->isTerminator())
// Don't move past calls, etc.
return false;
@@ -975,9 +972,8 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
++NumVisited;
- const MCInstrDesc &OMCID = OtherMI->getDesc();
- if (OMCID.hasUnmodeledSideEffects() || OMCID.isCall() || OMCID.isBranch() ||
- OMCID.isTerminator())
+ if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() ||
+ OtherMI->isBranch() || OtherMI->isTerminator())
// Don't move past calls, etc.
return false;
for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
@@ -1119,9 +1115,8 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
++NumVisited;
- const MCInstrDesc &MCID = OtherMI->getDesc();
- if (MCID.hasUnmodeledSideEffects() || MCID.isCall() || MCID.isBranch() ||
- MCID.isTerminator())
+ if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() ||
+ OtherMI->isBranch() || OtherMI->isTerminator())
// Don't move past calls, etc.
return false;
SmallVector<unsigned, 2> OtherDefs;
@@ -1201,7 +1196,6 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
return false;
MachineInstr &MI = *mi;
- const MCInstrDesc &MCID = MI.getDesc();
unsigned regA = MI.getOperand(DstIdx).getReg();
unsigned regB = MI.getOperand(SrcIdx).getReg();
@@ -1223,7 +1217,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
unsigned regCIdx = ~0U;
bool TryCommute = false;
bool AggressiveCommute = false;
- if (MCID.isCommutable() && MI.getNumOperands() >= 3 &&
+ if (MI.isCommutable() && MI.getNumOperands() >= 3 &&
TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) {
if (SrcIdx == SrcOp1)
regCIdx = SrcOp2;
@@ -1261,7 +1255,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
if (TargetRegisterInfo::isVirtualRegister(regA))
ScanUses(regA, &*mbbi, Processed);
- if (MCID.isConvertibleTo3Addr()) {
+ if (MI.isConvertibleTo3Addr()) {
// This instruction is potentially convertible to a true
// three-address instruction. Check if it is profitable.
if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
@@ -1288,7 +1282,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// movq (%rax), %rcx
// addq %rdx, %rcx
// because it's preferable to schedule a load than a register copy.
- if (MCID.mayLoad() && !regBKilled) {
+ if (MI.mayLoad() && !regBKilled) {
// Determine if a load can be unfolded.
unsigned LoadRegIndex;
unsigned NewOpc =
@@ -1531,7 +1525,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
// If it's safe and profitable, remat the definition instead of
// copying it.
if (DefMI &&
- DefMI->getDesc().isAsCheapAsAMove() &&
+ DefMI->isAsCheapAsAMove() &&
DefMI->isSafeToReMat(TII, AA, regB) &&
isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt
index fdffcb6..441f1e8 100644
--- a/lib/DebugInfo/CMakeLists.txt
+++ b/lib/DebugInfo/CMakeLists.txt
@@ -10,7 +10,3 @@ add_llvm_library(LLVMDebugInfo
DWARFDebugLine.cpp
DWARFFormValue.cpp
)
-
-add_llvm_library_dependencies(LLVMDebugInfo
- LLVMSupport
- )
diff --git a/lib/DebugInfo/LLVMBuild.txt b/lib/DebugInfo/LLVMBuild.txt
index b46d3d2..210b9f9 100644
--- a/lib/DebugInfo/LLVMBuild.txt
+++ b/lib/DebugInfo/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = DebugInfo
parent = Libraries
required_libraries = Support
-
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
index fb14d41..58caae8 100644
--- a/lib/ExecutionEngine/CMakeLists.txt
+++ b/lib/ExecutionEngine/CMakeLists.txt
@@ -4,13 +4,6 @@ add_llvm_library(LLVMExecutionEngine
TargetSelect.cpp
)
-add_llvm_library_dependencies(LLVMExecutionEngine
- LLVMCore
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(Interpreter)
add_subdirectory(JIT)
add_subdirectory(MCJIT)
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 525877b..7829a29 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include <cmath>
@@ -41,14 +42,12 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)(
Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM) = 0;
ExecutionEngine *(*ExecutionEngine::MCJITCtor)(
Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM) = 0;
ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M,
@@ -420,7 +419,7 @@ ExecutionEngine *ExecutionEngine::create(Module *M,
ExecutionEngine *ExecutionEngine::createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
+ CodeGenOpt::Level OL,
bool GVsWithCode,
Reloc::Model RM,
CodeModel::Model CMM) {
@@ -436,11 +435,14 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M,
StringRef MCPU = "";
SmallVector<std::string, 1> MAttrs;
+ Triple TT(M->getTargetTriple());
+ // TODO: permit custom TargetOptions here
TargetMachine *TM =
- EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, RM, CMM, ErrorStr);
+ EngineBuilder::selectTarget(TT, MArch, MCPU, MAttrs, TargetOptions(), RM,
+ CMM, OL, ErrorStr);
if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
- return ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel, GVsWithCode, TM);
+ return ExecutionEngine::JITCtor(M, ErrorStr, JMM, GVsWithCode, TM);
}
ExecutionEngine *EngineBuilder::create() {
@@ -465,17 +467,25 @@ ExecutionEngine *EngineBuilder::create() {
// Unless the interpreter was explicitly selected or the JIT is not linked,
// try making a JIT.
if (WhichEngine & EngineKind::JIT) {
- if (TargetMachine *TM = EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs,
+ Triple TT(M->getTargetTriple());
+ if (TargetMachine *TM = EngineBuilder::selectTarget(TT, MArch, MCPU, MAttrs,
+ Options,
RelocModel, CMModel,
- ErrorStr)) {
+ OptLevel, ErrorStr)) {
+ if (!TM->getTarget().hasJIT()) {
+ errs() << "WARNING: This target JIT is not designed for the host"
+ << " you are running. If bad things happen, please choose"
+ << " a different -march switch.\n";
+ }
+
if (UseMCJIT && ExecutionEngine::MCJITCtor) {
ExecutionEngine *EE =
- ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, OptLevel,
+ ExecutionEngine::MCJITCtor(M, ErrorStr, JMM,
AllocateGVsWithCode, TM);
if (EE) return EE;
} else if (ExecutionEngine::JITCtor) {
ExecutionEngine *EE =
- ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel,
+ ExecutionEngine::JITCtor(M, ErrorStr, JMM,
AllocateGVsWithCode, TM);
if (EE) return EE;
}
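With this change the optimization level is fixed when the TargetMachine is created, so the CodeGenOpt::Level is consumed by selectTarget() rather than the JIT constructors. A minimal sketch of client code against the new interface (M is the caller's Module*; error handling elided):

    std::string Err;
    ExecutionEngine *EE = EngineBuilder(M)
                              .setEngineKind(EngineKind::JIT)
                              .setErrorStr(&Err)
                              .setOptLevel(CodeGenOpt::Default) // now routed into selectTarget()
                              .create();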
diff --git a/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
index 4fb58c2..d331f83 100644
--- a/lib/ExecutionEngine/Interpreter/CMakeLists.txt
+++ b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
@@ -12,14 +12,6 @@ add_llvm_library(LLVMInterpreter
Interpreter.cpp
)
-add_llvm_library_dependencies(LLVMInterpreter
- LLVMCodeGen
- LLVMCore
- LLVMExecutionEngine
- LLVMSupport
- LLVMTarget
- )
-
if( LLVM_ENABLE_FFI )
target_link_libraries( LLVMInterpreter ${FFI_LIBRARY_PATH} )
endif()
diff --git a/lib/ExecutionEngine/Interpreter/LLVMBuild.txt b/lib/ExecutionEngine/Interpreter/LLVMBuild.txt
index 459426d..327b320 100644
--- a/lib/ExecutionEngine/Interpreter/LLVMBuild.txt
+++ b/lib/ExecutionEngine/Interpreter/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = Interpreter
parent = ExecutionEngine
required_libraries = CodeGen Core ExecutionEngine Support Target
-
diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt
index 813ccce..cefb0ae 100644
--- a/lib/ExecutionEngine/JIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/JIT/CMakeLists.txt
@@ -10,13 +10,3 @@ add_llvm_library(LLVMJIT
JITMemoryManager.cpp
OProfileJITEventListener.cpp
)
-
-add_llvm_library_dependencies(LLVMJIT
- LLVMCodeGen
- LLVMCore
- LLVMExecutionEngine
- LLVMMC
- LLVMRuntimeDyld
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index d773009..e4f6bc4 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -206,7 +206,6 @@ void DarwinRegisterFrame(void* FrameBegin) {
ExecutionEngine *JIT::createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM) {
// Try to register the program as a source of symbols to resolve against.
@@ -216,7 +215,7 @@ ExecutionEngine *JIT::createJIT(Module *M,
// If the target supports JIT code generation, create the JIT.
if (TargetJITInfo *TJ = TM->getJITInfo()) {
- return new JIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode);
+ return new JIT(M, *TM, *TJ, JMM, GVsWithCode);
} else {
if (ErrorStr)
*ErrorStr = "target does not support JIT code generation";
@@ -268,7 +267,7 @@ extern "C" {
}
JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
- JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode)
+ JITMemoryManager *JMM, bool GVsWithCode)
: ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode),
isAlreadyCodeGenerating(false) {
setTargetData(TM.getTargetData());
@@ -288,7 +287,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
// Turn the machine code intermediate representation into bytes in memory that
// may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) {
+ if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
report_fatal_error("Target does not support machine code emission!");
}
@@ -341,7 +340,7 @@ void JIT::addModule(Module *M) {
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+ if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
report_fatal_error("Target does not support machine code emission!");
}
@@ -372,7 +371,7 @@ bool JIT::removeModule(Module *M) {
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+ if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
report_fatal_error("Target does not support machine code emission!");
}
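Since the optimization level is now fixed at TargetMachine construction, addPassesToEmitMachineCode drops its CodeGenOpt::Level parameter. A sketch of the caller's side under that model, using the createTargetMachine signature shown later in this patch (TripleStr, CPU, Features, PM and JCE stand in for the caller's existing state; Reloc::Default and CodeModel::JITDefault are illustrative choices):

    // The level is baked in when the machine is created...
    TargetMachine *TM =
        TheTarget->createTargetMachine(TripleStr, CPU, Features, TargetOptions(),
                                       Reloc::Default, CodeModel::JITDefault,
                                       CodeGenOpt::Aggressive);
    // ...so emission needs no per-call level.
    if (TM->addPassesToEmitMachineCode(PM, *JCE))
      report_fatal_error("Target does not support machine code emission!");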
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index 92dcb0e..fbb9416 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -78,8 +78,7 @@ class JIT : public ExecutionEngine {
JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
- JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
- bool AllocateGVsWithCode);
+ JITMemoryManager *JMM, bool AllocateGVsWithCode);
public:
~JIT();
@@ -185,7 +184,6 @@ public:
static ExecutionEngine *createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM);
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
index 2e90968..abb70fb 100644
--- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
+++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
@@ -115,7 +115,7 @@ std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) {
// When trying to debug why GDB isn't getting the debug info right, it's
// awfully helpful to write the object file to disk so that it can be
// inspected with readelf and objdump.
- if (JITEmitDebugInfoToDisk) {
+ if (TM.Options.JITEmitDebugInfoToDisk) {
std::string Filename;
raw_string_ostream O2(Filename);
O2 << "/tmp/llvm_function_" << I.FnStart << "_" << F->getName() << ".o";
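The JIT debug knobs consulted here now live on TargetOptions instead of global cl::opt flags. A minimal sketch of enabling them, assuming only the field names visible in this patch:

    TargetOptions Opts;
    Opts.JITEmitDebugInfo = true;        // register debug info with the debugger
    Opts.JITEmitDebugInfoToDisk = true;  // also write /tmp/llvm_function_*.o for readelf/objdump
    // Opts is then threaded through EngineBuilder::selectTarget() below.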
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index 8f84ac7..42a136e 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -313,7 +313,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
MI != E; ++MI) {
if (!MI->isLabel()) {
- MayThrow |= MI->getDesc().isCall();
+ MayThrow |= MI->isCall();
continue;
}
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 24020ee..d9fa509 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -362,10 +362,16 @@ namespace {
/// Instance of the JIT
JIT *TheJIT;
+ bool JITExceptionHandling;
+
+ bool JITEmitDebugInfo;
+
public:
JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
: SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0),
- EmittedFunctions(this), TheJIT(&jit) {
+ EmittedFunctions(this), TheJIT(&jit),
+ JITExceptionHandling(TM.Options.JITExceptionHandling),
+ JITEmitDebugInfo(TM.Options.JITEmitDebugInfo) {
MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
if (jit.getJITInfo().needsGOT()) {
MemMgr->AllocateGOT();
@@ -1037,7 +1043,7 @@ void JITEmitter::deallocateMemForFunction(const Function *F) {
EmittedFunctions.erase(Emitted);
}
- if(JITExceptionHandling) {
+ if (JITExceptionHandling) {
TheJIT->DeregisterTable(F);
}
@@ -1047,7 +1053,7 @@ void JITEmitter::deallocateMemForFunction(const Function *F) {
}
-void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
+void *JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
if (BufferBegin)
return JITCodeEmitter::allocateSpace(Size, Alignment);
@@ -1059,7 +1065,7 @@ void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
return CurBufferPtr;
}
-void* JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) {
+void *JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) {
// Delegate this call through the memory manager.
return MemMgr->allocateGlobal(Size, Alignment);
}
diff --git a/lib/ExecutionEngine/JIT/LLVMBuild.txt b/lib/ExecutionEngine/JIT/LLVMBuild.txt
index 21cb300..ca2a565 100644
--- a/lib/ExecutionEngine/JIT/LLVMBuild.txt
+++ b/lib/ExecutionEngine/JIT/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = JIT
parent = ExecutionEngine
required_libraries = CodeGen Core ExecutionEngine MC RuntimeDyld Support Target
-
diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt
index 1ef6a44..d426969 100644
--- a/lib/ExecutionEngine/LLVMBuild.txt
+++ b/lib/ExecutionEngine/LLVMBuild.txt
@@ -15,9 +15,11 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = Interpreter JIT MCJIT RuntimeDyld
+
[component_0]
type = Library
name = ExecutionEngine
parent = Libraries
required_libraries = Core MC Support Target
-
diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
index aae8a1b..38fdffa 100644
--- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -2,11 +2,3 @@ add_llvm_library(LLVMMCJIT
MCJIT.cpp
Intercept.cpp
)
-
-add_llvm_library_dependencies(LLVMMCJIT
- LLVMCore
- LLVMExecutionEngine
- LLVMRuntimeDyld
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
index 9b08d3b..90f4d2f 100644
--- a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
+++ b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = MCJIT
parent = ExecutionEngine
required_libraries = Core ExecutionEngine RuntimeDyld Support Target
-
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index d5f407d..d5aaec9 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -36,7 +36,6 @@ extern "C" void LLVMLinkInMCJIT() {
ExecutionEngine *MCJIT::createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM) {
// Try to register the program as a source of symbols to resolve against.
@@ -46,8 +45,7 @@ ExecutionEngine *MCJIT::createJIT(Module *M,
// If the target supports JIT code generation, create the JIT.
if (TargetJITInfo *TJ = TM->getJITInfo())
- return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM, M), OptLevel,
- GVsWithCode);
+ return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM, M), GVsWithCode);
if (ErrorStr)
*ErrorStr = "target does not support JIT code generation";
@@ -55,8 +53,7 @@ ExecutionEngine *MCJIT::createJIT(Module *M,
}
MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji,
- RTDyldMemoryManager *MM, CodeGenOpt::Level OptLevel,
- bool AllocateGVsWithCode)
+ RTDyldMemoryManager *MM, bool AllocateGVsWithCode)
: ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) {
setTargetData(TM->getTargetData());
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index b64c21a..2a98fc9 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -24,8 +24,7 @@ namespace llvm {
class MCJIT : public ExecutionEngine {
MCJIT(Module *M, TargetMachine *tm, TargetJITInfo &tji,
- RTDyldMemoryManager *MemMgr, CodeGenOpt::Level OptLevel,
- bool AllocateGVsWithCode);
+ RTDyldMemoryManager *MemMgr, bool AllocateGVsWithCode);
TargetMachine *TM;
MCContext *Ctx;
@@ -79,7 +78,6 @@ public:
static ExecutionEngine *createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM);
diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
index c236d1d..59bdfee3 100644
--- a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
+++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
@@ -2,8 +2,3 @@ add_llvm_library(LLVMRuntimeDyld
RuntimeDyld.cpp
RuntimeDyldMachO.cpp
)
-
-add_llvm_library_dependencies(LLVMRuntimeDyld
- LLVMObject
- LLVMSupport
- )
diff --git a/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt b/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt
index 5e39814..97dc861 100644
--- a/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt
+++ b/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = RuntimeDyld
parent = ExecutionEngine
required_libraries = Object Support
-
diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp
index 45480a6..3937fe5 100644
--- a/lib/ExecutionEngine/TargetSelect.cpp
+++ b/lib/ExecutionEngine/TargetSelect.cpp
@@ -7,33 +7,35 @@
//
//===----------------------------------------------------------------------===//
//
-// This just asks the TargetRegistry for the appropriate JIT to use, and allows
-// the user to specify a specific one on the commandline with -march=x. Clients
-// should initialize targets prior to calling createJIT.
+// This just asks the TargetRegistry for the appropriate target to use, and
+// allows the user to specify a specific one on the commandline with -march=x,
+// -mcpu=y, and -mattr=a,-b,+c. Clients should initialize targets prior to
+// calling selectTarget().
//
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/Module.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
/// selectTarget - Pick a target either via -march or by guessing the native
/// arch. Add any CPU features specified via -mcpu or -mattr.
-TargetMachine *EngineBuilder::selectTarget(Module *Mod,
+TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
StringRef MArch,
StringRef MCPU,
const SmallVectorImpl<std::string>& MAttrs,
+ const TargetOptions &Options,
Reloc::Model RM,
CodeModel::Model CM,
+ CodeGenOpt::Level OL,
std::string *ErrorStr) {
- Triple TheTriple(Mod->getTargetTriple());
+ Triple TheTriple(TargetTriple);
if (TheTriple.getTriple().empty())
TheTriple.setTriple(sys::getDefaultTargetTriple());
@@ -55,7 +57,7 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
}
// Adjust the triple to match (if known), otherwise stick with the
- // module/host triple.
+ // requested/host triple.
Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch);
if (Type != Triple::UnknownArch)
TheTriple.setArch(Type);
@@ -69,12 +71,6 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
}
}
- if (!TheTarget->hasJIT()) {
- errs() << "WARNING: This target JIT is not designed for the host you are"
- << " running. If bad things happen, please choose a different "
- << "-march switch.\n";
- }
-
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
if (!MAttrs.empty()) {
@@ -87,7 +83,8 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
// Allocate a target...
TargetMachine *Target = TheTarget->createTargetMachine(TheTriple.getTriple(),
MCPU, FeaturesStr,
- RM, CM);
+ Options,
+ RM, CM, OL);
assert(Target && "Could not allocate target machine!");
return Target;
}
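Callers that used to hand selectTarget a Module now extract the triple themselves and supply the extra TargetOptions and CodeGenOpt::Level arguments. A sketch against the new signature (M and Err are the caller's; the reloc and code-model values are illustrative):

    Triple TT(M->getTargetTriple());
    SmallVector<std::string, 1> MAttrs;
    TargetMachine *TM =
        EngineBuilder::selectTarget(TT, /*MArch=*/"", /*MCPU=*/"", MAttrs,
                                    TargetOptions(), Reloc::Default,
                                    CodeModel::JITDefault, CodeGenOpt::Default,
                                    &Err);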
diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt
index c3fa1ff..e22b8cd 100644
--- a/lib/LLVMBuild.txt
+++ b/lib/LLVMBuild.txt
@@ -15,8 +15,10 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker MC Object Support TableGen Target Transforms VMCore
+
[component_0]
type = Group
name = Libraries
parent = $ROOT
-
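These new [common] blocks make the component tree explicit: llvm-build discovers child LLVMBuild.txt files through the declared subdirectories list rather than by scanning the filesystem, so any new directory added under lib/ must also be registered here.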
diff --git a/lib/Linker/CMakeLists.txt b/lib/Linker/CMakeLists.txt
index 4d8824b..0b6d2f4 100644
--- a/lib/Linker/CMakeLists.txt
+++ b/lib/Linker/CMakeLists.txt
@@ -4,11 +4,3 @@ add_llvm_library(LLVMLinker
LinkModules.cpp
Linker.cpp
)
-
-add_llvm_library_dependencies(LLVMLinker
- LLVMArchive
- LLVMBitReader
- LLVMCore
- LLVMSupport
- LLVMTransformUtils
- )
diff --git a/lib/Linker/LLVMBuild.txt b/lib/Linker/LLVMBuild.txt
index 69f2ac4..2b4c232 100644
--- a/lib/Linker/LLVMBuild.txt
+++ b/lib/Linker/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = Linker
parent = Libraries
required_libraries = Archive BitReader Core Support TransformUtils
-
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index a4ac1bf..b2e62a5 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -45,10 +45,5 @@ add_llvm_library(LLVMMC
WinCOFFStreamer.cpp
)
-add_llvm_library_dependencies(LLVMMC
- LLVMObject
- LLVMSupport
- )
-
add_subdirectory(MCParser)
add_subdirectory(MCDisassembler)
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index bd28069..92aad94 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -182,7 +182,7 @@ uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &Data,
if (const MCExpr *Value = Symbol.getVariableValue()) {
int64_t IntValue;
if (Value->EvaluateAsAbsolute(IntValue, Layout))
- return (uint64_t)IntValue;
+ return (uint64_t)IntValue;
}
}
@@ -1072,7 +1072,7 @@ void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm,
WriteBytes(cast<MCDataFragment>(F).getContents().str());
}
} else {
- Asm.WriteSectionData(&SD, Layout);
+ Asm.writeSectionData(&SD, Layout);
}
}
@@ -1742,14 +1742,26 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
}
} else {
if (IsPCRel) {
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_PC32;
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+
+ case X86::reloc_global_offset_table:
+ Type = ELF::R_386_GOTPC;
break;
- case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_386_PLT32;
+
+ case X86::reloc_signed_4byte:
+ case FK_PCRel_4:
+ case FK_Data_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_386_PC32;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_386_PLT32;
+ break;
+ }
break;
}
} else {
@@ -1831,6 +1843,21 @@ void MipsELFObjectWriter::WriteEFlags() {
ELF::EF_MIPS_ARCH_32R2);
}
+const MCSymbol *MipsELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ assert(Target.getSymA() && "SymA cannot be 0.");
+ const MCSymbol &Sym = Target.getSymA()->getSymbol();
+
+ if (Sym.getSection().getKind().isMergeableCString() ||
+ Sym.getSection().getKind().isMergeableConst())
+ return &Sym;
+
+ return NULL;
+}
+
unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel,
@@ -1858,7 +1885,8 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
case Mips::fixup_Mips_CALL16:
Type = ELF::R_MIPS_CALL16;
break;
- case Mips::fixup_Mips_GOT16:
+ case Mips::fixup_Mips_GOT_Global:
+ case Mips::fixup_Mips_GOT_Local:
Type = ELF::R_MIPS_GOT16;
break;
case Mips::fixup_Mips_HI16:
@@ -1887,4 +1915,3 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
return Type;
}
-
diff --git a/lib/MC/ELFObjectWriter.h b/lib/MC/ELFObjectWriter.h
index 7838206..9adf0b1 100644
--- a/lib/MC/ELFObjectWriter.h
+++ b/lib/MC/ELFObjectWriter.h
@@ -445,6 +445,12 @@ class ELFObjectWriter : public MCObjectWriter {
virtual void WriteEFlags();
protected:
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+
virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel, bool IsRelocWithSymbol,
int64_t Addend);
diff --git a/lib/MC/LLVMBuild.txt b/lib/MC/LLVMBuild.txt
index 8ad66b6..f35dbe4 100644
--- a/lib/MC/LLVMBuild.txt
+++ b/lib/MC/LLVMBuild.txt
@@ -15,9 +15,11 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = MCDisassembler MCParser
+
[component_0]
type = Library
name = MC
parent = Libraries
required_libraries = Object Support
-
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index c330e74..b1e1bdf 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -29,7 +29,6 @@ MCAsmInfo::MCAsmInfo() {
HasSubsectionsViaSymbols = false;
HasMachoZeroFillDirective = false;
HasMachoTBSSDirective = false;
- StructorOutputOrder = Structors::ReversePriorityOrder;
HasStaticCtorDtorReferenceInStaticMode = false;
LinkerRequiresNonEmptyDwarfLines = false;
MaxInstLength = 4;
diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp
index 434d910..6d34801 100644
--- a/lib/MC/MCAsmInfoCOFF.cpp
+++ b/lib/MC/MCAsmInfoCOFF.cpp
@@ -38,3 +38,11 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
SupportsDataRegions = false;
}
+
+MCAsmInfoMicrosoft::MCAsmInfoMicrosoft() {
+ AllowQuotesInName = true;
+}
+
+MCAsmInfoGNUCOFF::MCAsmInfoGNUCOFF() {
+
+}
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index 537d0a3..24f1243 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -39,7 +39,6 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
HasMachoZeroFillDirective = true; // Uses .zerofill
HasMachoTBSSDirective = true; // Uses .tbss
- StructorOutputOrder = Structors::PriorityOrder;
HasStaticCtorDtorReferenceInStaticMode = true;
CodeBegin = "L$start$code$";
@@ -57,8 +56,9 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
HiddenVisibilityAttr = MCSA_PrivateExtern;
HiddenDeclarationVisibilityAttr = MCSA_Invalid;
+
// Doesn't support protected visibility.
- ProtectedVisibilityAttr = MCSA_Global;
+ ProtectedVisibilityAttr = MCSA_Invalid;
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index d90f7b2..c785c03 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -1284,6 +1284,10 @@ void MCAsmStreamer::Finish() {
if (getContext().hasDwarfFiles() && !UseLoc)
MCDwarfFileTable::Emit(this);
+ // If we are generating dwarf for assembly source files, dump out the sections.
+ if (getContext().getGenDwarfForAssembly())
+ MCGenDwarfInfo::Emit(this);
+
if (!UseCFI)
EmitFrames(false);
}
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 29adbcb..c5bf6b9 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -33,7 +33,7 @@ using namespace llvm;
namespace {
namespace stats {
STATISTIC(EmittedFragments, "Number of emitted assembler fragments");
-STATISTIC(EvaluateFixup, "Number of evaluated fixups");
+STATISTIC(evaluateFixup, "Number of evaluated fixups");
STATISTIC(FragmentLayouts, "Number of fragment layouts");
STATISTIC(ObjectBytes, "Number of emitted object file bytes");
STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps");
@@ -136,7 +136,7 @@ uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const {
// The size is the last fragment's end offset.
const MCFragment &F = SD->getFragmentList().back();
- return getFragmentOffset(&F) + getAssembler().ComputeFragmentSize(*this, F);
+ return getFragmentOffset(&F) + getAssembler().computeFragmentSize(*this, F);
}
uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const {
@@ -237,10 +237,10 @@ const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
return SD->getFragment()->getAtom();
}
-bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
+bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
const MCFixup &Fixup, const MCFragment *DF,
MCValue &Target, uint64_t &Value) const {
- ++stats::EvaluateFixup;
+ ++stats::evaluateFixup;
if (!Fixup.getValue()->EvaluateAsRelocatable(Target, Layout))
report_fatal_error("expected relocatable expression");
@@ -312,7 +312,7 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
return IsResolved;
}
-uint64_t MCAssembler::ComputeFragmentSize(const MCAsmLayout &Layout,
+uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
const MCFragment &F) const {
switch (F.getKind()) {
case MCFragment::FT_Data:
@@ -374,7 +374,7 @@ void MCAsmLayout::LayoutFragment(MCFragment *F) {
// Compute fragment offset and size.
uint64_t Offset = 0;
if (Prev)
- Offset += Prev->Offset + getAssembler().ComputeFragmentSize(*this, *Prev);
+ Offset += Prev->Offset + getAssembler().computeFragmentSize(*this, *Prev);
F->Offset = Offset;
LastValidFragment[F->getParent()] = F;
@@ -390,7 +390,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
++stats::EmittedFragments;
// FIXME: Embed in fragments instead?
- uint64_t FragmentSize = Asm.ComputeFragmentSize(Layout, F);
+ uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F);
switch (F.getKind()) {
case MCFragment::FT_Align: {
MCAlignFragment &AF = cast<MCAlignFragment>(F);
@@ -493,7 +493,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
assert(OW->getStream().tell() - Start == FragmentSize);
}
-void MCAssembler::WriteSectionData(const MCSectionData *SD,
+void MCAssembler::writeSectionData(const MCSectionData *SD,
const MCAsmLayout &Layout) const {
// Ignore virtual sections.
if (SD->getSection().isVirtualSection()) {
@@ -546,13 +546,13 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
}
-uint64_t MCAssembler::HandleFixup(const MCAsmLayout &Layout,
+uint64_t MCAssembler::handleFixup(const MCAsmLayout &Layout,
MCFragment &F,
const MCFixup &Fixup) {
// Evaluate the fixup.
MCValue Target;
uint64_t FixedValue;
- if (!EvaluateFixup(Layout, Fixup, &F, Target, FixedValue)) {
+ if (!evaluateFixup(Layout, Fixup, &F, Target, FixedValue)) {
// The fixup was unresolved, we need a relocation. Inform the object
// writer of the relocation, and give it an opportunity to adjust the
// fixup value if need be.
@@ -592,7 +592,7 @@ void MCAssembler::Finish() {
}
// Layout until everything fits.
- while (LayoutOnce(Layout))
+ while (layoutOnce(Layout))
continue;
DEBUG_WITH_TYPE("mc-dump", {
@@ -600,7 +600,7 @@ void MCAssembler::Finish() {
dump(); });
// Finalize the layout, including fragment lowering.
- FinishLayout(Layout);
+ finishLayout(Layout);
DEBUG_WITH_TYPE("mc-dump", {
llvm::errs() << "assembler backend - final-layout\n--\n";
@@ -621,7 +621,7 @@ void MCAssembler::Finish() {
for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
MCFixup &Fixup = *it3;
- uint64_t FixedValue = HandleFixup(Layout, *DF, Fixup);
+ uint64_t FixedValue = handleFixup(Layout, *DF, Fixup);
getBackend().ApplyFixup(Fixup, DF->getContents().data(),
DF->getContents().size(), FixedValue);
}
@@ -631,7 +631,7 @@ void MCAssembler::Finish() {
for (MCInstFragment::fixup_iterator it3 = IF->fixup_begin(),
ie3 = IF->fixup_end(); it3 != ie3; ++it3) {
MCFixup &Fixup = *it3;
- uint64_t FixedValue = HandleFixup(Layout, *IF, Fixup);
+ uint64_t FixedValue = handleFixup(Layout, *IF, Fixup);
getBackend().ApplyFixup(Fixup, IF->getCode().data(),
IF->getCode().size(), FixedValue);
}
@@ -645,8 +645,8 @@ void MCAssembler::Finish() {
stats::ObjectBytes += OS.tell() - StartOffset;
}
-bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup,
- const MCFragment *DF,
+bool MCAssembler::fixupNeedsRelaxation(const MCFixup &Fixup,
+ const MCInstFragment *DF,
const MCAsmLayout &Layout) const {
if (getRelaxAll())
return true;
@@ -654,16 +654,13 @@ bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup,
// If we cannot resolve the fixup value, it requires relaxation.
MCValue Target;
uint64_t Value;
- if (!EvaluateFixup(Layout, Fixup, DF, Target, Value))
+ if (!evaluateFixup(Layout, Fixup, DF, Target, Value))
return true;
- // Otherwise, relax if the value is too big for a (signed) i8.
- //
- // FIXME: This is target dependent!
- return int64_t(Value) != int64_t(int8_t(Value));
+ return getBackend().fixupNeedsRelaxation(Fixup, Value, DF, Layout);
}
-bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF,
+bool MCAssembler::fragmentNeedsRelaxation(const MCInstFragment *IF,
const MCAsmLayout &Layout) const {
// If this inst doesn't ever need relaxation, ignore it. This occurs when we
// are intentionally pushing out inst fragments, or because we relaxed a
@@ -673,15 +670,15 @@ bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF,
for (MCInstFragment::const_fixup_iterator it = IF->fixup_begin(),
ie = IF->fixup_end(); it != ie; ++it)
- if (FixupNeedsRelaxation(*it, IF, Layout))
+ if (fixupNeedsRelaxation(*it, IF, Layout))
return true;
return false;
}
-bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout,
+bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
MCInstFragment &IF) {
- if (!FragmentNeedsRelaxation(&IF, Layout))
+ if (!fragmentNeedsRelaxation(&IF, Layout))
return false;
++stats::RelaxedInstructions;
@@ -715,7 +712,7 @@ bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout,
return true;
}
-bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
+bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
int64_t Value = 0;
uint64_t OldSize = LF.getContents().size();
bool IsAbs = LF.getValue().EvaluateAsAbsolute(Value, Layout);
@@ -732,8 +729,8 @@ bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
return OldSize != LF.getContents().size();
}
-bool MCAssembler::RelaxDwarfLineAddr(MCAsmLayout &Layout,
- MCDwarfLineAddrFragment &DF) {
+bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
+ MCDwarfLineAddrFragment &DF) {
int64_t AddrDelta = 0;
uint64_t OldSize = DF.getContents().size();
bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
@@ -749,7 +746,7 @@ bool MCAssembler::RelaxDwarfLineAddr(MCAsmLayout &Layout,
return OldSize != Data.size();
}
-bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
+bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
MCDwarfCallFrameFragment &DF) {
int64_t AddrDelta = 0;
uint64_t OldSize = DF.getContents().size();
@@ -764,7 +761,7 @@ bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
return OldSize != Data.size();
}
-bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
+bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout,
MCSectionData &SD) {
MCFragment *FirstInvalidFragment = NULL;
// Scan for fragments that need relaxation.
@@ -776,19 +773,19 @@ bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
default:
break;
case MCFragment::FT_Inst:
- relaxedFrag = RelaxInstruction(Layout, *cast<MCInstFragment>(it2));
+ relaxedFrag = relaxInstruction(Layout, *cast<MCInstFragment>(it2));
break;
case MCFragment::FT_Dwarf:
- relaxedFrag = RelaxDwarfLineAddr(Layout,
+ relaxedFrag = relaxDwarfLineAddr(Layout,
*cast<MCDwarfLineAddrFragment>(it2));
break;
case MCFragment::FT_DwarfFrame:
relaxedFrag =
- RelaxDwarfCallFrameFragment(Layout,
+ relaxDwarfCallFrameFragment(Layout,
*cast<MCDwarfCallFrameFragment>(it2));
break;
case MCFragment::FT_LEB:
- relaxedFrag = RelaxLEB(Layout, *cast<MCLEBFragment>(it2));
+ relaxedFrag = relaxLEB(Layout, *cast<MCLEBFragment>(it2));
break;
}
// Update the layout, and remember that we relaxed.
@@ -802,20 +799,20 @@ bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
return false;
}
-bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
+bool MCAssembler::layoutOnce(MCAsmLayout &Layout) {
++stats::RelaxationSteps;
bool WasRelaxed = false;
for (iterator it = begin(), ie = end(); it != ie; ++it) {
MCSectionData &SD = *it;
- while(LayoutSectionOnce(Layout, SD))
+ while(layoutSectionOnce(Layout, SD))
WasRelaxed = true;
}
return WasRelaxed;
}
-void MCAssembler::FinishLayout(MCAsmLayout &Layout) {
+void MCAssembler::finishLayout(MCAsmLayout &Layout) {
// The layout is done. Mark every fragment as valid.
for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
Layout.getFragmentOffset(&*Layout.getSectionOrder()[i]->rbegin());
diff --git a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt
index 5cf5f1b..5e2cd83 100644
--- a/lib/MC/MCDisassembler/CMakeLists.txt
+++ b/lib/MC/MCDisassembler/CMakeLists.txt
@@ -2,12 +2,7 @@ add_llvm_library(LLVMMCDisassembler
Disassembler.cpp
EDDisassembler.cpp
EDInst.cpp
+ EDMain.cpp
EDOperand.cpp
EDToken.cpp
)
-
-add_llvm_library_dependencies(LLVMMCDisassembler
- LLVMMC
- LLVMMCParser
- LLVMSupport
- )
diff --git a/tools/edis/EDMain.cpp b/lib/MC/MCDisassembler/EDMain.cpp
index 14a17d6..3fd355b 100644
--- a/tools/edis/EDMain.cpp
+++ b/lib/MC/MCDisassembler/EDMain.cpp
@@ -11,29 +11,16 @@
//
//===----------------------------------------------------------------------===//
-// FIXME: This code isn't layered right, the headers should be moved to
-// include llvm/MC/MCDisassembler or something.
-#include "../../lib/MC/MCDisassembler/EDDisassembler.h"
-#include "../../lib/MC/MCDisassembler/EDInst.h"
-#include "../../lib/MC/MCDisassembler/EDOperand.h"
-#include "../../lib/MC/MCDisassembler/EDToken.h"
-#include "llvm/Support/TargetSelect.h"
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "EDOperand.h"
+#include "EDToken.h"
#include "llvm-c/EnhancedDisassembly.h"
using namespace llvm;
int EDGetDisassembler(EDDisassemblerRef *disassembler,
const char *triple,
EDAssemblySyntax_t syntax) {
- static bool initialized;
- if (!initialized) {
- // Initialize targets and assembly printers/parsers.
- llvm::InitializeAllTargetInfos();
- llvm::InitializeAllTargetMCs();
- llvm::InitializeAllAsmParsers();
- llvm::InitializeAllDisassemblers();
- initialized = true;
- }
-
EDDisassembler::AssemblySyntax Syntax;
switch (syntax) {
default: assert(0 && "Unknown assembly syntax!");
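Note that nothing replaces the deleted lazy initialization: per the updated TargetSelect.cpp header comment later in this patch, clients must initialize targets themselves before creating a disassembler, using the same calls the removed block made:

    llvm::InitializeAllTargetInfos();
    llvm::InitializeAllTargetMCs();
    llvm::InitializeAllAsmParsers();
    llvm::InitializeAllDisassemblers();
    // ...only then call EDGetDisassembler(...).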
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index d2bbd7d..46ab65f 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -19,9 +19,12 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
using namespace llvm;
// Given a special op, return the address skip amount (in units of
@@ -423,6 +426,342 @@ void MCDwarfFile::dump() const {
print(dbgs());
}
+// Utility function to write a tuple for .debug_abbrev.
+static void EmitAbbrev(MCStreamer *MCOS, uint64_t Name, uint64_t Form) {
+ MCOS->EmitULEB128IntValue(Name);
+ MCOS->EmitULEB128IntValue(Form);
+}
+
+// When generating dwarf for assembly source files, this emits the data for
+// the .debug_abbrev section, which contains three DIEs.
+static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
+ MCContext &context = MCOS->getContext();
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
+
+ // DW_TAG_compile_unit DIE abbrev (1).
+ MCOS->EmitULEB128IntValue(1);
+ MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit);
+ MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
+ EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string);
+ StringRef DwarfDebugFlags = context.getDwarfDebugFlags();
+ if (!DwarfDebugFlags.empty())
+ EmitAbbrev(MCOS, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_producer, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_language, dwarf::DW_FORM_data2);
+ EmitAbbrev(MCOS, 0, 0);
+
+ // DW_TAG_subprogram DIE abbrev (2).
+ MCOS->EmitULEB128IntValue(2);
+ MCOS->EmitULEB128IntValue(dwarf::DW_TAG_subprogram);
+ MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
+ EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_decl_file, dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_decl_line, dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag);
+ EmitAbbrev(MCOS, 0, 0);
+
+ // DW_TAG_unspecified_parameters DIE abbrev (3).
+ MCOS->EmitULEB128IntValue(3);
+ MCOS->EmitULEB128IntValue(dwarf::DW_TAG_unspecified_parameters);
+ MCOS->EmitIntValue(dwarf::DW_CHILDREN_no, 1);
+ EmitAbbrev(MCOS, 0, 0);
+
+ // Terminate the abbreviations for this compilation unit.
+ MCOS->EmitIntValue(0, 1);
+}
+
+// When generating dwarf for assembly source files, this emits the data for the
+// .debug_aranges section, which contains a header and a table of pairs of
+// PointerSize'ed values for the address and size of section(s) with line table
+// entries (just the default .text in our case) and a terminating pair of zeros.
+static void EmitGenDwarfAranges(MCStreamer *MCOS) {
+ MCContext &context = MCOS->getContext();
+
+ // Create a symbol at the end of the section for which we are creating the
+ // dwarf debugging info, used later in here as part of the expression that
+ // calculates the size of the section for the table.
+ MCOS->SwitchSection(context.getGenDwarfSection());
+ MCSymbol *SectionEndSym = context.CreateTempSymbol();
+ MCOS->EmitLabel(SectionEndSym);
+ context.setGenDwarfSectionEndSym(SectionEndSym);
+
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
+
+ // This will be the length of the .debug_aranges section, first account for
+ // the size of each item in the header (see below where we emit these items).
+ int Length = 4 + 2 + 4 + 1 + 1;
+
+ // Figure the padding needed after the header, before the table of address and
+ // size pairs whose values are PointerSize'ed.
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ int AddrSize = asmInfo.getPointerSize();
+ int Pad = 2 * AddrSize - (Length & (2 * AddrSize - 1));
+ if (Pad == 2 * AddrSize)
+ Pad = 0;
+ Length += Pad;
+
+ // Add the size of the pair of PointerSize'ed values for the address and size
+ // of the one default .text section we have in the table.
+ Length += 2 * AddrSize;
+ // And the pair of terminating zeros.
+ Length += 2 * AddrSize;
+
+
+ // Emit the header for this section.
+ // The 4 byte length not including the 4 byte value for the length.
+ MCOS->EmitIntValue(Length - 4, 4);
+ // The 2 byte version, which is 2.
+ MCOS->EmitIntValue(2, 2);
+ // The 4 byte offset to the compile unit in the .debug_info from the start of
+ // the .debug_info; the compile unit is at the start of that section, so this
+ // is zero.
+ MCOS->EmitIntValue(0, 4);
+ // The 1 byte size of an address.
+ MCOS->EmitIntValue(AddrSize, 1);
+ // The 1 byte size of a segment descriptor; we use a value of zero.
+ MCOS->EmitIntValue(0, 1);
+ // Align the header with the padding if needed, before we put out the table.
+ for(int i = 0; i < Pad; i++)
+ MCOS->EmitIntValue(0, 1);
+
+ // Now emit the table of pairs of PointerSize'ed values for the section(s)
+ // address and size, in our case just the one default .text section.
+ const MCExpr *Addr = MCSymbolRefExpr::Create(
+ context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context);
+ const MCExpr *Size = MakeStartMinusEndExpr(*MCOS,
+ *context.getGenDwarfSectionStartSym(), *SectionEndSym, 0);
+ MCOS->EmitAbsValue(Addr, AddrSize);
+ MCOS->EmitAbsValue(Size, AddrSize);
+
+ // And finally the pair of terminating zeros.
+ MCOS->EmitIntValue(0, AddrSize);
+ MCOS->EmitIntValue(0, AddrSize);
+}
+
+// When generating dwarf for assembly source files, this emits the data for the
+// .debug_info section, which contains three parts: the header, the compile_unit
+// DIE, and a list of subprogram DIEs.
+static void EmitGenDwarfInfo(MCStreamer *MCOS) {
+ MCContext &context = MCOS->getContext();
+
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
+
+ // Create symbols at the start and end of this section, used in here in the
+ // expression that calculates the length field of the header.
+ MCSymbol *InfoStart = context.CreateTempSymbol();
+ MCOS->EmitLabel(InfoStart);
+ MCSymbol *InfoEnd = context.CreateTempSymbol();
+
+ // First part: the header.
+
+ // The 4 byte total length of the information for this compilation unit, not
+ // including these 4 bytes.
+ const MCExpr *Length = MakeStartMinusEndExpr(*MCOS, *InfoStart, *InfoEnd, 4);
+ MCOS->EmitAbsValue(Length, 4);
+
+ // The 2 byte DWARF version, which is 2.
+ MCOS->EmitIntValue(2, 2);
+
+ // The 4 byte offset to the debug abbrevs from the start of the .debug_abbrev;
+ // they are at the start of that section, so this is zero.
+ MCOS->EmitIntValue(0, 4);
+
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ int AddrSize = asmInfo.getPointerSize();
+ // The 1 byte size of an address.
+ MCOS->EmitIntValue(AddrSize, 1);
+
+ // Second part: the compile_unit DIE.
+
+ // The DW_TAG_compile_unit DIE abbrev (1).
+ MCOS->EmitULEB128IntValue(1);
+
+ // DW_AT_stmt_list, a 4 byte offset into the .debug_line section; our line
+ // table is at the start of that section, so this is zero.
+ MCOS->EmitIntValue(0, 4);
+
+ // AT_low_pc, the first address of the default .text section.
+ const MCExpr *Start = MCSymbolRefExpr::Create(
+ context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(Start, AddrSize);
+
+ // AT_high_pc, the last address of the default .text section.
+ const MCExpr *End = MCSymbolRefExpr::Create(
+ context.getGenDwarfSectionEndSym(), MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(End, AddrSize);
+
+ // AT_name, the name of the source file. Reconstruct from the first directory
+ // and file table entries.
+ const std::vector<StringRef> &MCDwarfDirs =
+ context.getMCDwarfDirs();
+ if (MCDwarfDirs.size() > 0) {
+ MCOS->EmitBytes(MCDwarfDirs[0], 0);
+ MCOS->EmitBytes("/", 0);
+ }
+ const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ MCOS->getContext().getMCDwarfFiles();
+ MCOS->EmitBytes(MCDwarfFiles[1]->getName(), 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_comp_dir, the working directory the assembly was done in.
+ llvm::sys::Path CWD = llvm::sys::Path::GetCurrentDirectory();
+ MCOS->EmitBytes(StringRef(CWD.c_str()), 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_APPLE_flags, the command line arguments of the assembler tool.
+ StringRef DwarfDebugFlags = context.getDwarfDebugFlags();
+ if (!DwarfDebugFlags.empty()){
+ MCOS->EmitBytes(DwarfDebugFlags, 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+ }
+
+ // AT_producer, the version of the assembler tool.
+ MCOS->EmitBytes(StringRef("llvm-mc (based on LLVM "), 0);
+ MCOS->EmitBytes(StringRef(PACKAGE_VERSION), 0);
+ MCOS->EmitBytes(StringRef(")"), 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_language, a 2 byte value (DW_FORM_data2). We use DW_LANG_Mips_Assembler
+ // as the dwarf2 draft has no standard code for assembler.
+ MCOS->EmitIntValue(dwarf::DW_LANG_Mips_Assembler, 2);
+
+ // Third part: the list of subprogram DIEs.
+
+ // Loop on saved info for dwarf subprograms and create the DIEs for them.
+ const std::vector<const MCGenDwarfSubprogramEntry *> &Entries =
+ MCOS->getContext().getMCGenDwarfSubprogramEntries();
+ for (std::vector<const MCGenDwarfSubprogramEntry *>::const_iterator it =
+ Entries.begin(), ie = Entries.end(); it != ie;
+ ++it) {
+ const MCGenDwarfSubprogramEntry *Entry = *it;
+
+ // The DW_TAG_subprogram DIE abbrev (2).
+ MCOS->EmitULEB128IntValue(2);
+
+ // AT_name, the name of the label without any leading underbar.
+ MCOS->EmitBytes(Entry->getName(), 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_decl_file, index into the file table.
+ MCOS->EmitIntValue(Entry->getFileNumber(), 4);
+
+ // AT_decl_line, source line number.
+ MCOS->EmitIntValue(Entry->getLineNumber(), 4);
+
+ // AT_low_pc, start address of the label.
+ const MCExpr *AT_low_pc = MCSymbolRefExpr::Create(Entry->getLabel(),
+ MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(AT_low_pc, AddrSize);
+
+ // AT_high_pc, the end address, which is the next label or the end of the
+ // section.
+ std::vector<const MCGenDwarfSubprogramEntry *>::const_iterator next = it+1;
+ if (next != Entries.end()){
+ const MCGenDwarfSubprogramEntry *NextEntry = *next;
+ const MCExpr *AT_high_pc = MCSymbolRefExpr::Create(NextEntry->getLabel(),
+ MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(AT_high_pc, AddrSize);
+ } else {
+ MCOS->EmitAbsValue(End, AddrSize);
+ }
+
+ // DW_AT_prototyped, a one byte flag value of 0 saying we have no prototype.
+ MCOS->EmitIntValue(0, 1);
+
+ // The DW_TAG_unspecified_parameters DIE abbrev (3).
+ MCOS->EmitULEB128IntValue(3);
+
+ // Add the NULL DIE terminating the DW_TAG_unspecified_parameters DIE's.
+ MCOS->EmitIntValue(0, 1);
+ }
+ // Deallocate the MCGenDwarfSubprogramEntry objects that saved away the info
+ // for the dwarf subprograms.
+ for (std::vector<const MCGenDwarfSubprogramEntry *>::const_iterator it =
+ Entries.begin(), ie = Entries.end(); it != ie;
+ ++it) {
+ const MCGenDwarfSubprogramEntry *Entry = *it;
+ delete Entry;
+ }
+
+ // Add the NULL DIE terminating the Compile Unit DIE's.
+ MCOS->EmitIntValue(0, 1);
+
+ // Now set the value of the symbol at the end of the info section.
+ MCOS->EmitLabel(InfoEnd);
+}
+
+//
+// When generating dwarf for assembly source files, this emits the Dwarf
+// sections.
+//
+void MCGenDwarfInfo::Emit(MCStreamer *MCOS) {
+ // Create the dwarf sections in this order (.debug_line already created).
+ MCContext &context = MCOS->getContext();
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
+
+ // If there are no line table entries then do not emit any section contents.
+ if (context.getMCLineSections().empty())
+ return;
+
+ // Output the data for .debug_aranges section.
+ EmitGenDwarfAranges(MCOS);
+
+ // Output the data for .debug_abbrev section.
+ EmitGenDwarfAbbrev(MCOS);
+
+ // Output the data for .debug_info section.
+ EmitGenDwarfInfo(MCOS);
+}
+
+//
+// When generating dwarf for assembly source files, this is called when the
+// symbol for a label is created. If this symbol is not a temporary and is in
+// section that dwarf is being generated for, save the needed info to create
+// a dwarf subprogram.
+//
+void MCGenDwarfSubprogramEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
+ SourceMgr &SrcMgr, SMLoc &Loc) {
+ // We won't create dwarf subprograms for temporary symbols or for symbols not
+ // in the default text section.
+ if (Symbol->isTemporary())
+ return;
+ MCContext &context = MCOS->getContext();
+ if (context.getGenDwarfSection() != MCOS->getCurrentSection())
+ return;
+
+ // The dwarf subprogram's name does not include the symbol name's leading
+ // underbar, if any.
+ StringRef Name = Symbol->getName();
+ if (Name.startswith("_"))
+ Name = Name.substr(1, Name.size()-1);
+
+ // Get the dwarf file number to be used for the dwarf subprogram.
+ unsigned FileNumber = context.getGenDwarfFileNumber();
+
+ // Finding the line number is the expensive part, which is why we don't just
+ // pass it in: for some symbols we won't create a dwarf subprogram at all.
+ int CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
+ unsigned LineNumber = SrcMgr.FindLineNumber(Loc, CurBuffer);
+
+ // We create a temporary symbol for use as the AT_high_pc and AT_low_pc
+ // values, so that they don't carry things like an ARM thumb bit from the
+ // original symbol and thus won't get a low bit set after relocation.
+ MCSymbol *Label = context.CreateTempSymbol();
+ MCOS->EmitLabel(Label);
+
+ // Create an entry for the info and add it to the other entries.
+ MCGenDwarfSubprogramEntry *Entry =
+ new MCGenDwarfSubprogramEntry(Name, FileNumber, LineNumber, Label);
+ MCOS->getContext().addMCGenDwarfSubprogramEntry(Entry);
+}
+
static int getDataAlignmentFactor(MCStreamer &streamer) {
MCContext &context = streamer.getContext();
const MCAsmInfo &asmInfo = context.getAsmInfo();
@@ -1009,10 +1348,7 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer,
ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getFrameInfos();
// Emit the compact unwind info if available.
- // FIXME: This emits both the compact unwind and the old CIE/FDE
- // information. Only one of those is needed.
- // FIXME: Disable. This seems to still be causing failures.
- if (false && IsEH && MOFI->getCompactUnwindSection())
+ if (IsEH && MOFI->getCompactUnwindSection())
for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) {
const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i);
if (Frame.CompactUnwindEncoding)
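As a quick check of the length computation in EmitGenDwarfAranges above, assume AddrSize = 8 (a 64-bit target): the header is 4 + 2 + 4 + 1 + 1 = 12 bytes, so Pad = 16 - (12 & 15) = 4; adding the one .text address/size pair (16 bytes) and the terminating zeros (16 bytes) gives Length = 12 + 4 + 16 + 16 = 48, and the emitted length field is Length - 4 = 44.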
diff --git a/lib/MC/MCELF.cpp b/lib/MC/MCELF.cpp
index dad2e7b..f9f98e0 100644
--- a/lib/MC/MCELF.cpp
+++ b/lib/MC/MCELF.cpp
@@ -37,7 +37,7 @@ void MCELF::SetType(MCSymbolData &SD, unsigned Type) {
assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
- Type == ELF::STT_TLS);
+ Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC);
uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift);
SD.setFlags(OtherFlags | (Type << ELF_STT_Shift));
@@ -48,7 +48,7 @@ unsigned MCELF::GetType(const MCSymbolData &SD) {
assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
- Type == ELF::STT_TLS);
+ Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC);
return Type;
}
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 0ea3c64..dcc4666 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -130,7 +130,6 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_WeakDefinition:
case MCSA_WeakDefAutoPrivate:
case MCSA_Invalid:
- case MCSA_ELF_TypeIndFunction:
case MCSA_IndirectSymbol:
assert(0 && "Invalid symbol attribute for ELF!");
break;
@@ -162,6 +161,10 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCELF::SetType(SD, ELF::STT_FUNC);
break;
+ case MCSA_ELF_TypeIndFunction:
+ MCELF::SetType(SD, ELF::STT_GNU_IFUNC);
+ break;
+
case MCSA_ELF_TypeObject:
MCELF::SetType(SD, ELF::STT_OBJECT);
break;
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index aa35815..50ab1f8 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -156,8 +156,6 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
}
void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
- // FIXME: Flag the function ISA as thumb with DW_AT_APPLE_isa.
-
// Remember that the function is a thumb function. Fixup and relocation
// values will need adjusted.
getAssembler().setIsThumbFunc(Symbol);
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 7d23541..32ba924 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -31,6 +31,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5))
CommDirectiveSupportsAlignment = false;
+ StructorOutputOrder = Structors::PriorityOrder;
+
TextSection // .text
= Ctx->getMachOSection("__TEXT", "__text",
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
@@ -258,6 +260,8 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
}
}
+ StructorOutputOrder = Structors::ReversePriorityOrder;
+
// ELF
BSSSection =
Ctx->getELFSection(".bss", ELF::SHT_NOBITS,
@@ -385,6 +389,8 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
// COFF
+ StructorOutputOrder = Structors::ReversePriorityOrder;
+
TextSection =
Ctx->getCOFFSection(".text",
COFF::IMAGE_SCN_CNT_CODE |
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 90c957f..663d0ca 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -260,5 +260,9 @@ void MCObjectStreamer::Finish() {
if (getContext().hasDwarfFiles())
MCDwarfFileTable::Emit(this);
+ // If we are generating dwarf for assembly source files, dump out the sections.
+ if (getContext().getGenDwarfForAssembly())
+ MCGenDwarfInfo::Emit(this);
+
getAssembler().Finish();
}
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 7883893..aac020d 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -181,6 +181,9 @@ private:
/// EnterIncludeFile - Enter the specified file. This returns true on failure.
bool EnterIncludeFile(const std::string &Filename);
+ /// ProcessIncbinFile - Process the specified file for the .incbin directive.
+ /// This returns true on failure.
+ bool ProcessIncbinFile(const std::string &Filename);
/// \brief Reset the current lexer position to that given by \arg Loc. The
/// current token is not set; clients should ensure Lex() is called
@@ -227,6 +230,7 @@ private:
bool ParseDirectiveAbort(); // ".abort"
bool ParseDirectiveInclude(); // ".include"
+ bool ParseDirectiveIncbin(); // ".incbin"
bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if"
// ".ifdef" or ".ifndef", depending on expect_defined
@@ -429,6 +433,21 @@ bool AsmParser::EnterIncludeFile(const std::string &Filename) {
return false;
}
+/// Process the specified .incbin file by searching for it in the include paths
+/// and then emitting the byte contents of the file to the streamer. This
+/// returns true on failure.
+bool AsmParser::ProcessIncbinFile(const std::string &Filename) {
+ std::string IncludedFile;
+ int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
+ if (NewBuf == -1)
+ return true;
+
+ // Pick up the bytes from the file and emit them.
+ getStreamer().EmitBytes(SrcMgr.getMemoryBuffer(NewBuf)->getBuffer(),
+ DEFAULT_ADDRSPACE);
+ return false;
+}
+
void AsmParser::JumpToLoc(SMLoc Loc) {
CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer());
@@ -468,6 +487,9 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// section and generate a .file directive.
if (getContext().getGenDwarfForAssembly()) {
getContext().setGenDwarfSection(getStreamer().getCurrentSection());
+ MCSymbol *SectionStartSym = getContext().CreateTempSymbol();
+ getStreamer().EmitLabel(SectionStartSym);
+ getContext().setGenDwarfSectionStartSym(SectionStartSym);
getStreamer().EmitDwarfFileDirective(getContext().nextGenDwarfFileNumber(),
StringRef(), SrcMgr.getMemoryBuffer(CurBuffer)->getBufferIdentifier());
}
@@ -1047,6 +1069,12 @@ bool AsmParser::ParseStatement() {
// Emit the label.
Out.EmitLabel(Sym);
+ // If we are generating dwarf for assembly source files then gather the
+ // info to make a dwarf subprogram entry for this label if needed.
+ if (getContext().getGenDwarfForAssembly())
+ MCGenDwarfSubprogramEntry::Make(Sym, &getStreamer(), getSourceManager(),
+ IDLoc);
+
// Consume any end of statement token, if present, to avoid spurious
// AddBlankLine calls().
if (Lexer.is(AsmToken::EndOfStatement)) {
@@ -1174,6 +1202,8 @@ bool AsmParser::ParseStatement() {
return ParseDirectiveAbort();
if (IDVal == ".include")
return ParseDirectiveInclude();
+ if (IDVal == ".incbin")
+ return ParseDirectiveIncbin();
if (IDVal == ".code16")
return TokError(Twine(IDVal) + " not supported yet");
@@ -2197,6 +2227,31 @@ bool AsmParser::ParseDirectiveInclude() {
return false;
}
+/// ParseDirectiveIncbin
+/// ::= .incbin "filename"
+bool AsmParser::ParseDirectiveIncbin() {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '.incbin' directive");
+
+ std::string Filename = getTok().getString();
+ SMLoc IncbinLoc = getLexer().getLoc();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.incbin' directive");
+
+ // Strip the quotes.
+ Filename = Filename.substr(1, Filename.size()-2);
+
+ // Attempt to process the incbin file.
+ if (ProcessIncbinFile(Filename)) {
+ Error(IncbinLoc, "Could not find incbin file '" + Filename + "'");
+ return true;
+ }
+
+ return false;
+}
+
/// ParseDirectiveIf
/// ::= .if expression
bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
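The new directive mirrors the GNU assembler's .incbin: a line such as .incbin "blob.bin" searches the include paths and splices the file's raw bytes into the output at that point, so pre-built binary data can be embedded without converting it to .byte lists.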
diff --git a/lib/MC/MCParser/CMakeLists.txt b/lib/MC/MCParser/CMakeLists.txt
index 299d281..222f237 100644
--- a/lib/MC/MCParser/CMakeLists.txt
+++ b/lib/MC/MCParser/CMakeLists.txt
@@ -9,8 +9,3 @@ add_llvm_library(LLVMMCParser
MCAsmParserExtension.cpp
MCTargetAsmParser.cpp
)
-
-add_llvm_library_dependencies(LLVMMCParser
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index d891126..ffc400b 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -476,6 +476,7 @@ bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
.Case("common", MCSA_ELF_TypeCommon)
.Case("notype", MCSA_ELF_TypeNoType)
.Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject)
+ .Case("gnu_indirect_function", MCSA_ELF_TypeIndFunction)
.Default(MCSA_Invalid);
if (Attr == MCSA_Invalid)
diff --git a/lib/MC/MCParser/LLVMBuild.txt b/lib/MC/MCParser/LLVMBuild.txt
index 83146a9..bcb0feb 100644
--- a/lib/MC/MCParser/LLVMBuild.txt
+++ b/lib/MC/MCParser/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = MCParser
parent = MC
required_libraries = MC Support
-
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index a9219ad..e016f09 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -584,9 +584,14 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
// requires the compiler to use .set to absolutize the differences between
// symbols which the compiler knows to be assembly time constants, so we
// don't need to worry about considering symbol differences fully resolved.
+ //
+ // If the file isn't using sub-sections-via-symbols, we can make the
+ // same assumptions about any symbol that we normally make about
+ // assembler locals.
if (!Asm.getBackend().hasReliableSymbolDifference()) {
- if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB)
+ if ((!SA.isTemporary() && Asm.getSubsectionsViaSymbols()) ||
+ !SA.isInSection() || &SecA != &SecB)
return false;
return true;
}
@@ -628,7 +633,7 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
}
void MachObjectWriter::WriteObject(MCAssembler &Asm,
- const MCAsmLayout &Layout) {
+ const MCAsmLayout &Layout) {
unsigned NumSections = Asm.size();
// The section data starts after the header, the segment load command (and
@@ -731,7 +736,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
// Write the actual section data.
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
- Asm.WriteSectionData(it, Layout);
+ Asm.writeSectionData(it, Layout);
uint64_t Pad = getPaddingSize(it, Layout);
for (unsigned int i = 0; i < Pad; ++i)
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 4d3b59c..4052374 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -850,7 +850,7 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
assert(OS.tell() == (*i)->Header.PointerToRawData &&
"Section::PointerToRawData is insane!");
- Asm.WriteSectionData(j, Layout);
+ Asm.writeSectionData(j, Layout);
}
if ((*i)->Relocations.size() > 0) {
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index 86eb51a..c20fc0c 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -9,8 +9,3 @@ add_llvm_library(LLVMObject
Object.cpp
ObjectFile.cpp
)
-
-add_llvm_library_dependencies(LLVMObject
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index c6ce562..bdf5431 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -101,7 +101,7 @@ error_code COFFObjectFile::getSymbolNext(DataRefImpl Symb,
return getSymbolName(symb, Result);
}
-error_code COFFObjectFile::getSymbolOffset(DataRefImpl Symb,
+error_code COFFObjectFile::getSymbolFileOffset(DataRefImpl Symb,
uint64_t &Result) const {
const coff_symbol *symb = toSymb(Symb);
const coff_section *Section = NULL;
@@ -113,7 +113,7 @@ error_code COFFObjectFile::getSymbolOffset(DataRefImpl Symb,
if (Type == 'U' || Type == 'w')
Result = UnknownAddressOrSize;
else if (Section)
- Result = Section->VirtualAddress + symb->Value;
+ Result = Section->PointerToRawData + symb->Value;
else
Result = symb->Value;
return object_error::success;
@@ -131,11 +131,9 @@ error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb,
if (Type == 'U' || Type == 'w')
Result = UnknownAddressOrSize;
else if (Section)
- Result = reinterpret_cast<uintptr_t>(base() +
- Section->PointerToRawData +
- symb->Value);
+ Result = Section->VirtualAddress + symb->Value;
else
- Result = reinterpret_cast<uintptr_t>(base() + symb->Value);
+ Result = symb->Value;
return object_error::success;
}
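A worked example of the new split, with invented numbers: for a section with VirtualAddress 0x1000 and PointerToRawData 0x400, a symbol whose Value is 0x20 now reports 0x1020 from getSymbolAddress (its image-relative address) and 0x420 from getSymbolFileOffset (where its bytes sit in the file).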
@@ -283,7 +281,7 @@ error_code COFFObjectFile::getSymbolSection(DataRefImpl Symb,
if (symb->SectionNumber <= COFF::IMAGE_SYM_UNDEFINED)
Result = end_sections();
else {
- const coff_section *sec;
+ const coff_section *sec = 0;
if (error_code ec = getSection(symb->SectionNumber, sec)) return ec;
DataRefImpl Sec;
std::memset(&Sec, 0, sizeof(Sec));
@@ -389,7 +387,7 @@ error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec,
bool &Result) const {
const coff_section *sec = toSec(Sec);
const coff_symbol *symb = toSymb(Symb);
- const coff_section *symb_sec;
+ const coff_section *symb_sec = 0;
if (error_code ec = getSection(symb->SectionNumber, symb_sec)) return ec;
if (symb_sec == sec)
Result = true;
@@ -624,6 +622,11 @@ error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel,
Res = toRel(Rel)->VirtualAddress;
return object_error::success;
}
+error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const {
+ Res = toRel(Rel)->VirtualAddress;
+ return object_error::success;
+}
error_code COFFObjectFile::getRelocationSymbol(DataRefImpl Rel,
SymbolRef &Res) const {
const coff_relocation* R = toRel(Rel);
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index d1a43e7..a6c4c25 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -325,7 +325,7 @@ class ELFObjectFile : public ObjectFile {
protected:
virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
- virtual error_code getSymbolOffset(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
@@ -355,6 +355,8 @@ protected:
RelocationRef &Res) const;
virtual error_code getRelocationAddress(DataRefImpl Rel,
uint64_t &Res) const;
+ virtual error_code getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const;
virtual error_code getRelocationSymbol(DataRefImpl Rel,
SymbolRef &Res) const;
virtual error_code getRelocationType(DataRefImpl Rel,
@@ -462,7 +464,7 @@ ELFObjectFile<target_endianness, is64Bits>
template<support::endianness target_endianness, bool is64Bits>
error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolOffset(DataRefImpl Symb,
+ ::getSymbolFileOffset(DataRefImpl Symb,
uint64_t &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
@@ -486,7 +488,8 @@ error_code ELFObjectFile<target_endianness, is64Bits>
case ELF::STT_FUNC:
case ELF::STT_OBJECT:
case ELF::STT_NOTYPE:
- Result = symb->st_value;
+ Result = symb->st_value +
+ (Section ? Section->sh_offset - Section->sh_addr : 0);
return object_error::success;
default:
Result = UnknownAddressOrSize;
@@ -502,28 +505,25 @@ error_code ELFObjectFile<target_endianness, is64Bits>
const Elf_Sym *symb = getSymbol(Symb);
const Elf_Shdr *Section;
switch (getSymbolTableIndex(symb)) {
- case ELF::SHN_COMMON: // Fall through.
+ case ELF::SHN_COMMON:
// Undefined symbols have no address yet.
case ELF::SHN_UNDEF:
Result = UnknownAddressOrSize;
return object_error::success;
case ELF::SHN_ABS:
- Result = reinterpret_cast<uintptr_t>(base()+symb->st_value);
+ Result = symb->st_value;
return object_error::success;
default: Section = getSection(symb);
}
- const uint8_t* addr = base();
- if (Section)
- addr += Section->sh_offset;
+
switch (symb->getType()) {
case ELF::STT_SECTION:
- Result = reinterpret_cast<uintptr_t>(addr);
+ Result = Section ? Section->sh_addr : UnknownAddressOrSize;
return object_error::success;
- case ELF::STT_FUNC: // Fall through.
- case ELF::STT_OBJECT: // Fall through.
+ case ELF::STT_FUNC:
+ case ELF::STT_OBJECT:
case ELF::STT_NOTYPE:
- addr += symb->st_value;
- Result = reinterpret_cast<uintptr_t>(addr);
+ Result = symb->st_value;
return object_error::success;
default:
Result = UnknownAddressOrSize;
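The ELF side now makes the same distinction: getSymbolAddress returns st_value directly (a virtual address once the object is laid out), while getSymbolFileOffset rebases it into the file via sh_offset - sh_addr. With illustrative numbers sh_addr = 0x400000, sh_offset = 0x1000 and st_value = 0x400020, the file offset works out to 0x1020.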
@@ -922,6 +922,29 @@ error_code ELFObjectFile<target_endianness, is64Bits>
template<support::endianness target_endianness, bool is64Bits>
error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Result) const {
+ uint64_t offset;
+ const Elf_Shdr *sec = getSection(Rel.w.b);
+ switch (sec->sh_type) {
+ default :
+ report_fatal_error("Invalid section type in Rel!");
+ case ELF::SHT_REL : {
+ offset = getRel(Rel)->r_offset;
+ break;
+ }
+ case ELF::SHT_RELA : {
+ offset = getRela(Rel)->r_offset;
+ break;
+ }
+ }
+
+ Result = offset - sec->sh_addr;
+ return object_error::success;
+}
+
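Subtracting sec->sh_addr here converts the address stored in r_offset into an offset relative to the start of the containing section, matching the section-relative contract of the new getRelocationOffset accessors.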
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
::getRelocationType(DataRefImpl Rel,
uint64_t &Result) const {
const Elf_Shdr *sec = getSection(Rel.w.b);
diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt
index 20fbb85..0041acd 100644
--- a/lib/Object/LLVMBuild.txt
+++ b/lib/Object/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = Object
parent = Libraries
required_libraries = Core Support
-
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 65ce5f8..4fa621b 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -125,23 +125,27 @@ error_code MachOObjectFile::getSymbolName(DataRefImpl DRI,
return object_error::success;
}
-error_code MachOObjectFile::getSymbolOffset(DataRefImpl DRI,
- uint64_t &Result) const {
- uint64_t SectionOffset;
- uint8_t SectionIndex;
+error_code MachOObjectFile::getSymbolFileOffset(DataRefImpl DRI,
+ uint64_t &Result) const {
if (MachOObj->is64Bit()) {
InMemoryStruct<macho::Symbol64TableEntry> Entry;
getSymbol64TableEntry(DRI, Entry);
Result = Entry->Value;
- SectionIndex = Entry->SectionIndex;
+ if (Entry->SectionIndex) {
+ InMemoryStruct<macho::Section64> Section;
+ getSection64(Sections[Entry->SectionIndex-1], Section);
+ Result += Section->Offset - Section->Address;
+ }
} else {
InMemoryStruct<macho::SymbolTableEntry> Entry;
getSymbolTableEntry(DRI, Entry);
Result = Entry->Value;
- SectionIndex = Entry->SectionIndex;
+ if (Entry->SectionIndex) {
+ InMemoryStruct<macho::Section> Section;
+ getSection(Sections[Entry->SectionIndex-1], Section);
+ Result += Section->Offset - Section->Address;
+ }
}
- getSectionAddress(Sections[SectionIndex-1], SectionOffset);
- Result -= SectionOffset;
return object_error::success;
}
@@ -162,7 +166,64 @@ error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI,
error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
uint64_t &Result) const {
- Result = UnknownAddressOrSize;
+ uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
+ uint64_t BeginOffset;
+ uint64_t EndOffset = 0;
+ uint8_t SectionIndex;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ BeginOffset = Entry->Value;
+ SectionIndex = Entry->SectionIndex;
+ if (!SectionIndex) {
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+ // Unfortunately symbols are unsorted so we need to touch all
+ // symbols from the load command
+ DRI.d.b = 0;
+ uint32_t Command = DRI.d.a;
+ while (Command == DRI.d.a) {
+ moveToNextSymbol(DRI);
+ if (DRI.d.a < LoadCommandCount) {
+ getSymbol64TableEntry(DRI, Entry);
+ if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset)
+ if (!EndOffset || Entry->Value < EndOffset)
+ EndOffset = Entry->Value;
+ }
+ DRI.d.b++;
+ }
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ BeginOffset = Entry->Value;
+ SectionIndex = Entry->SectionIndex;
+ if (!SectionIndex) {
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+ // Unfortunately symbols are unsorted so we need to touch all
+ // symbols from the load command
+ DRI.d.b = 0;
+ uint32_t Command = DRI.d.a;
+ while (Command == DRI.d.a) {
+ moveToNextSymbol(DRI);
+ if (DRI.d.a < LoadCommandCount) {
+ getSymbolTableEntry(DRI, Entry);
+ if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset)
+ if (!EndOffset || Entry->Value < EndOffset)
+ EndOffset = Entry->Value;
+ }
+ DRI.d.b++;
+ }
+ }
+ if (!EndOffset) {
+ uint64_t Size;
+ getSectionSize(Sections[SectionIndex-1], Size);
+ getSectionAddress(Sections[SectionIndex-1], EndOffset);
+ EndOffset += Size;
+ }
+ Result = EndOffset - BeginOffset;
return object_error::success;
}
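The size computation above is a nearest-neighbor scan: since Mach-O symbols carry no size field, the end of a symbol is taken to be the smallest value of any other symbol in the same section that is strictly greater than its own, falling back to the end of the section. A self-contained sketch of the same idea, using plain standard types and invented names:

    #include <cstdint>
    #include <vector>

    struct Sym { uint8_t Section; uint64_t Value; };

    // Size of Syms[I]: distance to the nearest following symbol in the same
    // section, or to the end of the section if it is the last one.
    uint64_t symbolSize(const std::vector<Sym> &Syms, size_t I,
                        uint64_t SectionAddr, uint64_t SectionSize) {
      const Sym &S = Syms[I];
      uint64_t End = 0;
      for (size_t J = 0, E = Syms.size(); J != E; ++J) // symbols are unsorted
        if (Syms[J].Section == S.Section && Syms[J].Value > S.Value)
          if (!End || Syms[J].Value < End)
            End = Syms[J].Value;
      if (!End)
        End = SectionAddr + SectionSize;
      return End - S.Value;
    }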
@@ -275,7 +336,7 @@ error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb,
if (index == 0)
Res = end_sections();
else
- Res = section_iterator(SectionRef(Sections[index], this));
+ Res = section_iterator(SectionRef(Sections[index-1], this));
return object_error::success;
}
@@ -614,7 +675,7 @@ error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
bool isScattered = (Arch != Triple::x86_64) &&
(RE->Word0 & macho::RF_Scattered);
uint64_t RelAddr = 0;
- if (isScattered)
+ if (isScattered)
RelAddr = RE->Word0 & 0xFFFFFF;
else
RelAddr = RE->Word0;
@@ -622,6 +683,20 @@ error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
Res = reinterpret_cast<uintptr_t>(sectAddress + RelAddr);
return object_error::success;
}
+error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+ if (isScattered)
+ Res = RE->Word0 & 0xFFFFFF;
+ else
+ Res = RE->Word0;
+ return object_error::success;
+}
error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel,
SymbolRef &Res) const {
InMemoryStruct<macho::RelocationEntry> RE;
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index 719bf88..f061ea7 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -150,9 +150,9 @@ uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) {
return ret;
}
-uint64_t LLVMGetSymbolOffset(LLVMSymbolIteratorRef SI) {
+uint64_t LLVMGetSymbolFileOffset(LLVMSymbolIteratorRef SI) {
uint64_t ret;
- if (error_code ec = (*unwrap(SI))->getOffset(ret))
+ if (error_code ec = (*unwrap(SI))->getFileOffset(ret))
report_fatal_error(ec.message());
return ret;
}
@@ -172,6 +172,13 @@ uint64_t LLVMGetRelocationAddress(LLVMRelocationIteratorRef RI) {
return ret;
}
+uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(RI))->getOffset(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI) {
SymbolRef ret;
if (error_code ec = (*unwrap(RI))->getSymbol(ret))
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index f238894..70e7afd 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -1854,20 +1854,33 @@ APFloat::convert(const fltSemantics &toSemantics,
lostFraction lostFraction;
unsigned int newPartCount, oldPartCount;
opStatus fs;
+ int shift;
+ const fltSemantics &fromSemantics = *semantics;
- assertArithmeticOK(*semantics);
+ assertArithmeticOK(fromSemantics);
assertArithmeticOK(toSemantics);
lostFraction = lfExactlyZero;
newPartCount = partCountForBits(toSemantics.precision + 1);
oldPartCount = partCount();
+ shift = toSemantics.precision - fromSemantics.precision;
+
+ bool X86SpecialNan = false;
+ if (&fromSemantics == &APFloat::x87DoubleExtended &&
+ &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
+ (!(*significandParts() & 0x8000000000000000ULL) ||
+ !(*significandParts() & 0x4000000000000000ULL))) {
+ // x86 has some unusual NaNs which cannot be represented in any other
+ // format; note them here.
+ X86SpecialNan = true;
+ }
+
+ // If this is a truncation, perform the shift before we narrow the storage.
+ if (shift < 0 && (category==fcNormal || category==fcNaN))
+ lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
- /* Handle storage complications. If our new form is wider,
- re-allocate our bit pattern into wider storage. If it is
- narrower, we ignore the excess parts, but if narrowing to a
- single part we need to free the old storage.
- Be careful not to reference significandParts for zeroes
- and infinities, since it aborts. */
+ // Fix the storage so it can hold the new value.
if (newPartCount > oldPartCount) {
+ // The new type requires more storage; make it available.
integerPart *newParts;
newParts = new integerPart[newPartCount];
APInt::tcSet(newParts, 0, newPartCount);
@@ -1875,61 +1888,36 @@ APFloat::convert(const fltSemantics &toSemantics,
APInt::tcAssign(newParts, significandParts(), oldPartCount);
freeSignificand();
significand.parts = newParts;
- } else if (newPartCount < oldPartCount) {
- /* Capture any lost fraction through truncation of parts so we get
- correct rounding whilst normalizing. */
- if (category==fcNormal)
- lostFraction = lostFractionThroughTruncation
- (significandParts(), oldPartCount, toSemantics.precision);
- if (newPartCount == 1) {
- integerPart newPart = 0;
- if (category==fcNormal || category==fcNaN)
- newPart = significandParts()[0];
- freeSignificand();
- significand.part = newPart;
- }
+ } else if (newPartCount == 1 && oldPartCount != 1) {
+ // Switch to built-in storage for a single part.
+ integerPart newPart = 0;
+ if (category==fcNormal || category==fcNaN)
+ newPart = significandParts()[0];
+ freeSignificand();
+ significand.part = newPart;
}
+ // Now that we have the right storage, switch the semantics.
+ semantics = &toSemantics;
+
+ // If this is an extension, perform the shift now that the storage is
+ // available.
+ if (shift > 0 && (category==fcNormal || category==fcNaN))
+ APInt::tcShiftLeft(significandParts(), newPartCount, shift);
+
if (category == fcNormal) {
- /* Re-interpret our bit-pattern. */
- exponent += toSemantics.precision - semantics->precision;
- semantics = &toSemantics;
fs = normalize(rounding_mode, lostFraction);
*losesInfo = (fs != opOK);
} else if (category == fcNaN) {
- int shift = toSemantics.precision - semantics->precision;
- // Do this now so significandParts gets the right answer
- const fltSemantics *oldSemantics = semantics;
- semantics = &toSemantics;
- *losesInfo = false;
- // No normalization here, just truncate
- if (shift>0)
- APInt::tcShiftLeft(significandParts(), newPartCount, shift);
- else if (shift < 0) {
- unsigned ushift = -shift;
- // Figure out if we are losing information. This happens
- // if are shifting out something other than 0s, or if the x87 long
- // double input did not have its integer bit set (pseudo-NaN), or if the
- // x87 long double input did not have its QNan bit set (because the x87
- // hardware sets this bit when converting a lower-precision NaN to
- // x87 long double).
- if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
- *losesInfo = true;
- if (oldSemantics == &APFloat::x87DoubleExtended &&
- (!(*significandParts() & 0x8000000000000000ULL) ||
- !(*significandParts() & 0x4000000000000000ULL)))
- *losesInfo = true;
- APInt::tcShiftRight(significandParts(), newPartCount, ushift);
- }
+ *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
// gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
// does not give you back the same bits. This is dubious, and we
// don't currently do it. You're really supposed to get
// an invalid operation signal at runtime, but nobody does that.
fs = opOK;
} else {
- semantics = &toSemantics;
- fs = opOK;
*losesInfo = false;
+ fs = opOK;
}
return fs;
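The rewrite computes the significand shift once, as shift = toSemantics.precision - fromSemantics.precision, and applies it on whichever side of the storage change keeps every bit addressable: a right shift before narrowing, a left shift after widening. For example, converting IEEE double to single gives shift = 24 - 53 = -29, so 29 bits are shifted out (and recorded in lostFraction) while the wide storage is still in place.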
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 55cb433..506225f 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -1440,15 +1440,11 @@ APInt APInt::sqrt() const {
APInt nextSquare((x_old + 1) * (x_old +1));
if (this->ult(square))
return x_old;
- else if (this->ule(nextSquare)) {
- APInt midpoint((nextSquare - square).udiv(two));
- APInt offset(*this - square);
- if (offset.ult(midpoint))
- return x_old;
- else
- return x_old + 1;
- } else
- llvm_unreachable("Error in APInt::sqrt computation");
+ assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
+ APInt midpoint((nextSquare - square).udiv(two));
+ APInt offset(*this - square);
+ if (offset.ult(midpoint))
+ return x_old;
return x_old + 1;
}
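The remaining logic rounds to the nearest integer square root. Worked example for *this = 7: x_old = 2, square = 4, nextSquare = 9, so midpoint = 2 and offset = 3; offset is not less than midpoint, so the function returns x_old + 1 = 3, matching sqrt(7) ≈ 2.65 rounded to nearest.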
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 4b43ae9..ce93449 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -57,6 +57,9 @@ TEMPLATE_INSTANTIATION(class opt<char>);
TEMPLATE_INSTANTIATION(class opt<bool>);
} } // end namespace llvm::cl
+void GenericOptionValue::anchor() {}
+void OptionValue<boolOrDefault>::anchor() {}
+void OptionValue<std::string>::anchor() {}
void Option::anchor() {}
void basic_parser_impl::anchor() {}
void parser<bool>::anchor() {}
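These empty anchor() definitions are the usual key-function idiom: giving each of these classes one member function defined out of line pins its vtable and type information to this single object file, instead of letting the compiler emit weak copies in every translation unit that includes CommandLine.h.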
diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp
index 8145664..1e89c6a 100644
--- a/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/lib/Support/DAGDeltaAlgorithm.cpp
@@ -350,6 +350,9 @@ DAGDeltaAlgorithmImpl::Run() {
return Required;
}
+void DAGDeltaAlgorithm::anchor() {
+}
+
DAGDeltaAlgorithm::changeset_ty
DAGDeltaAlgorithm::Run(const changeset_ty &Changes,
const std::vector<edge_ty> &Dependencies) {
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index a19e4b4..86d1c5d 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -298,6 +298,8 @@ std::string sys::getHostCPUName() {
}
case 16:
return "amdfam10";
+ case 21:
+ return "bdver1";
default:
return "generic";
}
diff --git a/lib/Support/LLVMBuild.txt b/lib/Support/LLVMBuild.txt
index f32ef8f..5b88be0 100644
--- a/lib/Support/LLVMBuild.txt
+++ b/lib/Support/LLVMBuild.txt
@@ -19,4 +19,3 @@
type = Library
name = Support
parent = Libraries
-
diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp
index 8874e94..6a873cb 100644
--- a/lib/Support/Mutex.cpp
+++ b/lib/Support/Mutex.cpp
@@ -19,7 +19,7 @@
//=== independent code.
//===----------------------------------------------------------------------===//
-#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// Define all methods as no-ops if threading is explicitly disabled
namespace llvm {
using namespace sys;
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index a4d49dc..dcddeda 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -90,7 +90,7 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
case 0xCF: {
uint16_t type = 0;
if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
- magic[2] == char(0xFA) &&
+ magic[2] == char(0xFA) &&
(magic[3] == char(0xCE) || magic[3] == char(0xCF))) {
/* Native endian */
if (length >= 16) type = magic[14] << 8 | magic[15];
@@ -152,31 +152,31 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
bool
Path::isArchive() const {
- LLVMFileType type;
+ fs::file_magic type;
if (fs::identify_magic(str(), type))
return false;
- return type == Archive_FileType;
+ return type == fs::file_magic::archive;
}
bool
Path::isDynamicLibrary() const {
- LLVMFileType type;
+ fs::file_magic type;
if (fs::identify_magic(str(), type))
return false;
switch (type) {
default: return false;
- case Mach_O_FixedVirtualMemorySharedLib_FileType:
- case Mach_O_DynamicallyLinkedSharedLib_FileType:
- case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
- case ELF_SharedObject_FileType:
- case COFF_FileType: return true;
+ case fs::file_magic::macho_fixed_virtual_memory_shared_lib:
+ case fs::file_magic::macho_dynamically_linked_shared_lib:
+ case fs::file_magic::macho_dynamically_linked_shared_lib_stub:
+ case fs::file_magic::elf_shared_object:
+ case fs::file_magic::pecoff_executable: return true;
}
}
bool
Path::isObjectFile() const {
- LLVMFileType type;
- if (fs::identify_magic(str(), type) || type == Unknown_FileType)
+ fs::file_magic type;
+ if (fs::identify_magic(str(), type) || type == fs::file_magic::unknown)
return false;
return true;
}
@@ -212,10 +212,10 @@ Path::appendSuffix(StringRef suffix) {
bool
Path::isBitcodeFile() const {
- LLVMFileType type;
+ fs::file_magic type;
if (fs::identify_magic(str(), type))
return false;
- return type == Bitcode_FileType;
+ return type == fs::file_magic::bitcode;
}
bool Path::hasMagicNumber(StringRef Magic) const {
diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp
index bebe442..7cc434b 100644
--- a/lib/Support/PathV2.cpp
+++ b/lib/Support/PathV2.cpp
@@ -13,6 +13,7 @@
#include "llvm/Support/PathV2.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include <cctype>
#include <cstdio>
@@ -492,7 +493,7 @@ bool is_separator(char value) {
void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
result.clear();
-
+
// Check whether the temporary directory is specified by an environment
// variable.
const char *EnvironmentVariable;
@@ -505,7 +506,7 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
result.append(RequestedDir, RequestedDir + strlen(RequestedDir));
return;
}
-
+
// Fall back to a system default.
const char *DefaultResult;
#ifdef LLVM_ON_WIN32
@@ -519,7 +520,7 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
#endif
result.append(DefaultResult, DefaultResult + strlen(DefaultResult));
}
-
+
bool has_root_name(const Twine &path) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
@@ -738,13 +739,124 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result) {
return success;
}
-error_code identify_magic(const Twine &path, LLVMFileType &result) {
+/// @brief Identify the file type from the contents of a magic-number buffer.
+file_magic identify_magic(StringRef magic) {
+ switch ((unsigned char)magic[0]) {
+ case 0xDE: // 0x0B17C0DE = BC wrapper
+ if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
+ magic[3] == (char)0x0B)
+ return file_magic::bitcode;
+ break;
+ case 'B':
+ if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
+ return file_magic::bitcode;
+ break;
+ case '!':
+ if (magic.size() >= 8)
+ if (memcmp(magic.data(),"!<arch>\n",8) == 0)
+ return file_magic::archive;
+ break;
+
+ case '\177':
+ if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
+ if (magic.size() >= 18 && magic[17] == 0)
+ switch (magic[16]) {
+ default: break;
+ case 1: return file_magic::elf_relocatable;
+ case 2: return file_magic::elf_executable;
+ case 3: return file_magic::elf_shared_object;
+ case 4: return file_magic::elf_core;
+ }
+ }
+ break;
+
+ case 0xCA:
+ if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
+ magic[3] == char(0xBE)) {
+ // This is complicated by an overlap with Java class files.
+ // See the Mach-O section in /usr/share/file/magic for details.
+ if (magic.size() >= 8 && magic[7] < 43)
+ // FIXME: Universal Binary of any type.
+ return file_magic::macho_dynamically_linked_shared_lib;
+ }
+ break;
+
+ // The two magic numbers for mach-o are:
+ // 0xfeedface - 32-bit mach-o
+ // 0xfeedfacf - 64-bit mach-o
+ case 0xFE:
+ case 0xCE:
+ case 0xCF: {
+ uint16_t type = 0;
+ if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
+ magic[2] == char(0xFA) &&
+ (magic[3] == char(0xCE) || magic[3] == char(0xCF))) {
+ /* Native endian */
+ if (magic.size() >= 16) type = magic[14] << 8 | magic[15];
+ } else if ((magic[0] == char(0xCE) || magic[0] == char(0xCF)) &&
+ magic[1] == char(0xFA) && magic[2] == char(0xED) &&
+ magic[3] == char(0xFE)) {
+ /* Reverse endian */
+ if (magic.size() >= 14) type = magic[13] << 8 | magic[12];
+ }
+ switch (type) {
+ default: break;
+ case 1: return file_magic::macho_object;
+ case 2: return file_magic::macho_executable;
+ case 3: return file_magic::macho_fixed_virtual_memory_shared_lib;
+ case 4: return file_magic::macho_core;
+ case 5: return file_magic::macho_preload_executabl;
+ case 6: return file_magic::macho_dynamically_linked_shared_lib;
+ case 7: return file_magic::macho_dynamic_linker;
+ case 8: return file_magic::macho_bundle;
+ case 9: return file_magic::macho_dynamic_linker;
+ case 10: return file_magic::macho_dsym_companion;
+ }
+ break;
+ }
+ case 0xF0: // PowerPC Windows
+ case 0x83: // Alpha 32-bit
+ case 0x84: // Alpha 64-bit
+ case 0x66: // MIPS R4000 Windows
+ case 0x50: // mc68K
+ case 0x4c: // 80386 Windows
+ if (magic[1] == 0x01)
+ return file_magic::coff_object;
+
+ case 0x90: // PA-RISC Windows
+ case 0x68: // mc68K Windows
+ if (magic[1] == 0x02)
+ return file_magic::coff_object;
+ break;
+
+ case 0x4d: // Possible MS-DOS stub on Windows PE file
+ if (magic[1] == 0x5a) {
+ uint32_t off =
+ *reinterpret_cast<const support::ulittle32_t*>(magic.data() + 0x3c);
+ // PE/COFF file, either EXE or DLL.
+ if (off < magic.size() && memcmp(magic.data() + off, "PE\0\0",4) == 0)
+ return file_magic::pecoff_executable;
+ }
+ break;
+
+ case 0x64: // x86-64 Windows.
+ if (magic[1] == char(0x86))
+ return file_magic::coff_object;
+ break;
+
+ default:
+ break;
+ }
+ return file_magic::unknown;
+}
+
+error_code identify_magic(const Twine &path, file_magic &result) {
SmallString<32> Magic;
error_code ec = get_magic(path, Magic.capacity(), Magic);
if (ec && ec != errc::value_too_large)
return ec;
- result = IdentifyFileType(Magic.data(), Magic.size());
+ result = identify_magic(Magic);
return success;
}
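A minimal sketch of exercising the new StringRef overload directly (the header location is an assumption, and the fixed zero-padded buffer is only for illustration; real callers would normally use the path-based overload above):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/FileSystem.h" // assumed to declare identify_magic(StringRef)
    #include <fstream>
    #include <iostream>

    int main(int argc, char **argv) {
      if (argc < 2) return 1;
      char Buf[32] = {0}; // zero padding keeps short files safe for the
                          // fixed-offset probes inside identify_magic
      std::ifstream In(argv[1], std::ios::binary);
      In.read(Buf, sizeof(Buf));
      llvm::sys::fs::file_magic Kind =
          llvm::sys::fs::identify_magic(llvm::StringRef(Buf, sizeof(Buf)));
      std::cout << (Kind == llvm::sys::fs::file_magic::bitcode ? "bitcode"
                                                               : "other")
                << "\n";
      return 0;
    }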
@@ -753,7 +865,9 @@ error_code remove_all_r(StringRef path, file_type ft, uint32_t &count) {
if (ft == file_type::directory_file) {
// This code would be a lot better with exceptions ;/.
error_code ec;
- for (directory_iterator i(path, ec), e; i != e; i.increment(ec)) {
+ directory_iterator i(path, ec);
+ if (ec) return ec;
+ for (directory_iterator e; i != e; i.increment(ec)) {
if (ec) return ec;
file_status st;
if (error_code ec = i->status(st)) return ec;
diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp
index 01860b0..75bc282 100644
--- a/lib/Support/Program.cpp
+++ b/lib/Support/Program.cpp
@@ -13,6 +13,7 @@
#include "llvm/Support/Program.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/system_error.h"
using namespace llvm;
using namespace sys;
diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp
index d0b1e10..d14b976 100644
--- a/lib/Support/RWMutex.cpp
+++ b/lib/Support/RWMutex.cpp
@@ -20,7 +20,7 @@
//=== independent code.
//===----------------------------------------------------------------------===//
-#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// Define all methods as no-ops if threading is explicitly disabled
namespace llvm {
using namespace sys;
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index 04a44a0..d8a6ad3 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -73,9 +73,12 @@ void Statistic::RegisterStatistic() {
if (Enabled)
StatInfo->addStatistic(this);
+ TsanHappensBefore(this);
sys::MemoryFence();
// Remember we have been registered.
+ TsanIgnoreWritesBegin();
Initialized = true;
+ TsanIgnoreWritesEnd();
}
}
diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp
index fdb251c..08b12b6 100644
--- a/lib/Support/ThreadLocal.cpp
+++ b/lib/Support/ThreadLocal.cpp
@@ -19,7 +19,7 @@
//=== independent code.
//===----------------------------------------------------------------------===//
-#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// Define all methods as no-ops if threading is explicitly disabled
namespace llvm {
using namespace sys;
diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp
index 8f0bb93..7483225 100644
--- a/lib/Support/Threading.cpp
+++ b/lib/Support/Threading.cpp
@@ -24,7 +24,7 @@ static bool multithreaded_mode = false;
static sys::Mutex* global_lock = 0;
bool llvm::llvm_start_multithreaded() {
-#if ENABLE_THREADS != 0
+#if LLVM_ENABLE_THREADS != 0
assert(!multithreaded_mode && "Already multithreaded!");
multithreaded_mode = true;
global_lock = new sys::Mutex(true);
@@ -39,7 +39,7 @@ bool llvm::llvm_start_multithreaded() {
}
void llvm::llvm_stop_multithreaded() {
-#if ENABLE_THREADS != 0
+#if LLVM_ENABLE_THREADS != 0
assert(multithreaded_mode && "Not currently multithreaded!");
// We fence here to ensure that all threaded operations are complete BEFORE we
@@ -63,7 +63,7 @@ void llvm::llvm_release_global_lock() {
if (multithreaded_mode) global_lock->release();
}
-#if ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H)
+#if LLVM_ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H)
#include <pthread.h>
struct ThreadInfo {
@@ -102,7 +102,7 @@ void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
error:
::pthread_attr_destroy(&Attr);
}
-#elif ENABLE_THREADS!=0 && defined(LLVM_ON_WIN32)
+#elif LLVM_ENABLE_THREADS!=0 && defined(LLVM_ON_WIN32)
#include "Windows/Windows.h"
#include <process.h>
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index ac4f005..8f58e70 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -20,6 +20,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case arm: return "arm";
case cellspu: return "cellspu";
+ case hexagon: return "hexagon";
case mips: return "mips";
case mipsel: return "mipsel";
case mips64: return "mips64";
@@ -59,6 +60,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case mblaze: return "mblaze";
+ case hexagon: return "hexagon";
+
case sparcv9:
case sparc: return "sparc";
@@ -150,6 +153,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
return ppc;
if (Name == "mblaze")
return mblaze;
+ if (Name == "hexagon")
+ return hexagon;
if (Name == "sparc")
return sparc;
if (Name == "sparcv9")
@@ -295,6 +300,8 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) {
return mips64;
else if (ArchName == "mips64el")
return mips64el;
+ else if (ArchName == "hexagon")
+ return hexagon;
else if (ArchName == "sparc")
return sparc;
else if (ArchName == "sparcv9")
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
index b5488de..aebb4ab 100644
--- a/lib/Support/Unix/PathV2.inc
+++ b/lib/Support/Unix/PathV2.inc
@@ -275,28 +275,17 @@ error_code exists(const Twine &path, bool &result) {
return success;
}
-error_code equivalent(const Twine &A, const Twine &B, bool &result) {
- // Get arguments.
- SmallString<128> a_storage;
- SmallString<128> b_storage;
- StringRef a = A.toNullTerminatedStringRef(a_storage);
- StringRef b = B.toNullTerminatedStringRef(b_storage);
-
- struct stat stat_a, stat_b;
- int error_b = ::stat(b.begin(), &stat_b);
- int error_a = ::stat(a.begin(), &stat_a);
-
- // If both are invalid, it's an error. If only one is, the result is false.
- if (error_a != 0 || error_b != 0) {
- if (error_a == error_b)
- return error_code(errno, system_category());
- result = false;
- } else {
- result =
- stat_a.st_dev == stat_b.st_dev &&
- stat_a.st_ino == stat_b.st_ino;
- }
+bool equivalent(file_status A, file_status B) {
+ assert(status_known(A) && status_known(B));
+ return A.st_dev == B.st_dev &&
+ A.st_ino == B.st_ino;
+}
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+ file_status fsA, fsB;
+ if (error_code ec = status(A, fsA)) return ec;
+ if (error_code ec = status(B, fsB)) return ec;
+ result = equivalent(fsA, fsB);
return success;
}
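This relies on the POSIX guarantee that the (st_dev, st_ino) pair uniquely identifies a live file, so equivalence reduces to two field comparisons once both paths have been stat'ed; the status() hunk below stores exactly those two fields into file_status to make that possible.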
@@ -343,6 +332,9 @@ error_code status(const Twine &path, file_status &result) {
else
result = file_status(file_type::type_unknown);
+ result.st_dev = status.st_dev;
+ result.st_ino = status.st_ino;
+
return success;
}
@@ -441,7 +433,8 @@ rety_open_create:
return success;
}
-error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+error_code detail::directory_iterator_construct(detail::DirIterState &it,
+ StringRef path){
SmallString<128> path_null(path);
DIR *directory = ::opendir(path_null.c_str());
if (directory == 0)
@@ -454,7 +447,7 @@ error_code directory_iterator_construct(directory_iterator &it, StringRef path){
return directory_iterator_increment(it);
}
-error_code directory_iterator_destruct(directory_iterator& it) {
+error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
if (it.IterationHandle)
::closedir(reinterpret_cast<DIR *>(it.IterationHandle));
it.IterationHandle = 0;
@@ -462,7 +455,7 @@ error_code directory_iterator_destruct(directory_iterator& it) {
return success;
}
-error_code directory_iterator_increment(directory_iterator& it) {
+error_code detail::directory_iterator_increment(detail::DirIterState &it) {
errno = 0;
dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle));
if (cur_dir == 0 && errno != 0) {
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index 346baf1..e5990d0 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -412,19 +412,19 @@ Program::Kill(std::string* ErrMsg) {
return false;
}
-bool Program::ChangeStdinToBinary(){
+error_code Program::ChangeStdinToBinary(){
// Do nothing, as Unix doesn't differentiate between text and binary.
- return false;
+ return make_error_code(errc::success);
}
-bool Program::ChangeStdoutToBinary(){
+error_code Program::ChangeStdoutToBinary(){
// Do nothing, as Unix doesn't differentiate between text and binary.
- return false;
+ return make_error_code(errc::success);
}
-bool Program::ChangeStderrToBinary(){
+error_code Program::ChangeStderrToBinary(){
// Do nothing, as Unix doesn't differentiate between text and binary.
- return false;
+ return make_error_code(errc::success);
}
}
diff --git a/lib/Support/Valgrind.cpp b/lib/Support/Valgrind.cpp
index 078d705..2b250a3 100644
--- a/lib/Support/Valgrind.cpp
+++ b/lib/Support/Valgrind.cpp
@@ -53,6 +53,7 @@ void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
#endif // !HAVE_VALGRIND_VALGRIND_H
+#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG)
// These functions require no implementation; tsan just looks at the arguments
// they're called with.
extern "C" {
@@ -63,3 +64,4 @@ void AnnotateHappensAfter(const char *file, int line,
void AnnotateIgnoreWritesBegin(const char *file, int line) {}
void AnnotateIgnoreWritesEnd(const char *file, int line) {}
}
+#endif
diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc
index bc597b2..7ca33c0 100644
--- a/lib/Support/Windows/PathV2.inc
+++ b/lib/Support/Windows/PathV2.inc
@@ -17,7 +17,6 @@
//===----------------------------------------------------------------------===//
#include "Windows.h"
-#include <wincrypt.h>
#include <fcntl.h>
#include <io.h>
#include <sys/stat.h>
@@ -112,14 +111,6 @@ namespace {
return success;
}
- // Forwarder for ScopedHandle.
- BOOL WINAPI CryptReleaseContext(HCRYPTPROV Provider) {
- return ::CryptReleaseContext(Provider, 0);
- }
-
- typedef ScopedHandle<HCRYPTPROV, uintptr_t(-1),
- BOOL (WINAPI*)(HCRYPTPROV), CryptReleaseContext>
- ScopedCryptContext;
bool is_separator(const wchar_t value) {
switch (value) {
case L'\\':
@@ -359,68 +350,22 @@ error_code exists(const Twine &path, bool &result) {
return success;
}
-error_code equivalent(const Twine &A, const Twine &B, bool &result) {
- // Get arguments.
- SmallString<128> a_storage;
- SmallString<128> b_storage;
- StringRef a = A.toStringRef(a_storage);
- StringRef b = B.toStringRef(b_storage);
-
- // Convert to utf-16.
- SmallVector<wchar_t, 128> wide_a;
- SmallVector<wchar_t, 128> wide_b;
- if (error_code ec = UTF8ToUTF16(a, wide_a)) return ec;
- if (error_code ec = UTF8ToUTF16(b, wide_b)) return ec;
-
- AutoHandle HandleB(
- ::CreateFileW(wide_b.begin(),
- 0,
- FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
- 0,
- OPEN_EXISTING,
- FILE_FLAG_BACKUP_SEMANTICS,
- 0));
-
- AutoHandle HandleA(
- ::CreateFileW(wide_a.begin(),
- 0,
- FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
- 0,
- OPEN_EXISTING,
- FILE_FLAG_BACKUP_SEMANTICS,
- 0));
-
- // If both handles are invalid, it's an error.
- if (HandleA == INVALID_HANDLE_VALUE &&
- HandleB == INVALID_HANDLE_VALUE)
- return windows_error(::GetLastError());
-
- // If only one is invalid, it's false.
- if (HandleA == INVALID_HANDLE_VALUE &&
- HandleB == INVALID_HANDLE_VALUE) {
- result = false;
- return success;
- }
-
- // Get file information.
- BY_HANDLE_FILE_INFORMATION InfoA, InfoB;
- if (!::GetFileInformationByHandle(HandleA, &InfoA))
- return windows_error(::GetLastError());
- if (!::GetFileInformationByHandle(HandleB, &InfoB))
- return windows_error(::GetLastError());
-
- // See if it's all the same.
- result =
- InfoA.dwVolumeSerialNumber == InfoB.dwVolumeSerialNumber &&
- InfoA.nFileIndexHigh == InfoB.nFileIndexHigh &&
- InfoA.nFileIndexLow == InfoB.nFileIndexLow &&
- InfoA.nFileSizeHigh == InfoB.nFileSizeHigh &&
- InfoA.nFileSizeLow == InfoB.nFileSizeLow &&
- InfoA.ftLastWriteTime.dwLowDateTime ==
- InfoB.ftLastWriteTime.dwLowDateTime &&
- InfoA.ftLastWriteTime.dwHighDateTime ==
- InfoB.ftLastWriteTime.dwHighDateTime;
+bool equivalent(file_status A, file_status B) {
+ assert(status_known(A) && status_known(B));
+ return A.FileIndexHigh == B.FileIndexHigh &&
+ A.FileIndexLow == B.FileIndexLow &&
+ A.FileSizeHigh == B.FileSizeHigh &&
+ A.FileSizeLow == B.FileSizeLow &&
+ A.LastWriteTimeHigh == B.LastWriteTimeHigh &&
+ A.LastWriteTimeLow == B.LastWriteTimeLow &&
+ A.VolumeSerialNumber == B.VolumeSerialNumber;
+}
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+ file_status fsA, fsB;
+ if (error_code ec = status(A, fsA)) return ec;
+ if (error_code ec = status(B, fsB)) return ec;
+ result = equivalent(fsA, fsB);
return success;
}
@@ -478,8 +423,7 @@ error_code status(const Twine &path, file_status &result) {
return success;
}
- if (error_code ec = UTF8ToUTF16(path8,
- path_utf16))
+ if (error_code ec = UTF8ToUTF16(path8, path_utf16))
return ec;
DWORD attr = ::GetFileAttributesW(path_utf16.begin());
@@ -488,7 +432,7 @@ error_code status(const Twine &path, file_status &result) {
// Handle reparse points.
if (attr & FILE_ATTRIBUTE_REPARSE_POINT) {
- AutoHandle h(
+ ScopedFileHandle h(
::CreateFileW(path_utf16.begin(),
0, // Attributes only.
FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
@@ -496,14 +440,35 @@ error_code status(const Twine &path, file_status &result) {
OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS,
0));
- if (h == INVALID_HANDLE_VALUE)
+ if (!h)
goto handle_status_error;
}
if (attr & FILE_ATTRIBUTE_DIRECTORY)
result = file_status(file_type::directory_file);
- else
+ else {
result = file_status(file_type::regular_file);
+ ScopedFileHandle h(
+ ::CreateFileW(path_utf16.begin(),
+ 0, // Attributes only.
+ FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+ NULL,
+ OPEN_EXISTING,
+ FILE_FLAG_BACKUP_SEMANTICS,
+ 0));
+ if (!h)
+ goto handle_status_error;
+ BY_HANDLE_FILE_INFORMATION Info;
+ if (!::GetFileInformationByHandle(h, &Info))
+ goto handle_status_error;
+ result.FileIndexHigh = Info.nFileIndexHigh;
+ result.FileIndexLow = Info.nFileIndexLow;
+ result.FileSizeHigh = Info.nFileSizeHigh;
+ result.FileSizeLow = Info.nFileSizeLow;
+ result.LastWriteTimeHigh = Info.ftLastWriteTime.dwHighDateTime;
+ result.LastWriteTimeLow = Info.ftLastWriteTime.dwLowDateTime;
+ result.VolumeSerialNumber = Info.dwVolumeSerialNumber;
+ }
return success;
@@ -535,7 +500,7 @@ error_code unique_file(const Twine &model, int &result_fd,
if (makeAbsolute) {
// Make model absolute by prepending a temp directory if it's not already.
bool absolute = path::is_absolute(m);
-
+
if (!absolute) {
SmallVector<wchar_t, 64> temp_dir;
if (error_code ec = TempDir(temp_dir)) return ec;
@@ -691,7 +656,8 @@ error_code get_magic(const Twine &path, uint32_t len,
return success;
}
-error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+error_code detail::directory_iterator_construct(detail::DirIterState &it,
+ StringRef path){
SmallVector<wchar_t, 128> path_utf16;
if (error_code ec = UTF8ToUTF16(path,
@@ -722,7 +688,7 @@ error_code directory_iterator_construct(directory_iterator &it, StringRef path){
error_code ec = windows_error(::GetLastError());
// Check for end.
if (ec == windows_error::no_more_files)
- return directory_iterator_destruct(it);
+ return detail::directory_iterator_destruct(it);
return ec;
} else
FilenameLen = ::wcslen(FirstFind.cFileName);
@@ -742,7 +708,7 @@ error_code directory_iterator_construct(directory_iterator &it, StringRef path){
return success;
}
-error_code directory_iterator_destruct(directory_iterator& it) {
+error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
if (it.IterationHandle != 0)
// Closes the handle if it's valid.
ScopedFindHandle close(HANDLE(it.IterationHandle));
@@ -751,13 +717,13 @@ error_code directory_iterator_destruct(directory_iterator& it) {
return success;
}
-error_code directory_iterator_increment(directory_iterator& it) {
+error_code detail::directory_iterator_increment(detail::DirIterState &it) {
WIN32_FIND_DATAW FindData;
if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) {
error_code ec = windows_error(::GetLastError());
// Check for end.
if (ec == windows_error::no_more_files)
- return directory_iterator_destruct(it);
+ return detail::directory_iterator_destruct(it);
return ec;
}
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index 7e38168..80ccaa6 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -299,14 +299,14 @@ Program::Execute(const Path& path,
Data_ = wpi;
// Make sure these get closed no matter what.
- AutoHandle hThread(pi.hThread);
+ ScopedCommonHandle hThread(pi.hThread);
// Assign the process to a job if a memory limit is defined.
- AutoHandle hJob(0);
+ ScopedJobHandle hJob;
if (memoryLimit != 0) {
hJob = CreateJobObject(0, 0);
bool success = false;
- if (hJob != 0) {
+ if (hJob) {
JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli;
memset(&jeli, 0, sizeof(jeli));
jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_PROCESS_MEMORY;
@@ -367,7 +367,17 @@ Program::Wait(const Path &path,
return -2;
}
- return status & 0377;
+ if (!status)
+ return 0;
+
+ // Pass 10 (Warning) and 11 (Error) to the callee as negative values.
+ if ((status & 0xBFFF0000U) == 0x80000000U)
+ return (int)status;
+
+ if (status & 0xFF)
+ return status & 0x7FFFFFFF;
+
+ return 1;
}
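For reference: the top two bits of an NTSTATUS encode its severity (00 success, 01 informational, 10 warning, 11 error), so the 0xBFFF0000/0x80000000 test above matches the 0x8000xxxx and 0xC000xxxx ranges and forwards such codes unchanged (they come back as negative ints), while ordinary non-zero exit codes are reported in the positive range.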
bool
@@ -387,19 +397,25 @@ Program::Kill(std::string* ErrMsg) {
return false;
}
-bool Program::ChangeStdinToBinary(){
+error_code Program::ChangeStdinToBinary(){
int result = _setmode( _fileno(stdin), _O_BINARY );
- return result == -1;
+ if (result == -1)
+ return error_code(errno, generic_category());
+ return make_error_code(errc::success);
}
-bool Program::ChangeStdoutToBinary(){
+error_code Program::ChangeStdoutToBinary(){
int result = _setmode( _fileno(stdout), _O_BINARY );
- return result == -1;
+ if (result == -1)
+ return error_code(errno, generic_category());
+ return make_error_code(errc::success);
}
-bool Program::ChangeStderrToBinary(){
+error_code Program::ChangeStderrToBinary(){
int result = _setmode( _fileno(stderr), _O_BINARY );
- return result == -1;
+ if (result == -1)
+ return error_code(errno, generic_category());
+ return make_error_code(errc::success);
}
}
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index 0d4b8a2..3a7e90b 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -446,7 +446,7 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
}
if (ExitOnUnhandledExceptions)
- _exit(-3);
+ _exit(ep->ExceptionRecord->ExceptionCode);
// Allow dialog box to pop up allowing choice to start debugger.
if (OldFilter)
diff --git a/lib/Support/Windows/Windows.h b/lib/Support/Windows/Windows.h
index 67b6f01..5c1da0d 100644
--- a/lib/Support/Windows/Windows.h
+++ b/lib/Support/Windows/Windows.h
@@ -26,6 +26,7 @@
#include "llvm/Config/config.h" // Get build system configuration settings
#include <windows.h>
+#include <wincrypt.h>
#include <shlobj.h>
#include <cassert>
#include <string>
@@ -41,70 +42,99 @@ inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
return true;
}
-class AutoHandle {
- HANDLE handle;
+template <typename HandleTraits>
+class ScopedHandle {
+ typedef typename HandleTraits::handle_type handle_type;
+ handle_type Handle;
+ ScopedHandle(const ScopedHandle &other); // = delete;
+ void operator=(const ScopedHandle &other); // = delete;
public:
- AutoHandle(HANDLE h) : handle(h) {}
+ ScopedHandle()
+ : Handle(HandleTraits::GetInvalid()) {}
+
+ explicit ScopedHandle(handle_type h)
+ : Handle(h) {}
- ~AutoHandle() {
- if (handle)
- CloseHandle(handle);
+ ~ScopedHandle() {
+ if (HandleTraits::IsValid(Handle))
+ HandleTraits::Close(Handle);
}
- operator HANDLE() {
- return handle;
+ handle_type take() {
+ handle_type t = Handle;
+ Handle = HandleTraits::GetInvalid();
+ return t;
}
- AutoHandle &operator=(HANDLE h) {
- handle = h;
+ ScopedHandle &operator=(handle_type h) {
+ if (HandleTraits::IsValid(Handle))
+ HandleTraits::Close(Handle);
+ Handle = h;
return *this;
}
+
+ // True if Handle is valid.
+ operator bool() const {
+ return HandleTraits::IsValid(Handle) ? true : false;
+ }
+
+ operator handle_type() const {
+ return Handle;
+ }
};
-template <class HandleType, uintptr_t InvalidHandle,
- class DeleterType, DeleterType D>
-class ScopedHandle {
- HandleType Handle;
+struct CommonHandleTraits {
+ typedef HANDLE handle_type;
-public:
- ScopedHandle() : Handle(InvalidHandle) {}
- ScopedHandle(HandleType handle) : Handle(handle) {}
+ static handle_type GetInvalid() {
+ return INVALID_HANDLE_VALUE;
+ }
- ~ScopedHandle() {
- if (Handle != HandleType(InvalidHandle))
- D(Handle);
+ static void Close(handle_type h) {
+ ::CloseHandle(h);
}
- HandleType take() {
- HandleType temp = Handle;
- Handle = HandleType(InvalidHandle);
- return temp;
+ static bool IsValid(handle_type h) {
+ return h != GetInvalid();
}
+};
- operator HandleType() const { return Handle; }
+struct JobHandleTraits : CommonHandleTraits {
+ static handle_type GetInvalid() {
+ return NULL;
+ }
+};
- ScopedHandle &operator=(HandleType handle) {
- Handle = handle;
- return *this;
+struct CryptContextTraits : CommonHandleTraits {
+ typedef HCRYPTPROV handle_type;
+
+ static handle_type GetInvalid() {
+ return 0;
}
- typedef void (*unspecified_bool_type)();
- static void unspecified_bool_true() {}
+ static void Close(handle_type h) {
+ ::CryptReleaseContext(h, 0);
+ }
- // True if Handle is valid.
- operator unspecified_bool_type() const {
- return Handle == HandleType(InvalidHandle) ? 0 : unspecified_bool_true;
+ static bool IsValid(handle_type h) {
+ return h != GetInvalid();
}
+};
- bool operator!() const {
- return Handle == HandleType(InvalidHandle);
+struct FindHandleTraits : CommonHandleTraits {
+ static void Close(handle_type h) {
+ ::FindClose(h);
}
};
-typedef ScopedHandle<HANDLE, uintptr_t(-1),
- BOOL (WINAPI*)(HANDLE), ::FindClose>
- ScopedFindHandle;
+struct FileHandleTraits : CommonHandleTraits {};
+
+typedef ScopedHandle<CommonHandleTraits> ScopedCommonHandle;
+typedef ScopedHandle<FileHandleTraits> ScopedFileHandle;
+typedef ScopedHandle<CryptContextTraits> ScopedCryptContext;
+typedef ScopedHandle<FindHandleTraits> ScopedFindHandle;
+typedef ScopedHandle<JobHandleTraits> ScopedJobHandle;
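The rewrite is a standard traits/policy RAII pattern: the handle type, its invalid value, and its close routine all come from the policy class, so adding a new handle kind is a four-line struct. The same shape, reduced to portable C++ with an invented FILE* policy:

    #include <cstdio>

    template <typename Traits> class Scoped {
      typedef typename Traits::handle_type handle_type;
      handle_type H;
    public:
      explicit Scoped(handle_type h) : H(h) {}
      ~Scoped() { if (Traits::IsValid(H)) Traits::Close(H); }
      operator bool() const { return Traits::IsValid(H); }
      operator handle_type() const { return H; }
    };

    struct FileTraits {
      typedef FILE *handle_type;
      static handle_type GetInvalid() { return 0; }
      static bool IsValid(handle_type h) { return h != GetInvalid(); }
      static void Close(handle_type h) { std::fclose(h); }
    };

    int main() {
      Scoped<FileTraits> F(std::fopen("example.txt", "r")); // name invented
      if (!F)
        return 1; // invalid handle detected through the policy
      // use F wherever a FILE* is expected; fclose runs on scope exit
      return 0;
    }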
namespace llvm {
template <class T>
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 4927e9a..72d3986 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -20,6 +20,7 @@
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/system_error.h"
#include "llvm/ADT/STLExtras.h"
#include <cctype>
#include <cerrno>
diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt
index 0db4134..e678087 100644
--- a/lib/TableGen/CMakeLists.txt
+++ b/lib/TableGen/CMakeLists.txt
@@ -10,7 +10,3 @@ add_llvm_library(LLVMTableGen
TGLexer.cpp
TGParser.cpp
)
-
-add_llvm_library_dependencies(LLVMTableGen
- LLVMSupport
- )
diff --git a/lib/TableGen/LLVMBuild.txt b/lib/TableGen/LLVMBuild.txt
index 4e24c37..54cedfd 100644
--- a/lib/TableGen/LLVMBuild.txt
+++ b/lib/TableGen/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = TableGen
parent = Libraries
required_libraries = Support
-
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index c06add4..8bcb029 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -2219,6 +2219,8 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
Record *DefProto = MC->DefPrototypes[i];
Record *CurRec = InstantiateMulticlassDef(*MC, DefProto, DefmPrefix, DefmPrefixLoc);
+ if (!CurRec)
+ return true;
if (ResolveMulticlassDefArgs(*MC, CurRec, DefmPrefixLoc, SubClassLoc,
TArgs, TemplateVals, true/*Delete args*/))
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index bbca228..6ae287a 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -493,11 +493,21 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
}
- // These modifiers are not yet supported.
- case 'p': // The high single-precision register of a VFP double-precision
- // register.
case 'e': // The low doubleword register of a NEON quad register.
- case 'f': // The high doubleword register of a NEON quad register.
+ case 'f': { // The high doubleword register of a NEON quad register.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ if (!ARM::QPRRegClass.contains(Reg))
+ return true;
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ?
+ ARM::dsub_0 : ARM::dsub_1);
+ O << ARMInstPrinter::getRegisterName(SubReg);
+ return false;
+ }
+
+ // These modifiers are not yet supported.
case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
case 'H': // The highest-numbered register of a pair.
return true;
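With the 'e' and 'f' modifiers implemented, inline assembly can name the halves of a NEON quad operand: if operand %0 were assigned q1, '%e0' would print d2 and '%f0' would print d3 (the register assignment is invented for the example; qN maps to the adjacent pair d(2N) and d(2N+1)).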
@@ -739,14 +749,14 @@ void ARMAsmPrinter::emitAttributes() {
}
// Signal various FP modes.
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::Allowed);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
ARMBuildAttrs::Allowed);
}
- if (NoInfsFPMath && NoNaNsFPMath)
+ if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
ARMBuildAttrs::Allowed);
else
@@ -759,7 +769,7 @@ void ARMAsmPrinter::emitAttributes() {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
// Hard float. Use both S and D registers and conform to AAPCS-VFP.
- if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
+ if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
}
@@ -1069,7 +1079,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
}
// Try to figure out the unwinding opcode out of src / dst regs.
- if (MI->getDesc().mayStore()) {
+ if (MI->mayStore()) {
// Register saves.
assert(DstReg == ARM::SP &&
"Only stack pointer as a destination reg is supported");
@@ -1481,11 +1491,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
/// in the function. The first operand is the ID# for this instruction, the
/// second is the index of this entry in the MachineConstantPool, the third
/// is the size in bytes of this constant pool entry.
+ /// The required alignment is specified on the basic block holding this MI.
unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
- EmitAlignment(2);
-
// Mark the constant pool entry as data if we're not already in a data
// region.
OutStreamer.EmitDataRegion();
@@ -1934,4 +1943,3 @@ extern "C" void LLVMInitializeARMAsmPrinter() {
RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
}
-
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9315348..8bf5475 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -146,7 +146,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- bool isLoad = !MCID.mayStore();
+ bool isLoad = !MI->mayStore();
const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
const MachineOperand &Base = MI->getOperand(2);
const MachineOperand &Offset = MI->getOperand(NumOps-3);
@@ -439,6 +439,22 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
+bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
+ if (MI->isBundle()) {
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ int PIdx = I->findFirstPredOperandIdx();
+ if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
+ return true;
+ }
+ return false;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+}
+
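
Since a BUNDLE header carries no predicate operands of its own, predication queries must go through this hook, which scans the bundled instructions. A thin usage sketch (the wrapper is hypothetical):

    #include "ARMBaseInstrInfo.h"

    // True if MI, or, for a bundle header, any bundled member, executes
    // conditionally, i.e. with a predicate other than ARMCC::AL.
    static bool executesConditionally(const llvm::ARMBaseInstrInfo &TII,
                                      const llvm::MachineInstr *MI) {
      return TII.isPredicated(MI);
    }

For example, a Thumb2 IT block bundled as {t2IT; t2ADDri<pred:EQ>} now reports true even though the header itself has no predicate operand.
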
bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
@@ -491,7 +507,7 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
// FIXME: This confuses implicit_def with optional CPSR def.
const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef())
+ if (!MCID.getImplicitDefs() && !MI->hasOptionalDef())
return false;
bool Found = false;
@@ -510,11 +526,10 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return false;
- if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+ if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
ARMFunctionInfo *AFI =
MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
return AFI->isThumb2Function();
@@ -548,7 +563,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
if (MI->isLabel())
return 0;
- unsigned Opc = MI->getOpcode();
+ unsigned Opc = MI->getOpcode();
switch (Opc) {
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
@@ -556,6 +571,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case TargetOpcode::EH_LABEL:
case TargetOpcode::DBG_VALUE:
return 0;
+ case TargetOpcode::BUNDLE:
+ return getInstBundleLength(MI);
case ARM::MOVi16_ga_pcrel:
case ARM::MOVTi16_ga_pcrel:
case ARM::t2MOVi16_ga_pcrel:
@@ -593,7 +610,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
unsigned NumOps = MCID.getNumOperands();
MachineOperand JTOP =
- MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
+ MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
unsigned JTI = JTOP.getIndex();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
assert(MJTI != 0);
@@ -622,6 +639,17 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 0; // Not reached
}
+unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += GetInstSizeInBytes(&*I);
+ }
+ return Size;
+}
+
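
Bundles themselves are formed elsewhere, via the new MachineInstrBundle machinery. A hedged sketch of how the pieces fit together, assuming the finalizeBundle helper from MachineInstrBundle.h as introduced in this merge (the wrapper is hypothetical):

    #include "ARMBaseInstrInfo.h"
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/CodeGen/MachineInstrBundle.h"

    // Bundle [First, Last) under a BUNDLE header, then measure it. The header
    // is inserted in front of First; GetInstSizeInBytes on the header
    // dispatches to getInstBundleLength and sums the members' sizes.
    static unsigned bundleAndMeasure(llvm::MachineBasicBlock &MBB,
                                     llvm::MachineBasicBlock::instr_iterator First,
                                     llvm::MachineBasicBlock::instr_iterator Last,
                                     const llvm::ARMBaseInstrInfo &TII) {
      llvm::finalizeBundle(MBB, First, Last);
      return TII.GetInstSizeInBytes(llvm::prior(First));
    }
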
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -845,7 +873,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
- return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}
void ARMBaseInstrInfo::
@@ -991,7 +1019,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
- return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
@@ -1357,7 +1385,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
return false;
// Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isLabel())
+ if (MI->isTerminator() || MI->isLabel())
return true;
// Treat the start of the IT block as a scheduling boundary, but schedule
@@ -1762,8 +1790,7 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change.
- MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
- B = MI->getParent()->begin();
+  MachineBasicBlock::iterator I = CmpInstr, E = MI,
+                              B = MI->getParent()->begin();
// Early exit if CmpInstr is at the beginning of the BB.
if (I == B) return false;
@@ -1957,7 +1984,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
bool isKill = UseMI->getOperand(OpIdx).isKill();
unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
- *UseMI, UseMI->getDebugLoc(),
+ UseMI, UseMI->getDebugLoc(),
get(NewUseOpc), NewReg)
.addReg(Reg1, getKillRegState(isKill))
.addImm(SOImmValV1)));
@@ -2332,6 +2359,59 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return UseCycle;
}
+static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &DefIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_iterator I = MI; ++I;
+ MachineBasicBlock::const_instr_iterator II =
+ llvm::prior(I.getInstrIterator());
+ assert(II->isInsideBundle() && "Empty bundle?");
+
+ int Idx = -1;
+ while (II->isInsideBundle()) {
+ Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ if (Idx != -1)
+ break;
+ --II;
+ ++Dist;
+ }
+
+ assert(Idx != -1 && "Cannot find bundled definition!");
+ DefIdx = Idx;
+ return II;
+}
+
+static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &UseIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_instr_iterator II = MI; ++II;
+ assert(II->isInsideBundle() && "Empty bundle?");
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+
+ // FIXME: This doesn't properly handle multiple uses.
+ int Idx = -1;
+ while (II != E && II->isInsideBundle()) {
+ Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
+ if (Idx != -1)
+ break;
+ if (II->getOpcode() != ARM::t2IT)
+ ++Dist;
+ ++II;
+ }
+
+ if (Idx == -1) {
+ Dist = 0;
+ return 0;
+ }
+
+ UseIdx = Idx;
+ return II;
+}
+
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
@@ -2340,35 +2420,77 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
DefMI->isRegSequence() || DefMI->isImplicitDef())
return 1;
- const MCInstrDesc &DefMCID = DefMI->getDesc();
if (!ItinData || ItinData->isEmpty())
- return DefMCID.mayLoad() ? 3 : 1;
+ return DefMI->mayLoad() ? 3 : 1;
- const MCInstrDesc &UseMCID = UseMI->getDesc();
+ const MCInstrDesc *DefMCID = &DefMI->getDesc();
+ const MCInstrDesc *UseMCID = &UseMI->getDesc();
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
- if (DefMO.getReg() == ARM::CPSR) {
+ unsigned Reg = DefMO.getReg();
+ if (Reg == ARM::CPSR) {
if (DefMI->getOpcode() == ARM::FMSTAT) {
// fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
return Subtarget.isCortexA9() ? 1 : 20;
}
// CPSR set and branch can be paired in the same cycle.
- if (UseMCID.isBranch())
+ if (UseMI->isBranch())
return 0;
+
+ // Otherwise it takes the instruction latency (generally one).
+ int Latency = getInstrLatency(ItinData, DefMI);
+
+  // For Thumb2 and -Os, prefer scheduling the CPSR-setting instruction close
+  // to its uses. Instructions which are otherwise scheduled between them may
+  // incur a code size penalty (not being able to use the CPSR-setting 16-bit
+  // instructions).
+ if (Latency > 0 && Subtarget.isThumb2()) {
+ const MachineFunction *MF = DefMI->getParent()->getParent();
+ if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ --Latency;
+ }
+ return Latency;
}
unsigned DefAlign = DefMI->hasOneMemOperand()
? (*DefMI->memoperands_begin())->getAlignment() : 0;
unsigned UseAlign = UseMI->hasOneMemOperand()
? (*UseMI->memoperands_begin())->getAlignment() : 0;
- int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
- UseMCID, UseIdx, UseAlign);
+
+ unsigned DefAdj = 0;
+ if (DefMI->isBundle()) {
+ DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef())
+ return 1;
+ DefMCID = &DefMI->getDesc();
+ }
+ unsigned UseAdj = 0;
+ if (UseMI->isBundle()) {
+ unsigned NewUseIdx;
+ const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
+ Reg, NewUseIdx, UseAdj);
+ if (NewUseMI) {
+ UseMI = NewUseMI;
+ UseIdx = NewUseIdx;
+ UseMCID = &UseMI->getDesc();
+ }
+ }
+
+ int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
+ *UseMCID, UseIdx, UseAlign);
+ int Adj = DefAdj + UseAdj;
+ if (Adj) {
+ Latency -= (int)(DefAdj + UseAdj);
+ if (Latency < 1)
+ return 1;
+ }
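
A worked instance with hypothetical numbers: if the itinerary reports a def-to-use latency of 4 cycles, the defining instruction is the third member of its bundle (DefAdj = 2) and the using instruction is the second of its own (UseAdj = 1), the adjusted latency is 4 - 3 = 1; any result that would drop below 1 is clamped to a minimum of one cycle.
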
if (Latency > 1 &&
(Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
- switch (DefMCID.getOpcode()) {
+ switch (DefMCID->getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
@@ -2393,7 +2515,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
if (DefAlign < 8 && Subtarget.isCortexA9())
- switch (DefMCID.getOpcode()) {
+ switch (DefMCID->getOpcode()) {
default: break;
case ARM::VLD1q8:
case ARM::VLD1q16:
@@ -2413,12 +2535,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD2q8:
case ARM::VLD2q16:
case ARM::VLD2q32:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
@@ -2446,9 +2574,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD1DUPq8:
case ARM::VLD1DUPq16:
case ARM::VLD1DUPq32:
- case ARM::VLD1DUPq8_UPD:
- case ARM::VLD1DUPq16_UPD:
- case ARM::VLD1DUPq32_UPD:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_register:
case ARM::VLD2DUPd8:
case ARM::VLD2DUPd16:
case ARM::VLD2DUPd32:
@@ -2580,12 +2711,18 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
- case ARM::VLD2d8Pseudo_UPD:
- case ARM::VLD2d16Pseudo_UPD:
- case ARM::VLD2d32Pseudo_UPD:
- case ARM::VLD2q8Pseudo_UPD:
- case ARM::VLD2q16Pseudo_UPD:
- case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD2d8PseudoWB_fixed:
+ case ARM::VLD2d16PseudoWB_fixed:
+ case ARM::VLD2d32PseudoWB_fixed:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2d8PseudoWB_register:
+ case ARM::VLD2d16PseudoWB_register:
+ case ARM::VLD2d32PseudoWB_register:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
@@ -2621,9 +2758,12 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD1DUPq8Pseudo:
case ARM::VLD1DUPq16Pseudo:
case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8Pseudo_UPD:
- case ARM::VLD1DUPq16Pseudo_UPD:
- case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD1DUPq8PseudoWB_fixed:
+ case ARM::VLD1DUPq16PseudoWB_fixed:
+ case ARM::VLD1DUPq32PseudoWB_fixed:
+ case ARM::VLD1DUPq8PseudoWB_register:
+ case ARM::VLD1DUPq16PseudoWB_register:
+ case ARM::VLD1DUPq32PseudoWB_register:
case ARM::VLD2DUPd8Pseudo:
case ARM::VLD2DUPd16Pseudo:
case ARM::VLD2DUPd32Pseudo:
@@ -2671,6 +2811,19 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+unsigned
+ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const {
+ unsigned Reg = DefMI->getOperand(DefIdx).getReg();
+ if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI))
+ return 1;
+
+ // If the second MI is predicated, then there is an implicit use dependency.
+ return getOperandLatency(ItinData, DefMI, DefIdx, DepMI,
+ DepMI->getNumOperands());
+}
+
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -2681,6 +2834,17 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
if (!ItinData || ItinData->isEmpty())
return 1;
+ if (MI->isBundle()) {
+ int Latency = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ if (I->getOpcode() != ARM::t2IT)
+ Latency += getInstrLatency(ItinData, I, PredCost);
+ }
+ return Latency;
+ }
+
const MCInstrDesc &MCID = MI->getDesc();
unsigned Class = MCID.getSchedClass();
unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 0f9f321..68e8208 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -69,10 +69,7 @@ public:
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
// Predication support.
- bool isPredicated(const MachineInstr *MI) const {
- int PIdx = MI->findFirstPredOperandIdx();
- return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
- }
+ bool isPredicated(const MachineInstr *MI) const;
ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
int PIdx = MI->findFirstPredOperandIdx();
@@ -213,12 +210,18 @@ public:
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const;
+ virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const;
+
/// VFP/NEON execution domains.
std::pair<uint16_t, uint16_t>
getExecutionDomain(const MachineInstr *MI) const;
void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
private:
+ unsigned getInstBundleLength(const MachineInstr *MI) const;
+
int getVLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefClass,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 7c42342..8ee6ce2 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -631,7 +631,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
// 1. Dynamic stack realignment is explicitly disabled,
// 2. This is a Thumb1 function (it's not useful, so we don't bother), or
// 3. There are VLAs in the function and the base pointer is disabled.
- return (RealignStack && !AFI->isThumb1OnlyFunction() &&
+ return (MF.getTarget().Options.RealignStack && !AFI->isThumb1OnlyFunction() &&
(!MFI->hasVarSizedObjects() || EnableBasePointer));
}
@@ -649,7 +649,7 @@ needsStackRealignment(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- if (DisableFramePointerElim(MF) && MFI->adjustsStack())
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
return true;
return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
|| needsStackRealignment(MF);
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index d74ccfa..365f0bb 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -401,7 +401,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I)
emitInstruction(*I);
}
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 3e3a413..2039d41 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -26,6 +26,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -51,6 +52,43 @@ static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
cl::desc("Adjust basic block layout to better use TB[BH]"));
+static cl::opt<bool>
+AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true),
+ cl::desc("Align constant islands in code"));
+
+/// UnknownPadding - Return the worst case padding that could result from
+/// unknown offset bits. This does not include alignment padding caused by
+/// known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
+static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+ if (KnownBits < LogAlign)
+ return (1u << LogAlign) - (1u << KnownBits);
+ return 0;
+}
+
+/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact,
+/// add padding such that:
+///
+/// 1. The result is aligned to 1 << LogAlign.
+///
+/// 2. No other value of the unknown bits would require more padding.
+///
+/// This may add more padding than is required to satisfy just one of the
+/// constraints. It is necessary to compute alignment this way to guarantee
+/// that we don't underestimate the padding before an aligned block. If the
+/// real padding before a block is larger than we think, constant pool entries
+/// may go out of range.
+static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
+ unsigned KnownBits) {
+ // Add the worst possible padding that the unknown bits could cause.
+ Offset += UnknownPadding(LogAlign, KnownBits);
+
+ // Then align the result.
+ return RoundUpToAlignment(Offset, 1u << LogAlign);
+}
+
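
A self-contained check of this padding arithmetic, with illustrative numbers:

    #include <cassert>

    // Mirrors UnknownPadding/WorstCaseAlign above.
    static unsigned unknownPadding(unsigned LogAlign, unsigned KnownBits) {
      if (KnownBits < LogAlign)
        return (1u << LogAlign) - (1u << KnownBits);
      return 0;
    }

    int main() {
      // A block nominally ends at offset 0x104, but only the low bit of that
      // offset is exact (KnownBits = 1). Placing an 8-byte aligned island
      // (LogAlign = 3) after it must assume the worst:
      unsigned Offset = 0x104;
      Offset += unknownPadding(3, 1); // (1<<3) - (1<<1) = 6  ->  0x10a
      Offset = (Offset + 7u) & ~7u;   // RoundUpToAlignment(0x10a, 8) -> 0x110
      assert(Offset == 0x110);
      return 0;
    }
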
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
/// requires constant pool entries to be scattered among the instructions
@@ -64,16 +102,70 @@ namespace {
/// CPE - A constant pool entry that has been placed somewhere, which
/// tracks a list of users.
class ARMConstantIslands : public MachineFunctionPass {
- /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
- /// by MBB Number. The two-byte pads required for Thumb alignment are
- /// counted as part of the following block (i.e., the offset and size for
- /// a padded block will both be ==2 mod 4).
- std::vector<unsigned> BBSizes;
+ /// BasicBlockInfo - Information about the offset and size of a single
+ /// basic block.
+ struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// The offset is always aligned as required by the basic block.
+ unsigned Offset;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ /// KnownBits - The number of low bits in Offset that are known to be
+ /// exact. The remaining bits of Offset are an upper bound.
+ uint8_t KnownBits;
+
+ /// Unalign - When non-zero, the block contains instructions (inline asm)
+ /// of unknown size. The real size may be smaller than Size bytes by a
+ /// multiple of 1 << Unalign.
+ uint8_t Unalign;
+
+ /// PostAlign - When non-zero, the block terminator contains a .align
+ /// directive, so the end of the block is aligned to 1 << PostAlign
+ /// bytes.
+ uint8_t PostAlign;
+
+ BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
+ PostAlign(0) {}
+
+ /// Compute the number of known offset bits internally to this block.
+ /// This number should be used to predict worst case padding when
+ /// splitting the block.
+ unsigned internalKnownBits() const {
+ return Unalign ? Unalign : KnownBits;
+ }
+
+ /// Compute the offset immediately following this block. If LogAlign is
+ /// specified, return the offset the successor block will get if it has
+ /// this alignment.
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ unsigned LA = std::max(unsigned(PostAlign), LogAlign);
+ if (!LA)
+ return PO;
+ // Add alignment padding from the terminator.
+ return WorstCaseAlign(PO, LA, internalKnownBits());
+ }
+
+ /// Compute the number of known low bits of postOffset. If this block
+ /// contains inline asm, the number of known bits drops to the
+ /// instruction alignment. An aligned terminator may increase the number
+ /// of know bits.
+ /// If LogAlign is given, also consider the alignment of the next block.
+ unsigned postKnownBits(unsigned LogAlign = 0) const {
+ return std::max(std::max(unsigned(PostAlign), LogAlign),
+ internalKnownBits());
+ }
+ };
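
As a worked example (values hypothetical): a block with Offset = 0x100, Size = 6, KnownBits = 2, no inline asm, and PostAlign = 0 yields postOffset(2) = WorstCaseAlign(0x106, 2, 2) = 0x108, since no unknown-bit padding is needed and 0x106 simply rounds up to the next 4-byte boundary; postKnownBits(2) is then max(2, 2) = 2.
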
- /// BBOffsets - the offset of each MBB in bytes, starting from 0.
- /// The two-byte pads required for Thumb alignment are counted as part of
- /// the following block.
- std::vector<unsigned> BBOffsets;
+ std::vector<BasicBlockInfo> BBInfo;
/// WaterList - A sorted list of basic blocks where islands could be placed
/// (i.e. blocks that don't fall through to the following block, due
@@ -162,9 +254,8 @@ namespace {
/// the branch fix up pass.
bool HasFarJump;
- /// HasInlineAsm - True if the function contains inline assembly.
- bool HasInlineAsm;
-
+ MachineFunction *MF;
+ MachineConstantPool *MCP;
const ARMInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
@@ -182,67 +273,65 @@ namespace {
}
private:
- void DoInitialPlacement(MachineFunction &MF,
- std::vector<MachineInstr*> &CPEMIs);
+ void DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
- void JumpTableFunctionScan(MachineFunction &MF);
- void InitialFunctionScan(MachineFunction &MF,
- const std::vector<MachineInstr*> &CPEMIs);
+ unsigned getCPELogAlign(const MachineInstr *CPEMI);
+ void JumpTableFunctionScan();
+ void InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs);
MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
- void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
+ void AdjustBBOffsetsAfter(MachineBasicBlock *BB);
bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter);
void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
MachineBasicBlock *&NewMBB);
- bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex);
+ bool HandleConstantPoolUser(unsigned CPUserIndex);
void RemoveDeadCPEMI(MachineInstr *CPEMI);
bool RemoveUnusedCPEntries();
bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
MachineInstr *CPEMI, unsigned Disp, bool NegOk,
bool DoDump = false);
bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
- CPUser &U);
- bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
- unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ CPUser &U, unsigned &Growth);
bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
- bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br);
- bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br);
- bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br);
+ bool FixUpImmediateBr(ImmBranch &Br);
+ bool FixUpConditionalBr(ImmBranch &Br);
+ bool FixUpUnconditionalBr(ImmBranch &Br);
bool UndoLRSpillRestore();
- bool OptimizeThumb2Instructions(MachineFunction &MF);
- bool OptimizeThumb2Branches(MachineFunction &MF);
- bool ReorderThumb2JumpTables(MachineFunction &MF);
- bool OptimizeThumb2JumpTables(MachineFunction &MF);
+ bool OptimizeThumb2Instructions();
+ bool OptimizeThumb2Branches();
+ bool ReorderThumb2JumpTables();
+ bool OptimizeThumb2JumpTables();
MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
+ void ComputeBlockSize(MachineBasicBlock *MBB);
unsigned GetOffsetOf(MachineInstr *MI) const;
void dumpBBs();
- void verify(MachineFunction &MF);
+ void verify();
+
+ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U) {
+ return OffsetIsInRange(UserOffset, TrialOffset,
+ U.MaxDisp, U.NegOk, U.IsSoImm);
+ }
};
char ARMConstantIslands::ID = 0;
}
/// verify - check BBInfo offsets and the alignment of islands
-void ARMConstantIslands::verify(MachineFunction &MF) {
- assert(BBOffsets.size() == BBSizes.size());
- for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
- assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
- if (!isThumb)
- return;
+void ARMConstantIslands::verify() {
#ifndef NDEBUG
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty() &&
- MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
- unsigned MBBId = MBB->getNumber();
- assert(HasInlineAsm ||
- (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
- (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
- }
+ unsigned Align = MBB->getAlignment();
+ unsigned MBBId = MBB->getNumber();
+ assert(BBInfo[MBBId].Offset % (1u << Align) == 0);
+ assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
}
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
CPUser &U = CPUsers[i];
@@ -257,10 +346,16 @@ void ARMConstantIslands::verify(MachineFunction &MF) {
/// print block size and offset information - debugging
void ARMConstantIslands::dumpBBs() {
- for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
- DEBUG(errs() << "block " << J << " offset " << BBOffsets[J]
- << " size " << BBSizes[J] << "\n");
- }
+ DEBUG({
+    for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << " kb=" << unsigned(BBI.KnownBits)
+ << " ua=" << unsigned(BBI.Unalign)
+ << " pa=" << unsigned(BBI.PostAlign)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
}
/// createARMConstantIslandPass - returns an instance of the constpool
@@ -269,34 +364,38 @@ FunctionPass *llvm::createARMConstantIslandPass() {
return new ARMConstantIslands();
}
-bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
- MachineConstantPool &MCP = *MF.getConstantPool();
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MCP = mf.getConstantPool();
- TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
- AFI = MF.getInfo<ARMFunctionInfo>();
- STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
+ DEBUG(dbgs() << "***** ARMConstantIslands: "
+ << MCP->getConstants().size() << " CP entries, aligned to "
+ << MCP->getConstantPoolAlignment() << " bytes *****\n");
+
+ TII = (const ARMInstrInfo*)MF->getTarget().getInstrInfo();
+ AFI = MF->getInfo<ARMFunctionInfo>();
+ STI = &MF->getTarget().getSubtarget<ARMSubtarget>();
isThumb = AFI->isThumbFunction();
isThumb1 = AFI->isThumb1OnlyFunction();
isThumb2 = AFI->isThumb2Function();
HasFarJump = false;
- HasInlineAsm = false;
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
// Try to reorder and otherwise adjust the block layout to make good use
// of the TB[BH] instructions.
bool MadeChange = false;
if (isThumb2 && AdjustJumpTableBlocks) {
- JumpTableFunctionScan(MF);
- MadeChange |= ReorderThumb2JumpTables(MF);
+ JumpTableFunctionScan();
+ MadeChange |= ReorderThumb2JumpTables();
// Data is out of date, so clear it. It'll be re-computed later.
T2JumpTables.clear();
// Blocks may have shifted around. Keep the numbering up to date.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
}
// Thumb1 functions containing constant pools get 4-byte alignment.
@@ -304,16 +403,13 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// ARM and Thumb2 functions need to be 4-byte aligned.
if (!isThumb1)
- MF.EnsureAlignment(2); // 2 = log2(4)
+ MF->EnsureAlignment(2); // 2 = log2(4)
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
- if (!MCP.isEmpty()) {
- DoInitialPlacement(MF, CPEMIs);
- if (isThumb1)
- MF.EnsureAlignment(2); // 2 = log2(4)
- }
+ if (!MCP->isEmpty())
+ DoInitialPlacement(CPEMIs);
/// The next UID to take is the first unused one.
AFI->initPICLabelUId(CPEMIs.size());
@@ -321,7 +417,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Do the initial scan of the function, building up information about the
// sizes of each block, the location of all the water, and finding all of the
// constant pool users.
- InitialFunctionScan(MF, CPEMIs);
+ InitialFunctionScan(CPEMIs);
CPEMIs.clear();
DEBUG(dumpBBs());
@@ -333,9 +429,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// is no change.
unsigned NoCPIters = 0, NoBRIters = 0;
while (true) {
+ DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
bool CPChange = false;
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
- CPChange |= HandleConstantPoolUser(MF, i);
+ CPChange |= HandleConstantPoolUser(i);
if (CPChange && ++NoCPIters > 30)
llvm_unreachable("Constant Island pass failed to converge!");
DEBUG(dumpBBs());
@@ -344,9 +441,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// appear as "new water" for the next iteration of constant pool placement.
NewWaterList.clear();
+ DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
bool BRChange = false;
for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
- BRChange |= FixUpImmediateBr(MF, ImmBranches[i]);
+ BRChange |= FixUpImmediateBr(ImmBranches[i]);
if (BRChange && ++NoBRIters > 30)
llvm_unreachable("Branch Fix Up pass failed to converge!");
DEBUG(dumpBBs());
@@ -358,10 +456,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Shrink 32-bit Thumb2 branch, load, and store instructions.
if (isThumb2 && !STI->prefers32BitThumb())
- MadeChange |= OptimizeThumb2Instructions(MF);
+ MadeChange |= OptimizeThumb2Instructions();
// After a while, this might be made debug-only, but it is not expensive.
- verify(MF);
+ verify();
// If LR has been forced spilled and no far jump (i.e. BL) has been issued,
// undo the spill / restore of LR if possible.
@@ -376,10 +474,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
}
}
- DEBUG(errs() << '\n'; dumpBBs());
+ DEBUG(dbgs() << '\n'; dumpBBs());
- BBSizes.clear();
- BBOffsets.clear();
+ BBInfo.clear();
WaterList.clear();
CPUsers.clear();
CPEntries.clear();
@@ -392,37 +489,65 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
/// DoInitialPlacement - Perform the initial placement of the constant pool
/// entries. To start with, we put them all at the end of the function.
-void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
- std::vector<MachineInstr*> &CPEMIs) {
+void
+ARMConstantIslands::DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// Create the basic block to hold the CPE's.
- MachineBasicBlock *BB = MF.CreateMachineBasicBlock();
- MF.push_back(BB);
+ MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
+ MF->push_back(BB);
+
+ // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
+ unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
+
+ // Mark the basic block as required by the const-pool.
+ // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
+ BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
+
+ // The function needs to be as aligned as the basic blocks. The linker may
+ // move functions around based on their alignment.
+ MF->EnsureAlignment(BB->getAlignment());
+
+ // Order the entries in BB by descending alignment. That ensures correct
+ // alignment of all entries as long as BB is sufficiently aligned. Keep
+ // track of the insertion point for each alignment. We are going to bucket
+ // sort the entries as they are created.
+ SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
// Add all of the constants from the constant pool to the end block, use an
// identity mapping of CPI's to CPE's.
- const std::vector<MachineConstantPoolEntry> &CPs =
- MF.getConstantPool()->getConstants();
+ const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const TargetData &TD = *MF.getTarget().getTargetData();
+ const TargetData &TD = *MF->getTarget().getTargetData();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
- // Verify that all constant pool entries are a multiple of 4 bytes. If not,
- // we would have to pad them out or something so that instructions stay
- // aligned.
- assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+ assert(Size >= 4 && "Too small constant pool entry");
+ unsigned Align = CPs[i].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid alignment");
+ // Verify that all constant pool entries are a multiple of their alignment.
+ // If not, we would have to pad them out so that instructions stay aligned.
+    assert((Size % Align) == 0 && "CP Entry not multiple of its alignment!");
+
+ // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
+ unsigned LogAlign = Log2_32(Align);
+ MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
MachineInstr *CPEMI =
- BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
.addImm(i).addConstantPoolIndex(i).addImm(Size);
CPEMIs.push_back(CPEMI);
+ // Ensure that future entries with higher alignment get inserted before
+ // CPEMI. This is bucket sort with iterators.
+    for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
+ if (InsPoint[a] == InsAt)
+ InsPoint[a] = CPEMI;
+
// Add a new CPEntry, but no corresponding CPUser yet.
std::vector<CPEntry> CPEs;
CPEs.push_back(CPEntry(CPEMI, i));
CPEntries.push_back(CPEs);
++NumCPEs;
- DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
- << "\n");
+ DEBUG(dbgs() << "Moved CPI#" << i << " to end of function\n");
}
+ DEBUG(BB->dump());
}
/// BBHasFallthrough - Return true if the specified basic block can fallthrough
@@ -458,17 +583,33 @@ ARMConstantIslands::CPEntry
return NULL;
}
+/// getCPELogAlign - Returns the required alignment of the constant pool entry
+/// represented by CPEMI. Alignment is measured in log2(bytes) units.
+unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
+ assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);
+
+ // Everything is 4-byte aligned unless AlignConstantIslands is set.
+ if (!AlignConstantIslands)
+ return 2;
+
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
+ unsigned Align = MCP->getConstants()[CPI].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
+ return Log2_32(Align);
+}
+
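
For example, a constant pool entry holding a 16-byte aligned vector yields Log2_32(16) == 4 here; DoInitialPlacement has already given the island block at least that alignment, so postOffset() can honor the request.
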
/// JumpTableFunctionScan - Do a scan of the function, building up
/// information about the sizes of each block and the locations of all
/// the jump tables.
-void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+void ARMConstantIslands::JumpTableFunctionScan() {
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock &MBB = *MBBI;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I)
- if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT)
T2JumpTables.push_back(I);
}
}
@@ -476,23 +617,27 @@ void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
/// InitialFunctionScan - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
-void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
- const std::vector<MachineInstr*> &CPEMIs) {
- // First thing, see if the function has any inline assembly in it. If so,
- // we have to be conservative about alignment assumptions, as we don't
- // know for sure the size of any instructions in the inline assembly.
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock &MBB = *MBBI;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I)
- if (I->getOpcode() == ARM::INLINEASM)
- HasInlineAsm = true;
- }
+void ARMConstantIslands::
+InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+ ComputeBlockSize(I);
+
+ // The known bits of the entry block offset are determined by the function
+ // alignment.
+ BBInfo.front().KnownBits = MF->getAlignment();
+
+ // Compute block offsets and known bits.
+ AdjustBBOffsetsAfter(MF->begin());
// Now go back through the instructions and build up our data structures.
- unsigned Offset = 0;
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock &MBB = *MBBI;
@@ -501,16 +646,13 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
if (!BBHasFallthrough(&MBB))
WaterList.push_back(&MBB);
- unsigned MBBSize = 0;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
if (I->isDebugValue())
continue;
- // Add instruction size to MBBSize.
- MBBSize += TII->GetInstSizeInBytes(I);
int Opc = I->getOpcode();
- if (I->getDesc().isBranch()) {
+ if (I->isBranch()) {
bool isCond = false;
unsigned Bits = 0;
unsigned Scale = 1;
@@ -518,18 +660,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
switch (Opc) {
default:
continue; // Ignore other JT branches
- case ARM::tBR_JTr:
- // A Thumb1 table jump may involve padding; for the offsets to
- // be right, functions containing these must be 4-byte aligned.
- // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
- // table entries. So this code checks whether offset of tBR_JTr + 2
- // is aligned. That is held in Offset+MBBSize, which already has
- // 2 added in for the size of the mov pc instruction.
- MF.EnsureAlignment(2U);
- if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
- // FIXME: Add a pseudo ALIGN instruction instead.
- MBBSize += 2; // padding
- continue; // Does not get an entry in ImmBranches
case ARM::t2BR_JT:
T2JumpTables.push_back(I);
continue; // Does not get an entry in ImmBranches
@@ -647,18 +777,30 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
break;
}
}
+ }
+}
- // In thumb mode, if this block is a constpool island, we may need padding
- // so it's aligned on 4 byte boundary.
- if (isThumb &&
- !MBB.empty() &&
- MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- ((Offset%4) != 0 || HasInlineAsm))
- MBBSize += 2;
-
- BBSizes.push_back(MBBSize);
- BBOffsets.push_back(Offset);
- Offset += MBBSize;
+/// ComputeBlockSize - Compute the size and some alignment information for MBB.
+/// This function updates BBInfo directly.
+void ARMConstantIslands::ComputeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+ BBI.PostAlign = 0;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ BBI.Size += TII->GetInstSizeInBytes(I);
+ // For inline asm, GetInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I->isInlineAsm())
+ BBI.Unalign = isThumb ? 1 : 2;
+ }
+
+ // tBR_JTr contains a .align 2 directive.
+ if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
+ BBI.PostAlign = 2;
+ MBB->getParent()->EnsureAlignment(2);
}
}
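
For example (hypothetical): a Thumb block whose instructions add up to 0x20 bytes but which contains one inline-asm blob gets Unalign = 1, recording that the real size may fall short of 0x20 by some multiple of 2; internalKnownBits() then drops to 1, keeping later padding estimates conservative.
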
@@ -671,14 +813,7 @@ unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
// The offset is composed of two things: the sum of the sizes of all MBB's
// before this instruction's block, and the offset from the start of the block
// it is in.
- unsigned Offset = BBOffsets[MBB->getNumber()];
-
- // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
- // alignment padding, and compensate if so.
- if (isThumb &&
- MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- (Offset%4 != 0 || HasInlineAsm))
- Offset += 2;
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
// Sum instructions before MI in MBB.
for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
@@ -702,12 +837,9 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
// Renumber the MBB's to keep them consecutive.
NewBB->getParent()->RenumberBlocks(NewBB);
- // Insert a size into BBSizes to align it properly with the (newly
+ // Insert an entry into BBInfo to align it properly with the (newly
// renumbered) block numbers.
- BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
-
- // Likewise for BBOffsets.
- BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
// Next, update WaterList. Specifically, we need to add NewMBB as having
// available water after it.
@@ -723,13 +855,12 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
/// account for this change and returns the newly created block.
MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
MachineBasicBlock *OrigBB = MI->getParent();
- MachineFunction &MF = *OrigBB->getParent();
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
- MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
MachineFunction::iterator MBBI = OrigBB; ++MBBI;
- MF.insert(MBBI, NewBB);
+ MF->insert(MBBI, NewBB);
// Splice the instructions starting with MI over to NewBB.
NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
@@ -747,16 +878,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
++NumSplit;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
- while (!OrigBB->succ_empty()) {
- MachineBasicBlock *Succ = *OrigBB->succ_begin();
- OrigBB->removeSuccessor(Succ);
- NewBB->addSuccessor(Succ);
-
- // This pass should be run after register allocation, so there should be no
- // PHI nodes to update.
- assert((Succ->empty() || !Succ->begin()->isPHI())
- && "PHI nodes should be eliminated by now!");
- }
+ NewBB->transferSuccessors(OrigBB);
// OrigBB branches to NewBB.
OrigBB->addSuccessor(NewBB);
@@ -764,14 +886,11 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
// Update internal data structures to account for the newly inserted MBB.
// This is almost the same as UpdateForInsertedWaterBlock, except that
// the Water goes after OrigBB, not NewBB.
- MF.RenumberBlocks(NewBB);
+ MF->RenumberBlocks(NewBB);
- // Insert a size into BBSizes to align it properly with the (newly
+ // Insert an entry into BBInfo to align it properly with the (newly
// renumbered) block numbers.
- BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
-
- // Likewise for BBOffsets.
- BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
// Next, update WaterList. Specifically, we need to add OrigMBB as having
// available water after it (but not if it's already there, which happens
@@ -787,54 +906,19 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
WaterList.insert(IP, OrigBB);
NewWaterList.insert(OrigBB);
- unsigned OrigBBI = OrigBB->getNumber();
- unsigned NewBBI = NewBB->getNumber();
-
- int delta = isThumb1 ? 2 : 4;
-
// Figure out how large the OrigBB is. As the first half of the original
// block, it cannot contain a tablejump. The size includes
// the new jump we added. (It should be possible to do this without
// recounting everything, but it's very confusing, and this is rarely
// executed.)
- unsigned OrigBBSize = 0;
- for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
- I != E; ++I)
- OrigBBSize += TII->GetInstSizeInBytes(I);
- BBSizes[OrigBBI] = OrigBBSize;
-
- // ...and adjust BBOffsets for NewBB accordingly.
- BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+ ComputeBlockSize(OrigBB);
// Figure out how large the NewMBB is. As the second half of the original
// block, it may contain a tablejump.
- unsigned NewBBSize = 0;
- for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
- I != E; ++I)
- NewBBSize += TII->GetInstSizeInBytes(I);
- // Set the size of NewBB in BBSizes. It does not include any padding now.
- BBSizes[NewBBI] = NewBBSize;
-
- MachineInstr* ThumbJTMI = prior(NewBB->end());
- if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
- // We've added another 2-byte instruction before this tablejump, which
- // means we will always need padding if we didn't before, and vice versa.
-
- // The original offset of the jump instruction was:
- unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
- if (OrigOffset%4 == 0) {
- // We had padding before and now we don't. No net change in code size.
- delta = 0;
- } else {
- // We didn't have padding before and now we do.
- BBSizes[NewBBI] += 2;
- delta = 4;
- }
- }
+ ComputeBlockSize(NewBB);
// All BBOffsets following these blocks must be modified.
- if (delta)
- AdjustBBOffsetsAfter(NewBB, delta);
+ AdjustBBOffsetsAfter(OrigBB);
return NewBB;
}
@@ -882,19 +966,44 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
/// WaterIsInRange - Returns true if a CPE placed after the specified
/// Water (a basic block) will be in range for the specific MI.
-
+///
+/// Compute how much the function will grow by inserting a CPE after Water.
bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
- MachineBasicBlock* Water, CPUser &U) {
- unsigned MaxDisp = U.MaxDisp;
- unsigned CPEOffset = BBOffsets[Water->getNumber()] +
- BBSizes[Water->getNumber()];
-
- // If the CPE is to be inserted before the instruction, that will raise
- // the offset of the instruction.
- if (CPEOffset < UserOffset)
- UserOffset += U.CPEMI->getOperand(2).getImm();
-
- return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm);
+ MachineBasicBlock* Water, CPUser &U,
+ unsigned &Growth) {
+ unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
+ unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
+ unsigned NextBlockOffset, NextBlockAlignment;
+ MachineFunction::const_iterator NextBlock = Water;
+ if (++NextBlock == MF->end()) {
+ NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
+ NextBlockAlignment = 0;
+ } else {
+ NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
+ NextBlockAlignment = NextBlock->getAlignment();
+ }
+ unsigned Size = U.CPEMI->getOperand(2).getImm();
+ unsigned CPEEnd = CPEOffset + Size;
+
+ // The CPE may be able to hide in the alignment padding before the next
+ // block. It may also cause more padding to be required if it is more aligned
+  // than the next block.
+ if (CPEEnd > NextBlockOffset) {
+ Growth = CPEEnd - NextBlockOffset;
+ // Compute the padding that would go at the end of the CPE to align the next
+ // block.
+ Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. Also account for unknown alignment padding
+ // in blocks between CPE and the user.
+ if (CPEOffset < UserOffset)
+ UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
+ } else
+ // CPE fits in existing padding.
+ Growth = 0;
+
+ return OffsetIsInRange(UserOffset, CPEOffset, U);
}
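
A worked instance with illustrative numbers: a CPE of Size 8 that would start at CPEOffset 0x200, when the next block starts at 0x204 with 4-byte (log2 = 2) alignment, has CPEEnd = 0x208 > 0x204, so Growth = (0x208 - 0x204) + OffsetToAlignment(0x208, 4) = 4 + 0 = 4 bytes of function growth. Had CPEEnd been at most 0x204, the entry would have hidden entirely in the existing padding and Growth would be 0.
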
/// CPEIsInRange - Returns true if the distance between specific MI and
@@ -903,14 +1012,20 @@ bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
MachineInstr *CPEMI, unsigned MaxDisp,
bool NegOk, bool DoDump) {
unsigned CPEOffset = GetOffsetOf(CPEMI);
- assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE");
+ assert(CPEOffset % 4 == 0 && "Misaligned CPE");
if (DoDump) {
- DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
- << " max delta=" << MaxDisp
- << " insn address=" << UserOffset
- << " CPE address=" << CPEOffset
- << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI);
+ DEBUG({
+ unsigned Block = MI->getParent()->getNumber();
+ const BasicBlockInfo &BBI = BBInfo[Block];
+ dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << format(" insn address=%#x", UserOffset)
+ << " in BB#" << Block << ": "
+ << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
+ << format("CPE address=%#x offset=%+d: ", CPEOffset,
+ int(CPEOffset-UserOffset));
+ });
}
return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
@@ -933,55 +1048,17 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
}
#endif // NDEBUG
-void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
- int delta) {
- MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI);
- for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
- i < e; ++i) {
- BBOffsets[i] += delta;
- // If some existing blocks have padding, adjust the padding as needed, a
- // bit tricky. delta can be negative so don't use % on that.
- if (!isThumb)
- continue;
- MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty() && !HasInlineAsm) {
- // Constant pool entries require padding.
- if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
- unsigned OldOffset = BBOffsets[i] - delta;
- if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) {
- // add new padding
- BBSizes[i] += 2;
- delta += 2;
- } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) {
- // remove existing padding
- BBSizes[i] -= 2;
- delta -= 2;
- }
- }
- // Thumb1 jump tables require padding. They should be at the end;
- // following unconditional branches are removed by AnalyzeBranch.
- // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
- // table entries. So this code checks whether offset of tBR_JTr
- // is aligned; if it is, the offset of the jump table following the
- // instruction will not be aligned, and we need padding.
- MachineInstr *ThumbJTMI = prior(MBB->end());
- if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
- unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
- unsigned OldMIOffset = NewMIOffset - delta;
- if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) {
- // remove existing padding
- BBSizes[i] -= 2;
- delta -= 2;
- } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) {
- // add new padding
- BBSizes[i] += 2;
- delta += 2;
- }
- }
- if (delta==0)
- return;
- }
- MBBI = llvm::next(MBBI);
+void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ for(unsigned i = BB->getNumber() + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
+ unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+ unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+ // This is where block i begins.
+ BBInfo[i].Offset = Offset;
+ BBInfo[i].KnownBits = KnownBits;
}
}
@@ -1016,7 +1093,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
// Check to see if the CPE is already in-range.
if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) {
- DEBUG(errs() << "In range\n");
+ DEBUG(dbgs() << "In range\n");
return 1;
}
@@ -1031,7 +1108,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
if (CPEs[i].CPEMI == NULL)
continue;
if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) {
- DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#"
+ DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
<< CPEs[i].CPI << "\n");
// Point the CPUser node to the replacement
U.CPEMI = CPEs[i].CPEMI;
@@ -1079,10 +1156,9 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
if (WaterList.empty())
return false;
- bool FoundWaterThatWouldPad = false;
- water_iterator IPThatWouldPad;
- for (water_iterator IP = prior(WaterList.end()),
- B = WaterList.begin();; --IP) {
+ unsigned BestGrowth = ~0u;
+ for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();;
+ --IP) {
MachineBasicBlock* WaterBB = *IP;
// Check if water is in range and is either at a lower address than the
// current "high water mark" or a new water block that was created since
@@ -1092,31 +1168,24 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
// should be relatively uncommon and when it does happen, we want to be
// sure to take advantage of it for all the CPEs near that block, so that
// we don't insert more branches than necessary.
- if (WaterIsInRange(UserOffset, WaterBB, U) &&
+ unsigned Growth;
+ if (WaterIsInRange(UserOffset, WaterBB, U, Growth) &&
(WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
- NewWaterList.count(WaterBB))) {
- unsigned WBBId = WaterBB->getNumber();
- if (isThumb &&
- (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
- // This is valid Water, but would introduce padding. Remember
- // it in case we don't find any Water that doesn't do this.
- if (!FoundWaterThatWouldPad) {
- FoundWaterThatWouldPad = true;
- IPThatWouldPad = IP;
- }
- } else {
- WaterIter = IP;
+ NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
+ // This is the least amount of required padding seen so far.
+ BestGrowth = Growth;
+ WaterIter = IP;
+ DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
+ << " Growth=" << Growth << '\n');
+
+ // Keep looking unless it is perfect.
+ if (BestGrowth == 0)
return true;
- }
}
if (IP == B)
break;
}
- if (FoundWaterThatWouldPad) {
- WaterIter = IPThatWouldPad;
- return true;
- }
- return false;
+ return BestGrowth != ~0u;
}
/// CreateNewWater - No existing WaterList entry will work for
@@ -1132,114 +1201,143 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPELogAlign = getCPELogAlign(CPEMI);
MachineBasicBlock *UserMBB = UserMI->getParent();
- unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
- BBSizes[UserMBB->getNumber()];
- assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]);
+ const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
// If the block does not end in an unconditional branch already, and if the
// end of the block is within range, make new water there. (The addition
// below is for the unconditional branch we will be adding: 4 bytes on ARM +
// Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is accounted for
// inside OffsetIsInRange.)
- if (BBHasFallthrough(UserMBB) &&
- OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4),
- U.MaxDisp, U.NegOk, U.IsSoImm)) {
- DEBUG(errs() << "Split at end of block\n");
- if (&UserMBB->back() == UserMI)
- assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
- NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
- // Add an unconditional branch from UserMBB to fallthrough block.
- // Record it for branch lengthening; this new branch will not get out of
- // range, but if the preceding conditional branch is out of range, the
- // targets will be exchanged, and the altered branch may be out of
- // range, so the machinery has to know about it.
- int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
- if (!isThumb)
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
- else
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
- .addImm(ARMCC::AL).addReg(0);
- unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
- ImmBranches.push_back(ImmBranch(&UserMBB->back(),
- MaxDisp, false, UncondBr));
- int delta = isThumb1 ? 2 : 4;
- BBSizes[UserMBB->getNumber()] += delta;
- AdjustBBOffsetsAfter(UserMBB, delta);
- } else {
- // What a big block. Find a place within the block to split it.
- // This is a little tricky on Thumb1 since instructions are 2 bytes
- // and constant pool entries are 4 bytes: if instruction I references
- // island CPE, and instruction I+1 references CPE', it will
- // not work well to put CPE as far forward as possible, since then
- // CPE' cannot immediately follow it (that location is 2 bytes
- // farther away from I+1 than CPE was from I) and we'd need to create
- // a new island. So, we make a first guess, then walk through the
- // instructions between the one currently being looked at and the
- // possible insertion point, and make sure any other instructions
- // that reference CPEs will be able to use the same island area;
- // if not, we back up the insertion point.
-
- // The 4 in the following is for the unconditional branch we'll be
- // inserting (allows for long branch on Thumb1). Alignment of the
- // island is handled inside OffsetIsInRange.
- unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4;
- // This could point off the end of the block if we've already got
- // constant pool entries following this block; only the last one is
- // in the water list. Back past any possible branches (allow for a
- // conditional and a maximally long unconditional).
- if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
- BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
- (isThumb1 ? 6 : 8);
- unsigned EndInsertOffset = BaseInsertOffset +
- CPEMI->getOperand(2).getImm();
- MachineBasicBlock::iterator MI = UserMI;
- ++MI;
- unsigned CPUIndex = CPUserIndex+1;
- unsigned NumCPUsers = CPUsers.size();
- MachineInstr *LastIT = 0;
- for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
- Offset < BaseInsertOffset;
- Offset += TII->GetInstSizeInBytes(MI),
- MI = llvm::next(MI)) {
- if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
- CPUser &U = CPUsers[CPUIndex];
- if (!OffsetIsInRange(Offset, EndInsertOffset,
- U.MaxDisp, U.NegOk, U.IsSoImm)) {
- BaseInsertOffset -= (isThumb1 ? 2 : 4);
- EndInsertOffset -= (isThumb1 ? 2 : 4);
- }
- // This is overly conservative, as we don't account for CPEMIs
- // being reused within the block, but it doesn't matter much.
- EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
- CPUIndex++;
- }
+ if (BBHasFallthrough(UserMBB)) {
+ // Size of branch to insert.
+ unsigned Delta = isThumb1 ? 2 : 4;
+ // End of UserBlock after adding a branch.
+ unsigned UserBlockEnd = UserBBI.postOffset() + Delta;
+ // Compute the offset where the CPE will begin.
+ unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign,
+ UserBBI.postKnownBits());
+
+ if (OffsetIsInRange(UserOffset, CPEOffset, U)) {
+ DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
+ << format(", expected CPE offset %#x\n", CPEOffset));
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block. Record
+ // it for branch lengthening; this new branch will not get out of range,
+ // but if the preceding conditional branch is out of range, the targets
+ // will be exchanged, and the altered branch may be out of range, so the
+ // machinery has to know about it.
+ int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
+ if (!isThumb)
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ else
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
+ .addImm(ARMCC::AL).addReg(0);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ BBInfo[UserMBB->getNumber()].Size += Delta;
+ AdjustBBOffsetsAfter(UserMBB);
+ return;
+ }
+ }
- // Remember the last IT instruction.
- if (MI->getOpcode() == ARM::t2IT)
- LastIT = MI;
+ // What a big block. Find a place within the block to split it. This is a
+ // little tricky on Thumb1 since instructions are 2 bytes and constant pool
+ // entries are 4 bytes: if instruction I references island CPE, and
+ // instruction I+1 references CPE', it will not work well to put CPE as far
+ // forward as possible, since then CPE' cannot immediately follow it (that
+ // location is 2 bytes farther away from I+1 than CPE was from I) and we'd
+ // need to create a new island. So, we make a first guess, then walk through
+ // the instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions that
+ // reference CPEs will be able to use the same island area; if not, we back
+ // up the insertion point.
+
+ // Try to split the block so it's fully aligned. Compute the latest split
+ // point where we can add a 4-byte branch instruction, and then
+ // WorstCaseAlign to LogAlign.
+ unsigned LogAlign = MF->getAlignment();
+ assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
+ unsigned KnownBits = UserBBI.internalKnownBits();
+ unsigned UPad = UnknownPadding(LogAlign, KnownBits);
+ unsigned BaseInsertOffset = UserOffset + U.MaxDisp;
+ DEBUG(dbgs() << format("Split in middle of big block before %#x",
+ BaseInsertOffset));
+
+ // Account for alignment and unknown padding.
+ BaseInsertOffset &= ~((1u << LogAlign) - 1);
+ BaseInsertOffset -= UPad;
+
+ // The 4 in the following is for the unconditional branch we'll be inserting
+ // (allows for long branch on Thumb1). Alignment of the island is handled
+ // inside OffsetIsInRange.
+ BaseInsertOffset -= 4;
+
+ DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
+ << " la=" << LogAlign
+ << " kb=" << KnownBits
+ << " up=" << UPad << '\n');
+
+ // This could point off the end of the block if we've already got constant
+ // pool entries following this block; only the last one is in the water list.
+ // Back past any possible branches (allow for a conditional and a maximally
+ // long unconditional).
+ if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset)
+ BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset -
+ (isThumb1 ? 6 : 8);
+ unsigned EndInsertOffset =
+ WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = llvm::next(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!OffsetIsInRange(Offset, EndInsertOffset, U)) {
+ // Shift insertion point by one unit of alignment so it is within reach.
+ BaseInsertOffset -= 1u << LogAlign;
+ EndInsertOffset -= 1u << LogAlign;
+ }
+ // This is overly conservative, as we don't account for CPEMIs being
+ // reused within the block, but it doesn't matter much. Also assume CPEs
+ // are added in order with alignment padding. We may eventually be able
+ // to pack the aligned CPEs better.
+ EndInsertOffset = RoundUpToAlignment(EndInsertOffset,
+ 1u << getCPELogAlign(U.CPEMI)) +
+ U.CPEMI->getOperand(2).getImm();
+ CPUIndex++;
}
- DEBUG(errs() << "Split in middle of big block\n");
- --MI;
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
+ }
- // Avoid splitting an IT block.
- if (LastIT) {
- unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
- if (CC != ARMCC::AL)
- MI = LastIT;
- }
- NewMBB = SplitBlockBeforeInstr(MI);
+ --MI;
+
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
}
+ NewMBB = SplitBlockBeforeInstr(MI);
}
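
The split-point computation above first aligns the candidate offset down with a mask, then reserves room for the worst-case padding and for the branch itself. A tiny standalone illustration of the arithmetic (the constants are made up):

    #include <cstdio>

    int main() {
      unsigned BaseInsertOffset = 0x1236;
      unsigned LogAlign = 3; // suppose the function is 8-byte aligned
      unsigned UPad = 4;     // assumed worst-case unknown padding

      BaseInsertOffset &= ~((1u << LogAlign) - 1); // align down   -> 0x1230
      BaseInsertOffset -= UPad;                    // pad reserve  -> 0x122c
      BaseInsertOffset -= 4;                       // branch room  -> 0x1228

      std::printf("%#x\n", BaseInsertOffset); // prints 0x1228
    }
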
/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
/// is out-of-range. If so, pick up the constant pool value and move it some
/// place in-range. Return true if we changed any addresses (thus must run
/// another pass of branch lengthening), false otherwise.
-bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
- unsigned CPUserIndex) {
+bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) {
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
@@ -1260,11 +1358,11 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
unsigned ID = AFI->createPICLabelUId();
// Look for water where we can place this CPE.
- MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
MachineBasicBlock *NewMBB;
water_iterator IP;
if (LookForWater(U, UserOffset, IP)) {
- DEBUG(errs() << "found water in range\n");
+ DEBUG(dbgs() << "Found water in range\n");
MachineBasicBlock *WaterBB = *IP;
// If the original WaterList entry was "new water" on this iteration,
@@ -1279,7 +1377,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
} else {
// No water found.
- DEBUG(errs() << "No water found\n");
+ DEBUG(dbgs() << "No water found\n");
CreateNewWater(CPUserIndex, UserOffset, NewMBB);
// SplitBlockBeforeInstr adds to WaterList, which is important when it is
@@ -1304,7 +1402,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
WaterList.erase(IP);
// Okay, we know we can put an island before NewMBB now, do it!
- MF.insert(NewMBB, NewIsland);
+ MF->insert(NewMBB, NewIsland);
// Update internal data structures to account for the newly inserted MBB.
UpdateForInsertedWaterBlock(NewIsland);
@@ -1320,13 +1418,12 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
++NumCPEs;
- BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
- // Compensate for .align 2 in thumb mode.
- if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm))
- Size += 2;
+ // Mark the basic block as aligned as required by the const-pool entry.
+ NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
+
// Increase the size of the island block to account for the new entry.
- BBSizes[NewIsland->getNumber()] += Size;
- AdjustBBOffsetsAfter(NewIsland, Size);
+ BBInfo[NewIsland->getNumber()].Size += Size;
+ AdjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland)));
// Finally, change the CPI in the instruction operand to be ID.
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
@@ -1335,8 +1432,8 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
break;
}
- DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI
- << '\t' << *UserMI);
+ DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
return true;
}
@@ -1347,19 +1444,18 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
MachineBasicBlock *CPEBB = CPEMI->getParent();
unsigned Size = CPEMI->getOperand(2).getImm();
CPEMI->eraseFromParent();
- BBSizes[CPEBB->getNumber()] -= Size;
+ BBInfo[CPEBB->getNumber()].Size -= Size;
// All succeeding offsets have the current size value added in, fix this.
if (CPEBB->empty()) {
- // In thumb1 mode, the size of island may be padded by two to compensate for
- // the alignment requirement. Then it will now be 2 when the block is
- // empty, so fix this.
- // All succeeding offsets have the current size value added in, fix this.
- if (BBSizes[CPEBB->getNumber()] != 0) {
- Size += BBSizes[CPEBB->getNumber()];
- BBSizes[CPEBB->getNumber()] = 0;
- }
- }
- AdjustBBOffsetsAfter(CPEBB, -Size);
+ BBInfo[CPEBB->getNumber()].Size = 0;
+
+ // This block no longer needs to be aligned. <rdar://problem/10534709>.
+ CPEBB->setAlignment(0);
+ } else
+ // Entries are sorted by descending alignment, so realign from the front.
+ CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
+
+ AdjustBBOffsetsAfter(CPEBB);
// An island has only one predecessor BB and one successor BB. Check if
// this BB's predecessor jumps directly to this BB's successor. This
// shouldn't happen currently.
@@ -1390,9 +1486,9 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
unsigned MaxDisp) {
unsigned PCAdj = isThumb ? 4 : 8;
unsigned BrOffset = GetOffsetOf(MI) + PCAdj;
- unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
- DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber()
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
<< " from BB#" << MI->getParent()->getNumber()
<< " max delta=" << MaxDisp
<< " from " << GetOffsetOf(MI) << " to " << DestOffset
@@ -1411,7 +1507,7 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
/// away to fit in its displacement field.
-bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
+bool ARMConstantIslands::FixUpImmediateBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
@@ -1420,8 +1516,8 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
return false;
if (!Br.isCond)
- return FixUpUnconditionalBr(MF, Br);
- return FixUpConditionalBr(MF, Br);
+ return FixUpUnconditionalBr(Br);
+ return FixUpConditionalBr(Br);
}
/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
@@ -1429,7 +1525,7 @@ bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
/// spilled in the epilogue, then we can use BL to implement a far jump.
/// Otherwise, add an intermediate branch instruction to a branch.
bool
-ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ARMConstantIslands::FixUpUnconditionalBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *MBB = MI->getParent();
if (!isThumb1)
@@ -1438,12 +1534,12 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
// Use BL to implement far jump.
Br.MaxDisp = (1 << 21) * 2;
MI->setDesc(TII->get(ARM::tBfar));
- BBSizes[MBB->getNumber()] += 2;
- AdjustBBOffsetsAfter(MBB, 2);
+ BBInfo[MBB->getNumber()].Size += 2;
+ AdjustBBOffsetsAfter(MBB);
HasFarJump = true;
++NumUBrFixed;
- DEBUG(errs() << " Changed B to long jump " << *MI);
+ DEBUG(dbgs() << " Changed B to long jump " << *MI);
return true;
}
@@ -1452,7 +1548,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
/// far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
bool
-ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
@@ -1487,7 +1583,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
// b L1
MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
- DEBUG(errs() << " Invert Bcc condition and swap its destination with "
+ DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
<< *BMI);
BMI->getOperand(0).setMBB(DestBB);
MI->getOperand(0).setMBB(NewDest);
@@ -1502,15 +1598,13 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
// No need for the branch to the next block. We're adding an unconditional
// branch to the destination.
int delta = TII->GetInstSizeInBytes(&MBB->back());
- BBSizes[MBB->getNumber()] -= delta;
- MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB));
- AdjustBBOffsetsAfter(SplitBB, -delta);
+ BBInfo[MBB->getNumber()].Size -= delta;
MBB->back().eraseFromParent();
- // BBOffsets[SplitBB] is wrong temporarily, fixed below
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
}
MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
- DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber()
+ DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
<< NextBB->getNumber() << "\n");
@@ -1519,23 +1613,20 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
.addMBB(NextBB).addImm(CC).addReg(CCReg);
Br.MI = &MBB->back();
- BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
if (isThumb)
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
.addImm(ARMCC::AL).addReg(0);
else
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
- BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
// Remove the old conditional branch. It may or may not still be in MBB.
- BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI);
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
MI->eraseFromParent();
-
- // The net size change is an addition of one unconditional branch.
- int delta = TII->GetInstSizeInBytes(&MBB->back());
- AdjustBBOffsetsAfter(MBB, delta);
+ AdjustBBOffsetsAfter(MBB);
return true;
}
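
FixUpConditionalBr turns an out-of-range "bCC dest" into an inverted short branch over an unconditional long branch. A sketch of the core transformation, using a toy condition-code enum rather than ARMCC:

    #include <cassert>
    #include <cstdio>

    enum Cond { EQ, NE, LT, GE };

    // Invert the condition so a short branch skips the long one:
    //   bCC  dest        =>    b!CC next
    //                          b    dest
    //                    next:
    static Cond invert(Cond CC) {
      switch (CC) {
      case EQ: return NE;
      case NE: return EQ;
      case LT: return GE;
      case GE: return LT;
      }
      assert(0 && "bad condition code");
      return EQ;
    }

    int main() {
      std::printf("%d\n", invert(LT)); // prints 3 (GE)
    }
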
@@ -1561,7 +1652,7 @@ bool ARMConstantIslands::UndoLRSpillRestore() {
return MadeChange;
}
-bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
+bool ARMConstantIslands::OptimizeThumb2Instructions() {
bool MadeChange = false;
// Shrink ADR and LDR from constantpool.
@@ -1598,19 +1689,19 @@ bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
U.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = U.MI->getParent();
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ AdjustBBOffsetsAfter(MBB);
++NumT2CPShrunk;
MadeChange = true;
}
}
- MadeChange |= OptimizeThumb2Branches(MF);
- MadeChange |= OptimizeThumb2JumpTables(MF);
+ MadeChange |= OptimizeThumb2Branches();
+ MadeChange |= OptimizeThumb2JumpTables();
return MadeChange;
}
-bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
+bool ARMConstantIslands::OptimizeThumb2Branches() {
bool MadeChange = false;
for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
@@ -1639,8 +1730,8 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
Br.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = Br.MI->getParent();
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ AdjustBBOffsetsAfter(MBB);
++NumT2BrShrunk;
MadeChange = true;
}
@@ -1663,7 +1754,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
// Check if the distance is within 126. Subtract 2 from the starting
// offset because the cmp will be eliminated.
unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2;
- unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
MachineBasicBlock::iterator CmpMI = Br.MI;
if (CmpMI != Br.MI->getParent()->begin()) {
@@ -1681,8 +1772,8 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
CmpMI->eraseFromParent();
Br.MI->eraseFromParent();
Br.MI = NewBR;
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ AdjustBBOffsetsAfter(MBB);
++NumCBZ;
MadeChange = true;
}
@@ -1696,12 +1787,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
-bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
+bool ARMConstantIslands::OptimizeThumb2JumpTables() {
bool MadeChange = false;
// FIXME: After the tables are shrunk, can we get rid some of the
// constantpool tables?
- MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (MJTI == 0) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -1709,7 +1800,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
@@ -1720,7 +1811,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
MachineBasicBlock *MBB = JTBBs[j];
- unsigned DstOffset = BBOffsets[MBB->getNumber()];
+ unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
// Negative offset is not ok. FIXME: We should change BB layout to make
// sure all the branches are forward.
if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
@@ -1808,8 +1899,8 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
MI->eraseFromParent();
int delta = OrigSize - NewSize;
- BBSizes[MBB->getNumber()] -= delta;
- AdjustBBOffsetsAfter(MBB, -delta);
+ BBInfo[MBB->getNumber()].Size -= delta;
+ AdjustBBOffsetsAfter(MBB);
++NumTBs;
MadeChange = true;
@@ -1821,10 +1912,10 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that
/// jump tables always branch forwards, since that's what tbb and tbh need.
-bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
+bool ARMConstantIslands::ReorderThumb2JumpTables() {
bool MadeChange = false;
- MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (MJTI == 0) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -1832,7 +1923,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
@@ -1864,8 +1955,6 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
MachineBasicBlock *ARMConstantIslands::
AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
{
- MachineFunction &MF = *BB->getParent();
-
// If the destination block is terminated by an unconditional branch,
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
@@ -1882,22 +1971,22 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
// If the block ends in an unconditional branch, move it. The prior block
// has to have an analyzable terminator for us to move this one. Be paranoid
// and make sure we're not trying to move the entry block of the function.
- if (!B && Cond.empty() && BB != MF.begin() &&
+ if (!B && Cond.empty() && BB != MF->begin() &&
!TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
BB->moveAfter(JTBB);
OldPrior->updateTerminator();
BB->updateTerminator();
// Update numbering to account for the block being moved.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
++NumJTMoved;
return NULL;
}
// Create a new MBB for the code after the jump BB.
MachineBasicBlock *NewBB =
- MF.CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
MachineFunction::iterator MBBI = JTBB; ++MBBI;
- MF.insert(MBBI, NewBB);
+ MF->insert(MBBI, NewBB);
// Add an unconditional branch from NewBB to BB.
// There doesn't seem to be meaningful DebugInfo available; this doesn't
@@ -1907,7 +1996,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
.addImm(ARMCC::AL).addReg(0);
// Update internal data structures to account for the newly inserted MBB.
- MF.RenumberBlocks(NewBB);
+ MF->RenumberBlocks(NewBB);
// Update the CFG.
NewBB->addSuccessor(BB);
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index fc464ea..01d772d 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -61,7 +61,7 @@ namespace {
void ExpandVST(MachineBasicBlock::iterator &MBBI);
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool IsExt, unsigned NumRegs);
+ unsigned Opc, bool IsExt);
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
};
@@ -129,12 +129,15 @@ namespace {
}
static const NEONLdStTableEntry NEONLdStTable[] = {
-{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,true},
-{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, true, SingleSpc, 2, 4,true},
-{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,true},
-{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, true, SingleSpc, 2, 2,true},
-{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,true},
-{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, true, SingleSpc, 2, 8,true},
+{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,false},
+{ ARM::VLD1DUPq16PseudoWB_fixed, ARM::VLD1DUPq16wb_fixed, true, true, false, SingleSpc, 2, 4,false},
+{ ARM::VLD1DUPq16PseudoWB_register, ARM::VLD1DUPq16wb_register, true, true, true, SingleSpc, 2, 4,false},
+{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,false},
+{ ARM::VLD1DUPq32PseudoWB_fixed, ARM::VLD1DUPq32wb_fixed, true, true, false, SingleSpc, 2, 2,false},
+{ ARM::VLD1DUPq32PseudoWB_register, ARM::VLD1DUPq32wb_register, true, true, true, SingleSpc, 2, 2,false},
+{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,false},
+{ ARM::VLD1DUPq8PseudoWB_fixed, ARM::VLD1DUPq8wb_fixed, true, true, false, SingleSpc, 2, 8,false},
+{ ARM::VLD1DUPq8PseudoWB_register, ARM::VLD1DUPq8wb_register, true, true, true, SingleSpc, 2, 8,false},
{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true},
@@ -177,18 +180,24 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true},
{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, false, SingleSpc, 2, 4 ,false},
-{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, true, SingleSpc, 2, 4 ,false},
+{ ARM::VLD2d16PseudoWB_fixed, ARM::VLD2d16wb_fixed, true, true, false, SingleSpc, 2, 4 ,false},
+{ ARM::VLD2d16PseudoWB_register, ARM::VLD2d16wb_register, true, true, true, SingleSpc, 2, 4 ,false},
{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, false, SingleSpc, 2, 2 ,false},
-{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, true, SingleSpc, 2, 2 ,false},
+{ ARM::VLD2d32PseudoWB_fixed, ARM::VLD2d32wb_fixed, true, true, false, SingleSpc, 2, 2 ,false},
+{ ARM::VLD2d32PseudoWB_register, ARM::VLD2d32wb_register, true, true, true, SingleSpc, 2, 2 ,false},
{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, false, SingleSpc, 2, 8 ,false},
-{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, true, SingleSpc, 2, 8 ,false},
+{ ARM::VLD2d8PseudoWB_fixed, ARM::VLD2d8wb_fixed, true, true, false, SingleSpc, 2, 8 ,false},
+{ ARM::VLD2d8PseudoWB_register, ARM::VLD2d8wb_register, true, true, true, SingleSpc, 2, 8 ,false},
{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false},
-{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false},
{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false},
-{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false},
{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false},
-{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, true, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false},
{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true},
{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true},
@@ -267,10 +276,12 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true},
{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true},
-{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,true},
-{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, true, SingleSpc, 4, 1 ,true},
-{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,true},
-{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, true, SingleSpc, 3, 1 ,true},
+{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false},
{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,false},
{ ARM::VST1q16PseudoWB_fixed, ARM::VST1q16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false},
@@ -296,19 +307,25 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true},
{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
-{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,true},
-{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, true, SingleSpc, 2, 4 ,true},
-{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,true},
-{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, true, SingleSpc, 2, 2 ,true},
-{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,true},
-{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, true, SingleSpc, 2, 8 ,true},
-
-{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,true},
-{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
-{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,true},
-{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
-{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,true},
-{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
+{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,false},
+{ ARM::VST2d16PseudoWB_fixed, ARM::VST2d16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false},
+{ ARM::VST2d16PseudoWB_register, ARM::VST2d16wb_register, false, true, true, SingleSpc, 2, 4 ,false},
+{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,false},
+{ ARM::VST2d32PseudoWB_fixed, ARM::VST2d32wb_fixed, false, true, false, SingleSpc, 2, 2 ,false},
+{ ARM::VST2d32PseudoWB_register, ARM::VST2d32wb_register, false, true, true, SingleSpc, 2, 2 ,false},
+{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,false},
+{ ARM::VST2d8PseudoWB_fixed, ARM::VST2d8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false},
+{ ARM::VST2d8PseudoWB_register, ARM::VST2d8wb_register, false, true, true, SingleSpc, 2, 8 ,false},
+
+{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false},
{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true},
{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
@@ -620,7 +637,7 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
/// register operands to real instructions with D register operands.
void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool IsExt, unsigned NumRegs) {
+ unsigned Opc, bool IsExt) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
@@ -636,11 +653,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0).addReg(D1);
- if (NumRegs > 2)
- MIB.addReg(D2);
- if (NumRegs > 3)
- MIB.addReg(D3);
+ MIB.addReg(D0);
// Copy the other source register operand.
MIB.addOperand(MI.getOperand(OpIdx++));
@@ -1090,12 +1103,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
- case ARM::VLD2d8Pseudo_UPD:
- case ARM::VLD2d16Pseudo_UPD:
- case ARM::VLD2d32Pseudo_UPD:
- case ARM::VLD2q8Pseudo_UPD:
- case ARM::VLD2q16Pseudo_UPD:
- case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD2d8PseudoWB_fixed:
+ case ARM::VLD2d16PseudoWB_fixed:
+ case ARM::VLD2d32PseudoWB_fixed:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2d8PseudoWB_register:
+ case ARM::VLD2d16PseudoWB_register:
+ case ARM::VLD2d32PseudoWB_register:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
@@ -1131,9 +1150,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD1DUPq8Pseudo:
case ARM::VLD1DUPq16Pseudo:
case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8Pseudo_UPD:
- case ARM::VLD1DUPq16Pseudo_UPD:
- case ARM::VLD1DUPq32Pseudo_UPD:
+ case ARM::VLD1DUPq8PseudoWB_fixed:
+ case ARM::VLD1DUPq16PseudoWB_fixed:
+ case ARM::VLD1DUPq32PseudoWB_fixed:
+ case ARM::VLD1DUPq8PseudoWB_register:
+ case ARM::VLD1DUPq16PseudoWB_register:
+ case ARM::VLD1DUPq32PseudoWB_register:
case ARM::VLD2DUPd8Pseudo:
case ARM::VLD2DUPd16Pseudo:
case ARM::VLD2DUPd32Pseudo:
@@ -1173,12 +1195,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST2q8Pseudo:
case ARM::VST2q16Pseudo:
case ARM::VST2q32Pseudo:
- case ARM::VST2d8Pseudo_UPD:
- case ARM::VST2d16Pseudo_UPD:
- case ARM::VST2d32Pseudo_UPD:
- case ARM::VST2q8Pseudo_UPD:
- case ARM::VST2q16Pseudo_UPD:
- case ARM::VST2q32Pseudo_UPD:
+ case ARM::VST2d8PseudoWB_fixed:
+ case ARM::VST2d16PseudoWB_fixed:
+ case ARM::VST2d32PseudoWB_fixed:
+ case ARM::VST2q8PseudoWB_fixed:
+ case ARM::VST2q16PseudoWB_fixed:
+ case ARM::VST2q32PseudoWB_fixed:
+ case ARM::VST2d8PseudoWB_register:
+ case ARM::VST2d16PseudoWB_register:
+ case ARM::VST2d32PseudoWB_register:
+ case ARM::VST2q8PseudoWB_register:
+ case ARM::VST2q16PseudoWB_register:
+ case ARM::VST2q32PseudoWB_register:
case ARM::VST3d8Pseudo:
case ARM::VST3d16Pseudo:
case ARM::VST3d32Pseudo:
@@ -1186,7 +1214,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST3d8Pseudo_UPD:
case ARM::VST3d16Pseudo_UPD:
case ARM::VST3d32Pseudo_UPD:
- case ARM::VST1d64TPseudo_UPD:
+ case ARM::VST1d64TPseudoWB_fixed:
+ case ARM::VST1d64TPseudoWB_register:
case ARM::VST3q8Pseudo_UPD:
case ARM::VST3q16Pseudo_UPD:
case ARM::VST3q32Pseudo_UPD:
@@ -1203,7 +1232,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST4d8Pseudo_UPD:
case ARM::VST4d16Pseudo_UPD:
case ARM::VST4d32Pseudo_UPD:
- case ARM::VST1d64QPseudo_UPD:
+ case ARM::VST1d64QPseudoWB_fixed:
+ case ARM::VST1d64QPseudoWB_register:
case ARM::VST4q8Pseudo_UPD:
case ARM::VST4q16Pseudo_UPD:
case ARM::VST4q32Pseudo_UPD:
@@ -1291,12 +1321,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandLaneOp(MBBI);
return true;
- case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true;
- case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true;
- case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true;
- case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true;
- case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true;
- case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true;
+ case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false); return true;
+ case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
+ case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
+ case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true); return true;
+ case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
+ case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
}
return false;
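
The NEONLdStTable edits above follow a table-driven expansion pattern: each pseudo-opcode row carries the real opcode plus flags telling the expander whether the instruction writes its base register back and whether the update amount comes from a register. A toy version of the lookup (opcode numbers and field names are placeholders, not the real ARM entries):

    #include <cstdio>

    struct LdStEntry {
      unsigned PseudoOpc;
      unsigned RealOpc;
      bool IsLoad;
      bool IsUpdating;   // writes the base register back
      bool HasRegUpdate; // writeback amount comes from a register operand
    };

    static const LdStEntry Table[] = {
      { 100, 200, true, false, false }, // plain form
      { 101, 201, true, true,  false }, // ..._fixed writeback
      { 102, 202, true, true,  true  }, // ..._register writeback
    };

    static const LdStEntry *lookup(unsigned PseudoOpc) {
      for (unsigned i = 0; i != sizeof(Table) / sizeof(Table[0]); ++i)
        if (Table[i].PseudoOpc == PseudoOpc)
          return &Table[i];
      return 0;
    }

    int main() {
      if (const LdStEntry *E = lookup(101))
        std::printf("expand %u -> %u\n", E->PseudoOpc, E->RealOpc); // 101 -> 201
    }
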
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 9bae422..a98dfc3 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -178,10 +178,12 @@ class ARMFastISel : public FastISel {
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt);
- bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, bool isZExt,
- bool allocReg);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment = 0, bool isZExt = true,
+ bool allocReg = true);
- bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment = 0);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
bool ARMIsMemCpySmall(uint64_t Len);
@@ -227,8 +229,7 @@ class ARMFastISel : public FastISel {
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.hasOptionalDef())
+ if (!MI->hasOptionalDef())
return false;
// Look to see if our OptionalDef is defining CPSR or CCR.
@@ -702,7 +703,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
TargetRegisterClass* RC = TLI.getRegClassFor(VT);
unsigned ResultReg = createResultReg(RC);
unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addFrameIndex(SI->second)
.addImm(0));
@@ -898,7 +899,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
ARM::GPRRegisterClass;
unsigned ResultReg = createResultReg(RC);
unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addFrameIndex(Addr.Base.FI)
.addImm(0));
@@ -937,7 +938,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
// Now add the rest of the operands.
MIB.addFrameIndex(FI);
- // ARM halfword load/stores and signed byte loads need an additional operand.
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
if (useAM3) {
signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
MIB.addReg(0);
@@ -950,7 +952,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
// Now add the rest of the operands.
MIB.addReg(Addr.Base.Reg);
- // ARM halfword load/stores and signed byte loads need an additional operand.
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
if (useAM3) {
signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
MIB.addReg(0);
@@ -963,10 +966,11 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
}
bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
- bool isZExt = true, bool allocReg = true) {
+ unsigned Alignment, bool isZExt, bool allocReg) {
assert(VT.isSimple() && "Non-simple types are invalid here!");
unsigned Opc;
bool useAM3 = false;
+ bool needVMOV = false;
TargetRegisterClass *RC;
switch (VT.getSimpleVT().SimpleTy) {
// This is mostly going to be Neon/vector support.
@@ -1012,10 +1016,25 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
RC = ARM::GPRRegisterClass;
break;
case MVT::f32:
- Opc = ARM::VLDRS;
- RC = TLI.getRegClassFor(VT);
+ if (!Subtarget->hasVFP2()) return false;
+ // Unaligned loads need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ needVMOV = true;
+ VT = MVT::i32;
+ Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
+ RC = ARM::GPRRegisterClass;
+ } else {
+ Opc = ARM::VLDRS;
+ RC = TLI.getRegClassFor(VT);
+ }
break;
case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned loads need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
Opc = ARM::VLDRD;
RC = TLI.getRegClassFor(VT);
break;
@@ -1030,6 +1049,16 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg);
AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
+
+ // If we had an unaligned load of a float we've converted it to a regular
+ // load. Now we must move from the GPR to the FP register.
+ if (needVMOV) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(ResultReg));
+ ResultReg = MoveReg;
+ }
return true;
}
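
The new path above handles an under-aligned f32 by loading it as an i32 and moving it into an S register with VMOVSR, while an under-aligned f64 simply bails out to SelectionDAG. A sketch of just the decision logic, with placeholder opcode strings in place of the real encodings:

    #include <cstdio>

    enum FPKind { F32, F64 };

    // Returns false when fast-isel should give up on the load entirely.
    static bool chooseFloatLoadOpc(FPKind Kind, unsigned Alignment,
                                   bool &NeedVMOV, const char *&Opc) {
      NeedVMOV = false;
      if (Kind == F64) {
        if (Alignment && Alignment < 4) // under-aligned doubleword: bail out
          return false;
        Opc = "VLDRD";
        return true;
      }
      if (Alignment && Alignment < 4) {
        NeedVMOV = true; // integer load, then VMOVSR into an S register
        Opc = "LDRi12";
      } else {
        Opc = "VLDRS";
      }
      return true;
    }

    int main() {
      bool NeedVMOV; const char *Opc;
      if (chooseFloatLoadOpc(F32, 1, NeedVMOV, Opc))
        std::printf("%s%s\n", Opc, NeedVMOV ? " + VMOVSR" : ""); // LDRi12 + VMOVSR
    }
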
@@ -1048,12 +1077,14 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
unsigned ResultReg;
- if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
+ if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+ return false;
UpdateValueMap(I, ResultReg);
return true;
}
-bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment) {
unsigned StrOpc;
bool useAM3 = false;
switch (VT.getSimpleVT().SimpleTy) {
@@ -1101,10 +1132,26 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
break;
case MVT::f32:
if (!Subtarget->hasVFP2()) return false;
- StrOpc = ARM::VSTRS;
+ // Unaligned stores need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ SrcReg = MoveReg;
+ VT = MVT::i32;
+ StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
+ } else {
+ StrOpc = ARM::VSTRS;
+ }
break;
case MVT::f64:
if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned stores need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
StrOpc = ARM::VSTRD;
break;
}
@@ -1141,7 +1188,8 @@ bool ARMFastISel::SelectStore(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(1), Addr))
return false;
- if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
+ if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+ return false;
return true;
}
@@ -1360,7 +1408,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
unsigned SrcReg1 = getRegForValue(Src1Value);
if (SrcReg1 == 0) return false;
- unsigned SrcReg2;
+ unsigned SrcReg2 = 0;
if (!UseImm) {
SrcReg2 = getRegForValue(Src2Value);
if (SrcReg2 == 0) return false;
@@ -1577,7 +1625,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
(ARM_AM::getSOImmVal(Imm) != -1);
}
- unsigned Op2Reg;
+ unsigned Op2Reg = 0;
if (!UseImm) {
Op2Reg = getRegForValue(I->getOperand(2));
if (Op2Reg == 0) return false;
@@ -1716,7 +1764,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
// Use target triple & subtarget features to do actual dispatch.
if (Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard)
+ TM.Options.FloatABIType == FloatABI::Hard)
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
else
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
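
The change above routes the float-ABI setting through TM.Options, but the dispatch itself is unchanged: the AAPCS-VFP ("hard float") convention is chosen only when the target has VFP2 and the ABI is hard-float. As a one-line model with illustrative enum names standing in for the CCAssignFn pointers:

    #include <cstdio>

    enum CC { AAPCS, AAPCS_VFP };

    static CC pickCallingConv(bool HasVFP2, bool HardFloat) {
      return (HasVFP2 && HardFloat) ? AAPCS_VFP : AAPCS;
    }

    int main() {
      std::printf("%d\n", pickCallingConv(true, false)); // soft-float: AAPCS (0)
    }
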
@@ -1765,21 +1813,23 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
switch (VA.getLocInfo()) {
case CCValAssign::Full: break;
case CCValAssign::SExt: {
- EVT DestVT = VA.getLocVT();
+ MVT DestVT = VA.getLocVT();
unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT,
/*isZExt*/false);
assert (ResultReg != 0 && "Failed to emit a sext");
Arg = ResultReg;
+ ArgVT = DestVT;
break;
}
case CCValAssign::AExt:
// Intentional fall-through. Handle AExt and ZExt.
case CCValAssign::ZExt: {
- EVT DestVT = VA.getLocVT();
+ MVT DestVT = VA.getLocVT();
unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT,
/*isZExt*/true);
assert (ResultReg != 0 && "Failed to emit a zext");
Arg = ResultReg;
+ ArgVT = DestVT;
break;
}
case CCValAssign::BCvt: {
@@ -2456,7 +2506,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
unsigned ResultReg = MI->getOperand(0).getReg();
- if (!ARMEmitLoad(VT, ResultReg, Addr, isZExt, false))
+ if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
return false;
MI->eraseFromParent();
return true;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 2d1de6f..06944b1 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -37,7 +37,8 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Always eliminate non-leaf frame pointers.
- return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
+ return ((MF.getTarget().Options.DisableFramePointerElim(MF) &&
+ MFI->hasCalls()) ||
RegInfo->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
@@ -309,8 +310,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->getDesc().isReturn() &&
- "Can only insert epilog into returning blocks");
+ assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 787f6a2..a5fd15b 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -21,7 +21,7 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
- if (MCID.mayStore())
+ if (MI->mayStore())
return false;
unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
@@ -38,9 +38,6 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *MI = SU->getInstr();
if (!MI->isDebugValue()) {
- if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1])
- return Hazard;
-
// Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
// a VMLA / VMLS will cause 4 cycle stall.
const MCInstrDesc &MCID = MI->getDesc();
@@ -48,9 +45,9 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *DefMI = LastMI;
const MCInstrDesc &LastMCID = LastMI->getDesc();
// Skip over one non-VFP / NEON instruction.
- if (!LastMCID.isBarrier() &&
+ if (!LastMI->isBarrier() &&
// On A9, AGU and NEON/FPU are muxed.
- !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) &&
+ !(STI.isCortexA9() && (LastMI->mayLoad() || LastMI->mayStore())) &&
(LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
MachineBasicBlock::iterator I = LastMI;
if (I != LastMI->getParent()->begin()) {
@@ -76,30 +73,11 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
void ARMHazardRecognizer::Reset() {
LastMI = 0;
FpMLxStalls = 0;
- ITBlockSize = 0;
ScoreboardHazardRecognizer::Reset();
}
void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
MachineInstr *MI = SU->getInstr();
- unsigned Opcode = MI->getOpcode();
- if (ITBlockSize) {
- --ITBlockSize;
- } else if (Opcode == ARM::t2IT) {
- unsigned Mask = MI->getOperand(1).getImm();
- unsigned NumTZ = CountTrailingZeros_32(Mask);
- assert(NumTZ <= 3 && "Invalid IT mask!");
- ITBlockSize = 4 - NumTZ;
- MachineBasicBlock::iterator I = MI;
- for (unsigned i = 0; i < ITBlockSize; ++i) {
- // Advance to the next instruction, skipping any dbg_value instructions.
- do {
- ++I;
- } while (I->isDebugValue());
- ITBlockMIs[ITBlockSize-1-i] = &*I;
- }
- }
-
if (!MI->isDebugValue()) {
LastMI = MI;
FpMLxStalls = 0;
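
The tracking code deleted above decoded the Thumb-2 IT instruction's 4-bit mask operand: the block covers 4 minus count-trailing-zeros instructions. A standalone model of that decoding (CountTrailingZeros_32 is spelled out as a loop):

    #include <cstdio>

    // Number of instructions covered by an IT block, from its 4-bit mask.
    static unsigned itBlockSize(unsigned Mask) {
      unsigned NumTZ = 0;
      while (NumTZ < 4 && ((Mask >> NumTZ) & 1) == 0)
        ++NumTZ; // CountTrailingZeros_32 in the original
      return 4 - NumTZ; // 1 to 4 instructions
    }

    int main() {
      std::printf("%u\n", itBlockSize(0x8)); // mask 0b1000 -> 1 instruction
    }
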
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
index 2bc218d..98bfc4c 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -23,6 +23,10 @@ class ARMBaseRegisterInfo;
class ARMSubtarget;
class MachineInstr;
+/// ARMHazardRecognizer handles special constraints that are not expressed in
+/// the scheduling itinerary. This is only used during postRA scheduling. The
+/// ARM preRA scheduler uses an unspecialized instance of the
+/// ScoreboardHazardRecognizer.
class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
const ARMBaseInstrInfo &TII;
const ARMBaseRegisterInfo &TRI;
@@ -30,8 +34,6 @@ class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
MachineInstr *LastMI;
unsigned FpMLxStalls;
- unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled.
- MachineInstr *ITBlockMIs[4];
public:
ARMHazardRecognizer(const InstrItineraryData *ItinData,
@@ -40,7 +42,7 @@ public:
const ARMSubtarget &sti,
const ScheduleDAG *DAG) :
ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
- TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {}
+ TRI(tri), STI(sti), LastMI(0) {}
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void Reset();
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index bc8588f..7473141 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1579,6 +1579,22 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case ARM::VST1q16PseudoWB_fixed: return ARM::VST1q16PseudoWB_register;
case ARM::VST1q32PseudoWB_fixed: return ARM::VST1q32PseudoWB_register;
case ARM::VST1q64PseudoWB_fixed: return ARM::VST1q64PseudoWB_register;
+ case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
+ case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
+
+ case ARM::VLD2d8PseudoWB_fixed: return ARM::VLD2d8PseudoWB_register;
+ case ARM::VLD2d16PseudoWB_fixed: return ARM::VLD2d16PseudoWB_register;
+ case ARM::VLD2d32PseudoWB_fixed: return ARM::VLD2d32PseudoWB_register;
+ case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
+ case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
+ case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
+
+ case ARM::VST2d8PseudoWB_fixed: return ARM::VST2d8PseudoWB_register;
+ case ARM::VST2d16PseudoWB_fixed: return ARM::VST2d16PseudoWB_register;
+ case ARM::VST2d32PseudoWB_fixed: return ARM::VST2d32PseudoWB_register;
+ case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
+ case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
+ case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
}
return Opc; // If not one we handle, return it unchanged.
}
@@ -1646,13 +1662,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- // FIXME: VLD1 fixed increment doesn't need Reg0. Remove the reg0
+ // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode()))
+ if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
Opc = getVLDSTRegisterUpdateOpcode(Opc);
- // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
+ // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
- if ((NumVecs != 1 && Opc != ARM::VLD1q64PseudoWB_fixed) ||
+ if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64PseudoWB_fixed) ||
!isa<ConstantSDNode>(Inc.getNode()))
Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
@@ -1796,9 +1812,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- // FIXME: VST1 fixed increment doesn't need Reg0. Remove the reg0
+ // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
// case entirely when the rest are updated to that form, too.
- if (NumVecs == 1 && !isa<ConstantSDNode>(Inc.getNode()))
+ if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
Opc = getVLDSTRegisterUpdateOpcode(Opc);
// We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
// check for that explicitly too. Horribly hacky, but temporary.
@@ -2810,10 +2826,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2_UPD: {
- unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
- ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed};
- unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
- ARM::VLD2q32Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VLD2d8PseudoWB_fixed,
+ ARM::VLD2d16PseudoWB_fixed,
+ ARM::VLD2d32PseudoWB_fixed,
+ ARM::VLD1q64PseudoWB_fixed};
+ unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
+ ARM::VLD2q16PseudoWB_fixed,
+ ARM::VLD2q32PseudoWB_fixed };
return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
}
@@ -2876,16 +2895,19 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VST2_UPD: {
- unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
- ARM::VST2d32Pseudo_UPD, ARM::VST1q64PseudoWB_fixed};
- unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
- ARM::VST2q32Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VST2d8PseudoWB_fixed,
+ ARM::VST2d16PseudoWB_fixed,
+ ARM::VST2d32PseudoWB_fixed,
+ ARM::VST1q64PseudoWB_fixed};
+ unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
+ ARM::VST2q16PseudoWB_fixed,
+ ARM::VST2q32PseudoWB_fixed };
return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST3_UPD: {
unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
- ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
+ ARM::VST3d32Pseudo_UPD,ARM::VST1d64TPseudoWB_fixed};
unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
ARM::VST3q16Pseudo_UPD,
ARM::VST3q32Pseudo_UPD };
@@ -2897,7 +2919,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::VST4_UPD: {
unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
- ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
+ ARM::VST4d32Pseudo_UPD,ARM::VST1d64QPseudoWB_fixed};
unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
ARM::VST4q16Pseudo_UPD,
ARM::VST4q32Pseudo_UPD };
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 8c4c06f..c6c1f5b 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -72,7 +72,7 @@ ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
-namespace llvm {
+namespace {
class ARMCCState : public CCState {
public:
ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
@@ -432,7 +432,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
else
addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
if (!Subtarget->isFPOnlySP())
addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
@@ -467,13 +468,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// v2f64 is legal so that QR subregs can be extracted as f64 elements, but
// neither Neon nor VFP support any arithmetic operations on it.
+  // The same applies to v4f32, but keep in mind that vadd, vsub, and vmul
+  // are natively supported for v4f32.
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+  // FIXME: Code duplication: FDIV and FREM are always expanded; see
+  // ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+  // FIXME: Create unittest.
+  // In other words, find a case where "copysign" appears in the DAG with
+  // vector operands.
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+ // FIXME: Code duplication: SETCC has custom operation action, see
+ // ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FNEG and for FABS.
setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
@@ -486,11 +497,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
@@ -586,6 +609,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ // These just redirect to CTTZ and CTLZ on ARM.
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
+
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
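
CTTZ_ZERO_UNDEF and CTLZ_ZERO_UNDEF leave the zero-input case undefined, so expanding them to the fully defined CTTZ/CTLZ is always sound: defined behavior is a valid refinement of undefined behavior. A small C++ illustration of why the Expand action is safe:

```cpp
#include <cstdint>
#include <cstdio>

// cttz with the "zero input is undefined" contract can always be
// implemented by a fully-defined cttz -- a sketch of why Expand is safe.
uint32_t cttz(uint32_t x) {        // fully defined: cttz(0) == 32
  if (x == 0) return 32;
  uint32_t n = 0;
  while ((x & 1) == 0) { x >>= 1; ++n; }
  return n;
}

uint32_t cttz_zero_undef(uint32_t x) {
  // Caller promises x != 0, so delegating to the defined version is valid.
  return cttz(x);
}

int main() {
  printf("%u %u\n", cttz(40), cttz_zero_undef(40)); // 3 3
}
```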
@@ -674,7 +701,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
@@ -712,7 +740,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
}
@@ -723,7 +752,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FMA, MVT::f32, Expand);
// Various VFP goodness
- if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
// int <-> fp are custom expanded into bit_convert + ARMISD ops.
if (Subtarget->hasVFP2()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
@@ -751,7 +780,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setStackPointerRegisterToSaveRestore(ARM::SP);
- if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+ if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
+ !Subtarget->hasVFP2())
setSchedulingPreference(Sched::RegPressure);
else
setSchedulingPreference(Sched::Hybrid);
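
The recurring change through this file replaces references to the old global flags (UseSoftFloat, UnsafeFPMath, FloatABIType) with fields looked up through the owning TargetMachine. A rough sketch of the shape of that refactor; the struct layout here is illustrative, not LLVM's actual TargetOptions:

```cpp
#include <cstdio>

// Before: file-scope globals set from command-line flags.
// After: options travel with the TargetMachine instance.
struct TargetOptions {
  bool UseSoftFloat = false;
  bool UnsafeFPMath = false;
};

struct TargetMachine {
  TargetOptions Options;
};

void chooseSchedulingPreference(const TargetMachine &TM, bool hasVFP2) {
  if (TM.Options.UseSoftFloat || !hasVFP2)
    printf("Sched::RegPressure\n");
  else
    printf("Sched::Hybrid\n");
}

int main() {
  TargetMachine TM;
  TM.Options.UseSoftFloat = true;
  chooseSchedulingPreference(TM, true); // Sched::RegPressure
}
```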
@@ -1092,7 +1122,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
if (!Subtarget->isAAPCS_ABI())
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
else if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard && !isVarArg)
+ getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
+ !isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
}
@@ -2951,7 +2982,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
- if (UnsafeFPMath &&
+ if (getTargetMachine().Options.UnsafeFPMath &&
(CC == ISD::SETEQ || CC == ISD::SETOEQ ||
CC == ISD::SETNE || CC == ISD::SETUNE)) {
SDValue Result = OptimizeVFPBrcond(Op, DAG);
@@ -3978,9 +4009,8 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
// Use vmov.f32 to materialize other v2f32 and v4f32 splats.
- if (VT == MVT::v2f32 || VT == MVT::v4f32) {
- ConstantFPSDNode *C = cast<ConstantFPSDNode>(Op.getOperand(0));
- int ImmVal = ARM_AM::getFP32Imm(C->getValueAPF());
+ if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
+ int ImmVal = ARM_AM::getFP32Imm(SplatBits);
if (ImmVal != -1) {
SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
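
The rewritten splat check asks ARM_AM::getFP32Imm whether the 32-bit splat is representable as the 8-bit VFP/NEON modified immediate accepted by vmov.f32, i.e. values of the form +/-(16..31)/16 x 2^(-3..4). A decode-direction sketch written from the ARM ARM's published VFPExpandImm formula; treat the exact bit layout as an assumption to verify against the spec:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Expand an 8-bit VFP/NEON modified immediate abcdefgh to an IEEE-754
// single: sign = a, exponent = NOT(b):bbbbb, fraction = cdefgh:0...0.
float vfpExpandImm32(uint8_t imm8) {
  uint32_t a = (imm8 >> 7) & 1, b = (imm8 >> 6) & 1;
  uint32_t bits = (a << 31) | ((b ^ 1) << 30) | (b ? 0x3E000000u : 0u) |
                  ((uint32_t)(imm8 & 0x3F) << 19);
  float f;
  memcpy(&f, &bits, sizeof f);
  return f;
}

int main() {
  printf("%g\n", vfpExpandImm32(0x70)); // 1.0
  printf("%g\n", vfpExpandImm32(0x00)); // 2.0
}
```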
@@ -6010,7 +6040,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
// executed.
for (MachineBasicBlock::reverse_iterator
II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
- if (!II->getDesc().isCall()) continue;
+ if (!II->isCall()) continue;
DenseMap<unsigned, bool> DefRegs;
for (MachineInstr::mop_iterator
@@ -6421,13 +6451,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- const MCInstrDesc *MCID = &MI->getDesc();
- if (!MCID->hasPostISelHook()) {
+ if (!MI->hasPostISelHook()) {
assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
"Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
return;
}
+ const MCInstrDesc *MCID = &MI->getDesc();
  // Adjust potentially 's'-setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
// operand is still set to noreg. If needed, set the optional operand's
@@ -6454,7 +6484,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// Any ARM instruction that sets the 's' bit should specify an optional
// "cc_out" operand in the last operand position.
- if (!MCID->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
+ if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
assert(!NewOpc && "Optional cc_out operand required");
return;
}
@@ -7948,7 +7978,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
// will return -0, so vmin can only be used for unsafe math or if one of
// the operands is known to be nonzero.
if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
- !UnsafeFPMath &&
+ !DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
@@ -7970,7 +8000,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
// will return +0, so vmax can only be used for unsafe math or if one of
// the operands is known to be nonzero.
if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
- !UnsafeFPMath &&
+ !DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
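
The UnsafeFPMath guard exists because NEON's vmin orders -0 below +0, while the select_cc pattern being combined does not distinguish the two zeros. A short demonstration of the mismatch the comment describes:

```cpp
#include <cmath>
#include <cstdio>

// "x <= y ? x : y" vs. a min that orders -0 below +0 (NEON vmin semantics).
float selectMin(float x, float y) { return x <= y ? x : y; }
float zeroAwareMin(float x, float y) {
  if (x == 0.0f && y == 0.0f)          // distinguish +0 / -0 like vmin does
    return std::signbit(x) ? x : y;
  return x < y ? x : y;
}

int main() {
  float p = +0.0f, n = -0.0f;
  // +0 <= -0 compares equal, so the select keeps +0 ...
  printf("select:   signbit=%d\n", std::signbit(selectMin(p, n)));   // 0
  // ... but a vmin-style min returns -0: different results for zeros.
  printf("vmin-ish: signbit=%d\n", std::signbit(zeroAwareMin(p, n))); // 1
}
```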
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 6940156..80f3773 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -201,21 +201,29 @@ def msr_mask : Operand<i32> {
// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
// 64 64 - <imm> is encoded in imm6<5:0>
+def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; }
def shr_imm8 : Operand<i32> {
let EncoderMethod = "getShiftRight8Imm";
let DecoderMethod = "DecodeShiftRight8Imm";
+ let ParserMatchClass = shr_imm8_asm_operand;
}
+def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; }
def shr_imm16 : Operand<i32> {
let EncoderMethod = "getShiftRight16Imm";
let DecoderMethod = "DecodeShiftRight16Imm";
+ let ParserMatchClass = shr_imm16_asm_operand;
}
+def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; }
def shr_imm32 : Operand<i32> {
let EncoderMethod = "getShiftRight32Imm";
let DecoderMethod = "DecodeShiftRight32Imm";
+ let ParserMatchClass = shr_imm32_asm_operand;
}
+def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; }
def shr_imm64 : Operand<i32> {
let EncoderMethod = "getShiftRight64Imm";
let DecoderMethod = "DecodeShiftRight64Imm";
+ let ParserMatchClass = shr_imm64_asm_operand;
}
//===----------------------------------------------------------------------===//
@@ -231,6 +239,14 @@ class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>;
class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>;
+class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasNEON]>;
+
+
+class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasVFP2]>;
+class NEONMnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasNEON]>;
//===----------------------------------------------------------------------===//
// ARM Instruction templates.
@@ -1994,73 +2010,111 @@ class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
// VFP/NEON Instruction aliases for type suffices.
class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> :
- InstAlias<!strconcat(opc, dt, asm), Result>;
-multiclass VFPDT8ReqInstAlias<string opc, string asm, dag Result> {
- def I8 : VFPDataTypeInstAlias<opc, ".i8", asm, Result>;
- def S8 : VFPDataTypeInstAlias<opc, ".s8", asm, Result>;
- def U8 : VFPDataTypeInstAlias<opc, ".u8", asm, Result>;
- def F8 : VFPDataTypeInstAlias<opc, ".p8", asm, Result>;
-}
-// VFPDT8ReqInstAlias plus plain ".8"
-multiclass VFPDT8InstAlias<string opc, string asm, dag Result> {
- def _8 : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
- defm : VFPDT8ReqInstAlias<opc, asm, Result>;
-}
-multiclass VFPDT16ReqInstAlias<string opc, string asm, dag Result> {
- def I16 : VFPDataTypeInstAlias<opc, ".i16", asm, Result>;
- def S16 : VFPDataTypeInstAlias<opc, ".s16", asm, Result>;
- def U16 : VFPDataTypeInstAlias<opc, ".u16", asm, Result>;
- def F16 : VFPDataTypeInstAlias<opc, ".p16", asm, Result>;
-}
-// VFPDT16ReqInstAlias plus plain ".16"
-multiclass VFPDT16InstAlias<string opc, string asm, dag Result> {
- def _16 : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
- defm : VFPDT16ReqInstAlias<opc, asm, Result>;
-}
-multiclass VFPDT32ReqInstAlias<string opc, string asm, dag Result> {
- def I32 : VFPDataTypeInstAlias<opc, ".i32", asm, Result>;
- def S32 : VFPDataTypeInstAlias<opc, ".s32", asm, Result>;
- def U32 : VFPDataTypeInstAlias<opc, ".u32", asm, Result>;
- def F32 : VFPDataTypeInstAlias<opc, ".f32", asm, Result>;
- def F : VFPDataTypeInstAlias<opc, ".f", asm, Result>;
-}
-// VFPDT32ReqInstAlias plus plain ".32"
-multiclass VFPDT32InstAlias<string opc, string asm, dag Result> {
- def _32 : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
- defm : VFPDT32ReqInstAlias<opc, asm, Result>;
-}
-multiclass VFPDT64ReqInstAlias<string opc, string asm, dag Result> {
- def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>;
- def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>;
- def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>;
- def F64 : VFPDataTypeInstAlias<opc, ".f64", asm, Result>;
- def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>;
-}
-// VFPDT64ReqInstAlias plus plain ".64"
-multiclass VFPDT64InstAlias<string opc, string asm, dag Result> {
- def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
- defm : VFPDT64ReqInstAlias<opc, asm, Result>;
-}
-multiclass VFPDT64NoF64ReqInstAlias<string opc, string asm, dag Result> {
- def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>;
- def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>;
- def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>;
- def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>;
-}
-// VFPDT64ReqInstAlias plus plain ".64"
-multiclass VFPDT64NoF64InstAlias<string opc, string asm, dag Result> {
- def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
- defm : VFPDT64ReqInstAlias<opc, asm, Result>;
-}
+ InstAlias<!strconcat(opc, dt, "\t", asm), Result>, Requires<[HasVFP2]>;
+
multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> {
- defm : VFPDT8InstAlias<opc, asm, Result>;
- defm : VFPDT16InstAlias<opc, asm, Result>;
- defm : VFPDT32InstAlias<opc, asm, Result>;
- defm : VFPDT64InstAlias<opc, asm, Result>;
-}
-multiclass VFPDTAnyNoF64InstAlias<string opc, string asm, dag Result> {
- defm : VFPDT8InstAlias<opc, asm, Result>;
- defm : VFPDT16InstAlias<opc, asm, Result>;
- defm : VFPDT32InstAlias<opc, asm, Result>;
- defm : VFPDT64NoF64InstAlias<opc, asm, Result>;
-}
+ def : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+}
+
+// The same alias classes using AsmPseudo instead, for the more complex
+// stuff in NEON that InstAlias can't quite handle.
+// Note that we can't use anonymous defm references here like we can
+// above, as we care about the ultimate instruction enum names generated, unlike
+// for InstAlias defs.
+class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> :
+ AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>;
+multiclass NEONDT8ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I8 : NEONDataTypeAsmPseudoInst<opc, ".i8", asm, iops>;
+ def S8 : NEONDataTypeAsmPseudoInst<opc, ".s8", asm, iops>;
+ def U8 : NEONDataTypeAsmPseudoInst<opc, ".u8", asm, iops>;
+ def P8 : NEONDataTypeAsmPseudoInst<opc, ".p8", asm, iops>;
+}
+// NEONDT8ReqAsmPseudoInst plus plain ".8"
+multiclass NEONDT8AsmPseudoInst<string opc, string asm, dag iops> {
+ def _8 : NEONDataTypeAsmPseudoInst<opc, ".8", asm, iops>;
+ defm _ : NEONDT8ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDT16ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I16 : NEONDataTypeAsmPseudoInst<opc, ".i16", asm, iops>;
+ def S16 : NEONDataTypeAsmPseudoInst<opc, ".s16", asm, iops>;
+ def U16 : NEONDataTypeAsmPseudoInst<opc, ".u16", asm, iops>;
+ def P16 : NEONDataTypeAsmPseudoInst<opc, ".p16", asm, iops>;
+}
+// NEONDT16ReqAsmPseudoInst plus plain ".16"
+multiclass NEONDT16AsmPseudoInst<string opc, string asm, dag iops> {
+ def _16 : NEONDataTypeAsmPseudoInst<opc, ".16", asm, iops>;
+ defm _ : NEONDT16ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDT32ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I32 : NEONDataTypeAsmPseudoInst<opc, ".i32", asm, iops>;
+ def S32 : NEONDataTypeAsmPseudoInst<opc, ".s32", asm, iops>;
+ def U32 : NEONDataTypeAsmPseudoInst<opc, ".u32", asm, iops>;
+ def F32 : NEONDataTypeAsmPseudoInst<opc, ".f32", asm, iops>;
+ def F : NEONDataTypeAsmPseudoInst<opc, ".f", asm, iops>;
+}
+// NEONDT32ReqAsmPseudoInst plus plain ".32"
+multiclass NEONDT32AsmPseudoInst<string opc, string asm, dag iops> {
+ def _32 : NEONDataTypeAsmPseudoInst<opc, ".32", asm, iops>;
+ defm _ : NEONDT32ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDT64ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>;
+ def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>;
+ def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>;
+ def F64 : NEONDataTypeAsmPseudoInst<opc, ".f64", asm, iops>;
+ def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>;
+}
+// NEONDT64ReqAsmPseudoInst plus plain ".64"
+multiclass NEONDT64AsmPseudoInst<string opc, string asm, dag iops> {
+ def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>;
+ defm _ : NEONDT64ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDT64NoF64ReqAsmPseudoInst<string opc, string asm, dag iops> {
+ def I64 : NEONDataTypeAsmPseudoInst<opc, ".i64", asm, iops>;
+ def S64 : NEONDataTypeAsmPseudoInst<opc, ".s64", asm, iops>;
+ def U64 : NEONDataTypeAsmPseudoInst<opc, ".u64", asm, iops>;
+ def D : NEONDataTypeAsmPseudoInst<opc, ".d", asm, iops>;
+}
+// NEONDT64NoF64ReqAsmPseudoInst plus plain ".64"
+multiclass NEONDT64NoF64AsmPseudoInst<string opc, string asm, dag iops> {
+  def _64 : NEONDataTypeAsmPseudoInst<opc, ".64", asm, iops>;
+  defm _ : NEONDT64NoF64ReqAsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDTAnyAsmPseudoInst<string opc, string asm, dag iops> {
+ defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT32AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT64AsmPseudoInst<opc, asm, iops>;
+}
+multiclass NEONDTAnyNoF64AsmPseudoInst<string opc, string asm, dag iops> {
+ defm _ : NEONDT8AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT16AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT32AsmPseudoInst<opc, asm, iops>;
+ defm _ : NEONDT64NoF64AsmPseudoInst<opc, asm, iops>;
+}
+
+// Data type suffix token aliases. Implements Table A7-3 in the ARM ARM.
+def : TokenAlias<".s8", ".i8">;
+def : TokenAlias<".u8", ".i8">;
+def : TokenAlias<".s16", ".i16">;
+def : TokenAlias<".u16", ".i16">;
+def : TokenAlias<".s32", ".i32">;
+def : TokenAlias<".u32", ".i32">;
+def : TokenAlias<".s64", ".i64">;
+def : TokenAlias<".u64", ".i64">;
+
+def : TokenAlias<".i8", ".8">;
+def : TokenAlias<".i16", ".16">;
+def : TokenAlias<".i32", ".32">;
+def : TokenAlias<".i64", ".64">;
+
+def : TokenAlias<".p8", ".8">;
+def : TokenAlias<".p16", ".16">;
+
+def : TokenAlias<".f32", ".32">;
+def : TokenAlias<".f64", ".64">;
+def : TokenAlias<".f", ".f32">;
+def : TokenAlias<".d", ".f64">;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index be03924..516a080 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -238,27 +238,23 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
}]>;
-/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
-def imm1_15 : ImmLeaf<i32, [{
- return (int32_t)Imm >= 1 && (int32_t)Imm < 16;
-}]>;
-
/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
def imm16_31 : ImmLeaf<i32, [{
return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
}]>;
-def so_imm_neg :
- PatLeaf<(imm), [{
+def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; }
+def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1;
- }], so_imm_neg_XFORM>;
+ }], so_imm_neg_XFORM> {
+ let ParserMatchClass = so_imm_neg_asmoperand;
+}
// Note: this pattern doesn't require an encoder method and such, as it's
// only used on aliases (Pat<> and InstAlias<>). The actual encoding
-// is handled by the destination instructions, which use t2_so_imm.
+// is handled by the destination instructions, which use so_imm.
def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; }
-def so_imm_not :
- Operand<i32>, PatLeaf<(imm), [{
+def so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
}], so_imm_not_XFORM> {
let ParserMatchClass = so_imm_not_asmoperand;
@@ -512,6 +508,14 @@ def arm_i32imm : PatLeaf<(imm), [{
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
}]>;
+/// imm0_1 predicate - Immediate in the range [0,1].
+def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; }
+def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
+
+/// imm0_3 predicate - Immediate in the range [0,3].
+def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
+def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
+
/// imm0_7 predicate - Immediate in the range [0,7].
def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
@@ -520,6 +524,42 @@ def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_7AsmOperand;
}
+/// imm8 predicate - Immediate is exactly 8.
+def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; }
+def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> {
+ let ParserMatchClass = Imm8AsmOperand;
+}
+
+/// imm16 predicate - Immediate is exactly 16.
+def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; }
+def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> {
+ let ParserMatchClass = Imm16AsmOperand;
+}
+
+/// imm32 predicate - Immediate is exactly 32.
+def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; }
+def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
+ let ParserMatchClass = Imm32AsmOperand;
+}
+
+/// imm1_7 predicate - Immediate in the range [1,7].
+def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; }
+def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> {
+ let ParserMatchClass = Imm1_7AsmOperand;
+}
+
+/// imm1_15 predicate - Immediate in the range [1,15].
+def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; }
+def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> {
+ let ParserMatchClass = Imm1_15AsmOperand;
+}
+
+/// imm1_31 predicate - Immediate in the range [1,31].
+def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; }
+def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> {
+ let ParserMatchClass = Imm1_31AsmOperand;
+}
+
/// imm0_15 predicate - Immediate in the range [0,15].
def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; }
def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
@@ -544,6 +584,14 @@ def imm0_32 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_32AsmOperand;
}
+/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63].
+def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; }
+def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 64;
+}]> {
+ let ParserMatchClass = Imm0_63AsmOperand;
+}
+
/// imm0_255 predicate - Immediate in the range [0,255].
def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; }
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
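
Each of the new immediate operands pairs an AsmOperandClass for the parser with an ImmLeaf range test for isel; stripped of the TableGen plumbing, the predicates are simple interval checks, sketched here as plain functions:

```cpp
#include <cstdio>

// The ImmLeaf predicates above, written as ordinary functions.
bool imm0_1(int v)  { return v >= 0 && v <= 1; }
bool imm1_15(int v) { return v > 0 && v < 16; }
bool imm0_63(int v) { return v >= 0 && v < 64; }
bool imm16(int v)   { return v == 16; }

int main() {
  printf("%d %d %d %d\n", imm0_1(1), imm1_15(0), imm0_63(63), imm16(16));
  // 1 0 1 1
}
```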
@@ -812,6 +860,9 @@ def addrmode6dup : Operand<i32>,
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
let EncoderMethod = "getAddrMode6DupAddressOpValue";
+ // FIXME: This is close, but not quite right. The alignment specifier is
+ // different.
+ let ParserMatchClass = AddrMode6AsmOperand;
}
// addrmodepc := pc + reg
@@ -2753,23 +2804,25 @@ defm STRHT : AI3strT<0b1011, "strht">;
// Load / store multiple Instructions.
//
-multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
+multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
InstrItinClass itin, InstrItinClass itin_upd> {
// IA is the default, so no need for an explicit suffix on the
// mnemonic here; the form without one is the canonical spelling.
def IA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def IA_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2778,16 +2831,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def DA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "da${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def DA_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "da${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2796,16 +2851,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def DB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "db${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def DB_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "db${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2814,16 +2871,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def IB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "ib${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def IB_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "ib${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2834,10 +2893,12 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
let neverHasSideEffects = 1 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
-defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>;
+defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
-defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
+defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>;
} // neverHasSideEffects
@@ -2851,6 +2912,16 @@ def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
(LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
RegConstraint<"$Rn = $wb">;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm sysLDM : arm_ldst_mult<"ldm", " ^", 1, 1, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>;
+
//===----------------------------------------------------------------------===//
// Move Instructions.
//
@@ -4999,6 +5070,32 @@ def : MnemonicAlias<"usubaddx", "usax">;
// for isel.
def : ARMInstAlias<"mov${s}${p} $Rd, $imm",
(MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
+ (MOVi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+// Same for AND <--> BIC
+def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
+ (ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
+ (BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+
+// Likewise, "add Rd, so_imm_neg" -> sub
+def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (SUBri GPR:$Rd, GPR:$Rn, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"add${s}${p} $Rd, $imm",
+ (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via so_imm_neg
+def : ARMInstAlias<"cmp${p} $Rd, $imm",
+ (CMNzri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+def : ARMInstAlias<"cmn${p} $Rd, $imm",
+ (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
// The shifter forms of the MOV instruction are aliased to the ASR, LSL,
// LSR, ROR, and RRX instructions.
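
The aliases above rewrite an instruction whose immediate cannot be encoded into the dual instruction whose complemented or negated immediate can: ARM's so_imm is an 8-bit value rotated right by an even amount. A sketch of the encodability test behind getSOImmVal and the mov/mvn decision (simplified; the real helper also returns the encoding):

```cpp
#include <cstdint>
#include <cstdio>

// True if v is an ARM "shifter operand" immediate: an 8-bit value
// rotated right by an even amount within a 32-bit word.
bool isSOImm(uint32_t v) {
  for (unsigned rot = 0; rot < 32; rot += 2)
    if (((v << rot) | (v >> ((32 - rot) & 31))) <= 0xFF)
      return true;
  return false;
}

int main() {
  uint32_t imm = 0xFFFFFF00u;              // not directly encodable...
  printf("mov-able: %d\n", isSOImm(imm));  // 0
  printf("mvn-able: %d\n", isSOImm(~imm)); // 1 -> assemble as mvn with 0xFF
}
```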
@@ -5056,4 +5153,8 @@ def : ARMInstAlias<"ror${s}${p} $Rn, $Rm",
// 'mul' instruction can be specified with only two operands.
def : ARMInstAlias<"mul${s}${p} $Rn, $Rm",
- (MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+ (MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p, cc_out:$s)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
+ (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index f2ca963..c40860d 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -74,9 +74,11 @@ def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
let MIOperandInfo = (ops i32imm);
}
+// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
let Name = "VecListOneD";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
let ParserMatchClass = VecListOneDAsmOperand;
@@ -85,6 +87,7 @@ def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
def VecListTwoDAsmOperand : AsmOperandClass {
let Name = "VecListTwoD";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> {
let ParserMatchClass = VecListTwoDAsmOperand;
@@ -93,6 +96,7 @@ def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> {
def VecListThreeDAsmOperand : AsmOperandClass {
let Name = "VecListThreeD";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
let ParserMatchClass = VecListThreeDAsmOperand;
@@ -101,6 +105,7 @@ def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
def VecListFourDAsmOperand : AsmOperandClass {
let Name = "VecListFourD";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
let ParserMatchClass = VecListFourDAsmOperand;
@@ -109,11 +114,92 @@ def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
def VecListTwoQAsmOperand : AsmOperandClass {
let Name = "VecListTwoQ";
let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
}
-def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwo"> {
+def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwoSpaced"> {
let ParserMatchClass = VecListTwoQAsmOperand;
}
+// Register list of one D register, with "all lanes" subscripting.
+def VecListOneDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
+ let ParserMatchClass = VecListOneDAllLanesAsmOperand;
+}
+// Register list of two D registers, with "all lanes" subscripting.
+def VecListTwoDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> {
+ let ParserMatchClass = VecListTwoDAllLanesAsmOperand;
+}
+
+// Register list of one D register, with byte lane subscripting.
+def VecListOneDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListOneDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of two D registers, with byte lane subscripting.
+def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
@@ -272,12 +358,23 @@ class VLDQWBregisterPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr, rGPR:$offset), itin,
"$addr.addr = $wb">;
+
class VLDQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
+class VLDQQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr), itin,
+ "$addr.addr = $wb">;
+class VLDQQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset), itin,
+ "$addr.addr = $wb">;
+
+
class VLDQQQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
"$src = $dst">;
@@ -462,31 +559,23 @@ defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
// VLD2 : Vector Load (multiple 2-element structures)
-class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
+class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin>
: NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
- (ins addrmode6:$Rn), IIC_VLD2,
- "vld2", Dt, "$Vd, $Rn", "", []> {
- let Rm = 0b1111;
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
-}
-class VLD2Q<bits<4> op7_4, string Dt, RegisterOperand VdTy>
- : NLdSt<0, 0b10, 0b0011, op7_4,
- (outs VdTy:$Vd),
- (ins addrmode6:$Rn), IIC_VLD2x2,
+ (ins addrmode6:$Rn), itin,
"vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
}
-def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8", VecListTwoD>;
-def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16", VecListTwoD>;
-def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32", VecListTwoD>;
+def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2>;
+def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2>;
+def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2>;
-def VLD2q8 : VLD2Q<{0,0,?,?}, "8", VecListFourD>;
-def VLD2q16 : VLD2Q<{0,1,?,?}, "16", VecListFourD>;
-def VLD2q32 : VLD2Q<{1,0,?,?}, "32", VecListFourD>;
+def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
+def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
+def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;
def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>;
def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
@@ -497,47 +586,56 @@ def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
// ...with address register writeback:
-class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy>
- : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u,
- "vld2", Dt, "$Vd, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
-}
-class VLD2QWB<bits<4> op7_4, string Dt, RegisterOperand VdTy>
- : NLdSt<0, 0b10, 0b0011, op7_4,
- (outs VdTy:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u,
- "vld2", Dt, "$Vd, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy, InstrItinClass itin> {
+ def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), itin,
+ "vld2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), itin,
+ "vld2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>;
-def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>;
-def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>;
+defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>;
+defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>;
+defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>;
-def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8", VecListFourD>;
-def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16", VecListFourD>;
-def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32", VecListFourD>;
+defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;
-def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
-def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
-def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
+def VLD2d16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
+def VLD2d32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>;
+def VLD2d8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
+def VLD2d16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
+def VLD2d32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>;
-def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
// ...with double-spaced registers
-def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
-def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
-def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
-def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
-def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
-def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
+def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>;
+def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>;
+def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>;
+defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>;
+defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>;
+defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>;
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
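
The `_fixed`/`_register` split introduced throughout these multiclasses mirrors how NEON structure loads/stores encode writeback in the 4-bit Rm field: 0b1111 means no writeback, 0b1101 means post-increment by the transfer size, and any other value post-increments by that register. A small decoder sketch of that convention:

```cpp
#include <cstdio>

// Decode the writeback mode of a NEON structure load/store from its
// 4-bit Rm field (per the scheme the multiclasses above encode).
void describeWriteback(unsigned Rm) {
  if (Rm == 0b1111)      printf("no writeback\n");
  else if (Rm == 0b1101) printf("writeback: Rn += transfer size (fixed)\n");
  else                   printf("writeback: Rn += r%u (register)\n", Rm);
}

int main() {
  describeWriteback(0b1111);
  describeWriteback(0b1101);
  describeWriteback(3);
}
```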
@@ -997,9 +1095,11 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn),
- IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
- [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
+ (ins addrmode6dup:$Rn),
+ IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
+ [(set VecListOneDAllLanes:$Vd,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1025,9 +1125,9 @@ def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
class VLD1QDUP<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd),
(ins addrmode6dup:$Rn), IIC_VLD1dup,
- "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
@@ -1038,32 +1138,63 @@ def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
// ...with address register writeback:
-class VLD1DUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
- "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD1DupInstruction";
+multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-class VLD1QDUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
- "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD1DupInstruction";
+multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListTwoDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">;
-def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
-def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;
+defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">;
+defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
+defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;
-def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">;
-def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
-def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;
+defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">;
+defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
+defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
-def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
-def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
-def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
+def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
+def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>;
+def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
+def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
+def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
class VLD2DUP<bits<4> op7_4, string Dt>
@@ -1329,94 +1460,109 @@ def VST1q64PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>;
// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
- IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ (ins addrmode6:$Rn, VecListThreeD:$Vd),
+ IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVSTInstruction";
}
-class VST1D3WB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3),
- IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1D3WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
+ IIC_VLD1x3u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8T : VST1D3<{0,0,0,?}, "8">;
-def VST1d16T : VST1D3<{0,1,0,?}, "16">;
-def VST1d32T : VST1D3<{1,0,0,?}, "32">;
-def VST1d64T : VST1D3<{1,1,0,?}, "64">;
+def VST1d8T : VST1D3<{0,0,0,?}, "8">;
+def VST1d16T : VST1D3<{0,1,0,?}, "16">;
+def VST1d32T : VST1D3<{1,0,0,?}, "32">;
+def VST1d64T : VST1D3<{1,1,0,?}, "64">;
-def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">;
-def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
-def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
-def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;
+defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">;
+defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
+defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
+defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;
-def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
-def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
+ (ins addrmode6:$Rn, VecListFourD:$Vd),
+ IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
[]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
}
-class VST1D4WB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
- "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1D4WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VLD1x4u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
-def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
-def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
-def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
+def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
-def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">;
-def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
-def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
-def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;
+defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">;
+defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
+defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
+defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;
-def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
-def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
// VST2 : Vector Store (multiple 2-element structures)
-class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
- IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
- let Rm = 0b1111;
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
-}
-class VST2Q<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
- "", []> {
+class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
+ itin, "vst2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
}
-def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">;
-def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">;
-def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">;
+def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VST2>;
+def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VST2>;
+def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VST2>;
-def VST2q8 : VST2Q<{0,0,?,?}, "8">;
-def VST2q16 : VST2Q<{0,1,?,?}, "16">;
-def VST2q32 : VST2Q<{1,0,?,?}, "32">;
+def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>;
+def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
+def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;
def VST2d8Pseudo : VSTQPseudo<IIC_VST2>;
def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
@@ -1427,47 +1573,76 @@ def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
// ...with address register writeback:
-class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
- IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy> {
+ def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-class VST2QWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
- "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST2QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">;
-def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
-def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;
+defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>;
+defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>;
+defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>;
-def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">;
-def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
-def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
+defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">;
+defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
+defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;
-def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d8PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>;
+def VST2d8PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32PseudoWB_register : VSTQWBPseudo<IIC_VST2u>;
-def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>;
// ...with double-spaced registers
-def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">;
-def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">;
-def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">;
-def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">;
-def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
-def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;
+def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>;
+def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>;
+def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>;
+defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>;
+defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>;
+defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>;
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1741,10 +1916,10 @@ def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
- "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
- "$addr.addr = $wb", []> {
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
+ "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST2LN";
}
@@ -2573,9 +2748,9 @@ class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
- (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm,
+ (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
(i32 imm:$SIMM))))]>;
@@ -2805,14 +2980,11 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
v4i32, v4i32, OpNode, Commutable>;
}
-multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
- def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
- v4i16, ShOp>;
- def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
- v2i32, ShOp>;
- def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
- v8i16, v4i16, ShOp>;
- def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
+ def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
+ def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
+ def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
+ def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
v4i32, v2i32, ShOp>;
}
@@ -3477,15 +3649,15 @@ multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
}
@@ -3574,7 +3746,7 @@ def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
v4f32, v4f32, fmul, 1>;
-defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
v2f32, fmul>;
@@ -4285,18 +4457,18 @@ defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
- ValueType OpTy, SDNode OpNode>
+ ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
- ResTy, OpTy, OpNode> {
+ ResTy, OpTy, ImmTy, OpNode> {
let Inst{21-16} = op21_16;
let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
- v8i16, v8i8, NEONvshlli>;
+ v8i16, v8i8, imm8, NEONvshlli>;
def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
- v4i32, v4i16, NEONvshlli>;
+ v4i32, v4i16, imm16, NEONvshlli>;
def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
- v2i64, v2i32, NEONvshlli>;
+ v2i64, v2i32, imm32, NEONvshlli>;
// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
@@ -4469,10 +4641,6 @@ def : InstAlias<"vmov${p} $Vd, $Vm",
(VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
def : InstAlias<"vmov${p} $Vd, $Vm",
(VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
-defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm",
- (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
-defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm",
- (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
// VMOV : Vector Move (Immediate)
@@ -4932,34 +5100,34 @@ def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
// VEXT : Vector Extract
-class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
+ (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
- (Ty DPR:$Vm), imm:$index)))]> {
+ (Ty DPR:$Vm), imm:$index)))]> {
bits<4> index;
let Inst{11-8} = index{3-0};
}
-class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
- (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
+ (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
- (Ty QPR:$Vm), imm:$index)))]> {
+ (Ty QPR:$Vm), imm:$index)))]> {
bits<4> index;
let Inst{11-8} = index{3-0};
}
-def VEXTd8 : VEXTd<"vext", "8", v8i8> {
+def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
let Inst{11-8} = index{3-0};
}
-def VEXTd16 : VEXTd<"vext", "16", v4i16> {
+def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
-def VEXTd32 : VEXTd<"vext", "32", v2i32> {
+def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
@@ -4968,17 +5136,21 @@ def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
(i32 imm:$index))),
(VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
-def VEXTq8 : VEXTq<"vext", "8", v16i8> {
+def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
let Inst{11-8} = index{3-0};
}
-def VEXTq16 : VEXTq<"vext", "16", v8i16> {
+def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
-def VEXTq32 : VEXTq<"vext", "32", v4i32> {
+def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
+def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
+ let Inst{11} = index{0};
+ let Inst{10-8} = 0b000;
+}
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
(v4f32 QPR:$Vm),
(i32 imm:$index))),
@@ -5026,17 +5198,17 @@ def VTBL1
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
+ (ins VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
+ (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL4
: N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
+ (ins VecListFourD:$Vn, DPR:$Vm),
NVTBLFrm, IIC_VTB4,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1
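+
+// Illustrative only: the table registers are now a single vector-list
+// operand, e.g. "vtbl.8 d16, {d0, d1}, d2" for VTBL2.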
def VTBL2Pseudo
@@ -5056,18 +5228,18 @@ def VTBX1
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
+ (ins DPR:$orig, VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
+ "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
+ (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
NVTBLFrm, IIC_VTBX3,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
+ "vtbx", "8", "$Vd, $Vn, $Vm",
"$orig = $Vd", []>;
def VTBX4
- : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
- DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
+ : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
+ "vtbx", "8", "$Vd, $Vn, $Vm",
"$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1
@@ -5207,11 +5379,83 @@ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
// Assembler aliases
//
-// VAND/VEOR/VORR accept but do not require a type suffix.
+def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
+def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
+
+
+// VADD two-operand aliases.
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
+ (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
+ (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
+ (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
+ (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
+ (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
+ (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
+ (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
+ (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
+ (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
+ (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
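+// Illustrative only: the destination doubles as the first source, so
+// "vadd.i32 q0, q1" is accepted as "vadd.i32 q0, q0, q1".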
+
+// VSUB two-operand aliases.
+def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
+ (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
+ (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
+ (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
+ (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
+ (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
+ (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
+ (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
+ (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
+ (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
+ (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VADDW two-operand aliases.
+def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm",
+ (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm",
+ (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm",
+ (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm",
+ (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm",
+ (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm",
+ (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
(VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
(VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
(VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
@@ -5220,245 +5464,450 @@ defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
(VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
(VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
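+
+// Illustrative only: any data type suffix is accepted here, so
+// "vand.i8 d0, d1, d2" assembles identically to "vand d0, d1, d2".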
-
-// VLD1 requires a size suffix, but also accepts type specific variants.
-// Load one D register.
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d8 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d16 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d32 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d64 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d8wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d16wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d32wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d64wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d8wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d16wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d32wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d64wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-
-// Load two D registers.
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1q8 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1q16 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1q32 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1q64 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1q8wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1q16wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1q32wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1q64wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1q8wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1q16wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1q32wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1q64wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn,
- rGPR:$Rm, pred:$p)>;
-
-// Load three D registers.
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d8T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d16T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d32T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d64T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d8Twb_fixed VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d16Twb_fixed VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d32Twb_fixed VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d64Twb_fixed VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d8Twb_register VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d16Twb_register VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d32Twb_register VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d64Twb_register VecListThreeD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-
-
-// Load four D registers.
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d8Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d16Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d32Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn",
- (VLD1d64Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d8Qwb_fixed VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d16Qwb_fixed VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d32Qwb_fixed VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!",
- (VLD1d64Qwb_fixed VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d8Qwb_register VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d16Qwb_register VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d32Qwb_register VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm",
- (VLD1d64Qwb_register VecListFourD:$Vd, zero_reg,
- addrmode6:$Rn, rGPR:$Rm, pred:$p)>;
-
-// VST1 requires a size suffix, but also accepts type specific variants.
-// Store one D register.
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1d8 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1d16 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1d32 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1d64 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1d8wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1d16wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1d32wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1d64wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1d8wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
- VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1d16wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
- VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1d32wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
- VecListOneD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1d64wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm,
- VecListOneD:$Vd, pred:$p)>;
-
-// Store two D registers.
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1q8 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1q16 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1q32 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
- (VST1q64 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-// with writeback, fixed stride
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1q8wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1q16wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1q32wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!",
- (VST1q64wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>;
-// with writeback, register stride
-defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1q8wb_register zero_reg, addrmode6:$Rn,
- rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1q16wb_register zero_reg, addrmode6:$Rn,
- rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1q32wb_register zero_reg, addrmode6:$Rn,
- rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
-defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm",
- (VST1q64wb_register zero_reg, addrmode6:$Rn,
- rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>;
-
-// FIXME: The three and four register VST1 instructions haven't been moved
-// to the VecList* encoding yet, so we can't do assembly parsing support
-// for them. Uncomment these when that happens.
-// Load three D registers.
-//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d8T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
-//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d16T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
-//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d32T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
-//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d64T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>;
-
-// Load four D registers.
-//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d8Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
-//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d16Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
-//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d32Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
-//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn",
-// (VST1d64Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>;
-
-
-// VTRN instructions data type suffix aliases for more-specific types.
-defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Dd, $Dm",
- (VTRNd8 DPR:$Dd, DPR:$Dm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Dd, $Dm",
- (VTRNd16 DPR:$Dd, DPR:$Dm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Dd, $Dm",
- (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
-
-defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Qd, $Qm",
- (VTRNq8 QPR:$Qd, QPR:$Qm, pred:$p)>;
-defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Qd, $Qm",
- (VTRNq16 QPR:$Qd, QPR:$Qm, pred:$p)>;
-defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Qd, $Qm",
- (VTRNq32 QPR:$Qd, QPR:$Qm, pred:$p)>;
+// ... two-operand aliases
+def : NEONInstAlias<"vand${p} $Vdn, $Vm",
+ (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vand${p} $Vdn, $Vm",
+ (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
+ (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
+ (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"veor${p} $Vdn, $Vm",
+ (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"veor${p} $Vdn, $Vm",
+ (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
+ (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
+ (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VMUL two-operand aliases.
+def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm",
+ (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm",
+ (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm",
+ (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm",
+ (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm",
+ (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm",
+ (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm",
+ (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm",
+ (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm",
+ (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm",
+ (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane",
+ (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane",
+ (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane",
+ (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane",
+ (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane",
+ (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane",
+ (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
+// VQADD (register) two-operand aliases.
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
+ (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
+ (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
+ (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
+ (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
+ (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
+ (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
+ (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
+ (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
+ (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
+ (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
+ (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
+ (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
+ (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
+ (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
+ (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
+ (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSHL (immediate) two-operand aliases.
+def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
+ (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
+ (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
+ (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
+ (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
+ (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
+ (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
+ (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
+ (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>;
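+// For example, "vshl.i32 q0, #8" is accepted as "vshl.i32 q0, q0, #8".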
+
+// VSHL (register) two-operand aliases.
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
+ (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
+ (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
+ (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
+ (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
+ (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
+ (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
+ (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
+ (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
+ (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
+ (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
+ (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
+ (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
+ (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
+ (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
+ (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
+ (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSHR (immediate) two-operand aliases.
+def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
+ (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
+ (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
+ (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
+ (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
+ (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
+ (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
+ (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
+ (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
+ (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
+ (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
+ (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
+ (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
+ (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
+ (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
+ (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
+ (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
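+// For example, "vshr.u8 d0, #3" is accepted as "vshr.u8 d0, d0, #3".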
+
+// VLD1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VLD1LNdAsm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdAsm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdAsm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VLD1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD1LNdWB_register_Asm :
+ NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VLD1LNdWB_register_Asm :
+ NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VLD1LNdWB_register_Asm :
+ NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
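+// Illustrative only (lane arbitrary): these match forms such as
+//   vld1.8 {d0[4]}, [r2]      @ no writeback
+//   vld1.8 {d0[4]}, [r2]!     @ writeback, fixed stride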
+
+
+// VST1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VST1LNdAsm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdAsm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdAsm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VST1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST1LNdWB_register_Asm :
+ NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VST1LNdWB_register_Asm :
+ NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VST1LNdWB_register_Asm :
+ NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VLD2LNdAsm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdAsm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdAsm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VLD2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VLD2LNdWB_register_Asm :
+ NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VLD2LNdWB_register_Asm :
+ NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VLD2LNdWB_register_Asm :
+ NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+defm VST2LNdAsm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdAsm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdAsm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+defm VST2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+defm VST2LNdWB_register_Asm :
+ NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VST2LNdWB_register_Asm :
+ NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+defm VST2LNdWB_register_Asm :
+ NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VMOV takes an optional datatype suffix
+defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
+
+// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
+ (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
+ (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
+ (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
+ (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
+ (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
+ (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
+ (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
+ (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
+ (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
+ (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
+ (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
+ (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
+ (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
+ (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
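+// For example, "vcle.s16 d0, d1, d2" encodes as "vcge.s16 d0, d2, d1".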
+
+// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
+ (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
+ (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
+ (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
+ (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
+ (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
+ (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
+ (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
+ (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
+ (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
+ (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
+ (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
+ (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
+ (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
+ (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+
+// Two-operand variants for VEXT
+def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
+ (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
+ (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
+ (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
+ (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
+ (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
+ (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm",
+ (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>;
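+// Illustrative only: "vext.8 d0, d1, #3" is accepted as
+// "vext.8 d0, d0, d1, #3"; the immediate range scales with element size.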
+
+// Two-operand variants for VQDMULH
+def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
+ (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
+ (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
+ (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
+ (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
+// these should restrict to just the Q register variants, but the register
+// classes are enough to match correctly regardless, so we keep it simple
+// and just use MnemonicAlias.
+def : NEONMnemonicAlias<"vbicq", "vbic">;
+def : NEONMnemonicAlias<"vandq", "vand">;
+def : NEONMnemonicAlias<"veorq", "veor">;
+def : NEONMnemonicAlias<"vorrq", "vorr">;
+
+def : NEONMnemonicAlias<"vmovq", "vmov">;
+def : NEONMnemonicAlias<"vmvnq", "vmvn">;
+// Explicit versions for floating point so that the FPImm variants get
+// handled early. The parser gets confused otherwise.
+def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
+def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
+
+def : NEONMnemonicAlias<"vaddq", "vadd">;
+def : NEONMnemonicAlias<"vsubq", "vsub">;
+
+def : NEONMnemonicAlias<"vminq", "vmin">;
+def : NEONMnemonicAlias<"vmaxq", "vmax">;
+
+def : NEONMnemonicAlias<"vmulq", "vmul">;
+
+def : NEONMnemonicAlias<"vabsq", "vabs">;
+
+def : NEONMnemonicAlias<"vshlq", "vshl">;
+def : NEONMnemonicAlias<"vshrq", "vshr">;
+
+def : NEONMnemonicAlias<"vcvtq", "vcvt">;
+
+def : NEONMnemonicAlias<"vcleq", "vcle">;
+def : NEONMnemonicAlias<"vceqq", "vceq">;
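+
+// Illustrative only: with these, gas-style "vaddq.i32 q0, q1, q2" parses
+// the same as the UAL form "vadd.i32 q0, q1, q2".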
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index c6cc98d..ac1a229 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1131,9 +1131,6 @@ def tRSB : // A8.6.141
"rsb", "\t$Rd, $Rn, #0",
[(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
-def : tInstAlias<"neg${s}${p} $Rd, $Rm",
- (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
-
// Subtract with carry register
let Uses = [CPSR] in
def tSBC : // A8.6.151
@@ -1435,3 +1432,8 @@ def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>;
// nothing).
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : tInstAlias<"neg${s}${p} $Rd, $Rm",
+ (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
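+// For example, "negs r2, r3" assembles as "rsbs r2, r3, #0".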
+
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 6129fa3..981592c 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -80,18 +80,19 @@ def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
// only used on aliases (Pat<> and InstAlias<>). The actual encoding
// is handled by the destination instructions, which use t2_so_imm.
def t2_so_imm_not_asmoperand : AsmOperandClass { let Name = "T2SOImmNot"; }
-def t2_so_imm_not : Operand<i32>,
- PatLeaf<(imm), [{
+def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1;
}], t2_so_imm_not_XFORM> {
let ParserMatchClass = t2_so_imm_not_asmoperand;
}
// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
-def t2_so_imm_neg : Operand<i32>,
- PatLeaf<(imm), [{
+def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; }
+def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
-}], t2_so_imm_neg_XFORM>;
+}], t2_so_imm_neg_XFORM> {
+ let ParserMatchClass = t2_so_imm_neg_asmoperand;
+}
/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0,4095].
def imm0_4095 : Operand<i32>,
@@ -1333,7 +1334,7 @@ def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
let mayStore = 1, neverHasSideEffects = 1 in {
def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
- (ins rGPR:$Rt, t2addrmode_imm8:$addr),
+ (ins GPRnopc:$Rt, t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"str", "\t$Rt, $addr!",
"$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
@@ -1357,13 +1358,13 @@ def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
} // mayStore = 1, neverHasSideEffects = 1
def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
- (ins rGPR:$Rt, addr_offset_none:$Rn,
+ (ins GPRnopc:$Rt, addr_offset_none:$Rn,
t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
"str", "\t$Rt, $Rn$offset",
"$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPRnopc:$Rn_wb,
- (post_store rGPR:$Rt, addr_offset_none:$Rn,
+ (post_store GPRnopc:$Rt, addr_offset_none:$Rn,
t2am_imm8_offset:$offset))]>;
def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
@@ -3971,6 +3972,18 @@ def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+// STMIA/STMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"stm${p} $Rn, $regs",
+ (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"stm${p} $Rn!, $regs",
+ (t2STMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// LDMIA/LDMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"ldm${p} $Rn, $regs",
+ (t2LDMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"ldm${p} $Rn!, $regs",
+ (t2LDMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
// STMDB/STMDB_UPD aliases w/ the optional .w suffix
def : t2InstAlias<"stmdb${p}.w $Rn, $regs",
(t2STMDB GPR:$Rn, pred:$p, reglist:$regs)>;
@@ -4084,8 +4097,50 @@ def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
// for isel.
def : t2InstAlias<"mov${p} $Rd, $imm",
(t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+def : t2InstAlias<"mvn${p} $Rd, $imm",
+ (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+// Same for AND <--> BIC
+def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"bic${s}${p} $Rdn, $imm",
+ (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rdn, $imm",
+ (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
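+// Illustrative only: an immediate whose complement is encodable is flipped,
+// e.g. "and r0, r1, #0xffffff00" encodes as "bic r0, r1, #0xff".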
+// Likewise, "add Rd, t2_so_imm_neg" -> sub
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rd, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via t2_so_imm_neg
+def : t2InstAlias<"cmp${p} $Rd, $imm",
+ (t2CMNzri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rd, $imm",
+ (t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
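+// For example, "cmp r0, #-2" encodes as "cmn r0, #2", and vice versa.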
// Wide 'mul' encoding can be specified with only two operands.
def : t2InstAlias<"mul${p} $Rn, $Rm",
- (t2MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p)>;
+ (t2MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : t2InstAlias<"neg${s}${p} $Rd, $Rm",
+ (t2RSBri rGPR:$Rd, rGPR:$Rm, 0, pred:$p, cc_out:$s)>;
+
+// MOV so_reg assembler pseudos. InstAlias isn't expressive enough for
+// these, unfortunately.
+def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+
+// ADR w/o the .w suffix
+def : t2InstAlias<"adr${p} $Rd, $addr",
+ (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e420135..5d43556 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1160,18 +1160,64 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
//===----------------------------------------------------------------------===//
// Assembler aliases.
//
+// A few mnemonic aliases for pre-unified syntax. We don't guarantee to
+// support them all, but supporting at least some of the basics is
+// good for compatibility with legacy assembly.
+def : VFP2MnemonicAlias<"flds", "vldr">;
+def : VFP2MnemonicAlias<"fldd", "vldr">;
+def : VFP2MnemonicAlias<"fmrs", "vmov">;
+def : VFP2MnemonicAlias<"fmsr", "vmov">;
+def : VFP2MnemonicAlias<"fsqrts", "vsqrt">;
+def : VFP2MnemonicAlias<"fsqrtd", "vsqrt">;
+def : VFP2MnemonicAlias<"fadds", "vadd.f32">;
+def : VFP2MnemonicAlias<"faddd", "vadd.f64">;
+def : VFP2MnemonicAlias<"fmrdd", "vmov">;
+def : VFP2MnemonicAlias<"fmrds", "vmov">;
+def : VFP2MnemonicAlias<"fmrrd", "vmov">;
+def : VFP2MnemonicAlias<"fmdrr", "vmov">;
+def : VFP2MnemonicAlias<"fmuld", "vmul.f64">;
+def : VFP2MnemonicAlias<"fnegs", "vneg.f32">;
+def : VFP2MnemonicAlias<"fnegd", "vneg.f64">;
+def : VFP2MnemonicAlias<"ftosizd", "vcvt.s32.f64">;
+def : VFP2MnemonicAlias<"ftosid", "vcvtr.s32.f64">;
+def : VFP2MnemonicAlias<"ftosizs", "vcvt.s32.f32">;
+def : VFP2MnemonicAlias<"ftosis", "vcvtr.s32.f32">;
+def : VFP2MnemonicAlias<"ftouizd", "vcvt.u32.f64">;
+def : VFP2MnemonicAlias<"ftouid", "vcvtr.u32.f64">;
+def : VFP2MnemonicAlias<"ftouizs", "vcvt.u32.f32">;
+def : VFP2MnemonicAlias<"ftouis", "vcvtr.u32.f32">;
+def : VFP2MnemonicAlias<"fsitod", "vcvt.f64.s32">;
+def : VFP2MnemonicAlias<"fsitos", "vcvt.f32.s32">;
+def : VFP2MnemonicAlias<"fuitod", "vcvt.f64.u32">;
+def : VFP2MnemonicAlias<"fuitos", "vcvt.f32.u32">;
+def : VFP2MnemonicAlias<"fsts", "vstr">;
+def : VFP2MnemonicAlias<"fstd", "vstr">;
+def : VFP2MnemonicAlias<"fmacd", "vmla.f64">;
+def : VFP2MnemonicAlias<"fmacs", "vmla.f32">;
def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
+ (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm",
+ (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm",
+ (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm",
+ (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+
+// No need for the size suffix on VSQRT. It's implied by the register classes.
+def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
// VLDR/VSTR accept an optional type suffix.
-defm : VFPDT32InstAlias<"vldr${p}", "$Sd, $addr",
- (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
-defm : VFPDT32InstAlias<"vstr${p}", "$Sd, $addr",
- (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
-defm : VFPDT64InstAlias<"vldr${p}", "$Dd, $addr",
- (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
-defm : VFPDT64InstAlias<"vstr${p}", "$Dd, $addr",
- (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr",
+ (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.32 $Sd, $addr",
+ (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr",
+ (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr",
+ (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
// VMUL has a two-operand form (implied destination operand)
def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm",
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index c8728f4..6712fb6 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -33,6 +33,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -1471,19 +1472,18 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
while (++I != E) {
if (I->isDebugValue() || MemOps.count(&*I))
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
+ if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
return false;
- if (isLd && MCID.mayStore())
+ if (isLd && I->mayStore())
return false;
if (!isLd) {
- if (MCID.mayLoad())
+ if (I->mayLoad())
return false;
// It's not safe to move the first 'str' down.
// str r1, [r0]
// strh r5, [r0]
// str r4, [r0, #+4]
- if (MCID.mayStore())
+ if (I->mayStore())
return false;
}
for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
@@ -1773,8 +1773,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
while (MBBI != E) {
for (; MBBI != E; ++MBBI) {
MachineInstr *MI = MBBI;
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.isCall() || MCID.isTerminator()) {
+ if (MI->isCall() || MI->isTerminator()) {
// Stop at barriers.
++MBBI;
break;
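
The call-site changes above assume MachineInstr now forwards MCInstrDesc queries itself. A hypothetical minimal shape of that delegation; the Model suffix marks these as sketches, not the real llvm::MCInstrDesc / llvm::MachineInstr classes:

    struct MCInstrDescModel {
      bool Call, Terminator, MayLoad, MayStore;
      bool isCall() const { return Call; }
      bool isTerminator() const { return Terminator; }
      bool mayLoad() const { return MayLoad; }
      bool mayStore() const { return MayStore; }
    };

    struct MachineInstrModel {
      const MCInstrDescModel *Desc;
      const MCInstrDescModel &getDesc() const { return *Desc; }
      // Forwarding accessors, so call sites can query the instruction
      // directly instead of fetching the descriptor first:
      bool isCall() const { return getDesc().isCall(); }
      bool isTerminator() const { return getDesc().isTerminator(); }
      bool mayLoad() const { return getDesc().mayLoad(); }
      bool mayStore() const { return getDesc().mayStore(); }
    };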
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 6cbb24b..61b75cb 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -38,22 +38,25 @@ extern "C" void LLVMInitializeARMTarget() {
///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
// Default to soft float ABI
- if (FloatABIType == FloatABI::Default)
- FloatABIType = FloatABI::Soft;
+ if (Options.FloatABIType == FloatABI::Default)
+ this->Options.FloatABIType = FloatABI::Soft;
}
ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL), InstrInfo(Subtarget),
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"v128:32:128-v64:32:64-n32-S32") :
@@ -73,9 +76,10 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
InstrInfo(Subtarget.hasThumb2()
? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
@@ -143,10 +147,16 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM) {
}
bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM) {
- if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
- PM.add(createThumb2SizeReductionPass());
+ if (Subtarget.isThumb2()) {
+ if (!Subtarget.prefers32BitThumb())
+ PM.add(createThumb2SizeReductionPass());
+
+ // The constant island pass works on unbundled instructions.
+ PM.add(createUnpackMachineBundlesPass());
+ }
PM.add(createARMConstantIslandPass());
+
return true;
}
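
The TargetOptions plumbing above moves the float-ABI default from a global into per-target-machine state. A standalone model of just that defaulting behavior, under simplified stand-in types:

    enum class FloatABI { Default, Soft, Hard };
    struct TargetOptionsModel { FloatABI FloatABIType = FloatABI::Default; };

    struct ARMTargetMachineModel {
      TargetOptionsModel Options; // per-machine copy, no longer a global
      explicit ARMTargetMachineModel(const TargetOptionsModel &Opts)
          : Options(Opts) {
        // Default to the soft-float ABI when the client didn't choose one,
        // exactly as the constructor above does.
        if (Options.FloatABIType == FloatABI::Default)
          Options.FloatABIType = FloatABI::Soft;
      }
    };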
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index a1f517b..cd77822 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -41,6 +41,7 @@ private:
public:
ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
@@ -71,6 +72,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
public:
ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
@@ -112,6 +114,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 19defa1..721a225 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -36,6 +36,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
ELF::SHF_WRITE |
ELF::SHF_ALLOC,
SectionKind::getDataRel());
+ StructorOutputOrder = Structors::PriorityOrder;
LSDASection = NULL;
}
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index bb83e5e..cd86065 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -39,10 +39,15 @@ namespace {
class ARMOperand;
+enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
+
class ARMAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ // Map of register aliases registered via the .req directive.
+ StringMap<unsigned> RegisterReqs;
+
struct {
ARMCC::CondCodes Cond; // Condition for IT block.
unsigned Mask:4; // Condition mask for instructions.
@@ -90,9 +95,12 @@ class ARMAsmParser : public MCTargetAsmParser {
unsigned &ShiftAmount);
bool parseDirectiveWord(unsigned Size, SMLoc L);
bool parseDirectiveThumb(SMLoc L);
+ bool parseDirectiveARM(SMLoc L);
bool parseDirectiveThumbFunc(SMLoc L);
bool parseDirectiveCode(SMLoc L);
bool parseDirectiveSyntax(SMLoc L);
+ bool parseDirectiveReq(StringRef Name, SMLoc L);
+ bool parseDirectiveUnreq(SMLoc L);
StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
bool &CarrySetting, unsigned &ProcessorIMod,
@@ -161,6 +169,7 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index);
// Asm Match Converter Methods
bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
@@ -271,6 +280,8 @@ class ARMOperand : public MCParsedAsmOperand {
k_DPRRegisterList,
k_SPRRegisterList,
k_VectorList,
+ k_VectorListAllLanes,
+ k_VectorListIndexed,
k_ShiftedRegister,
k_ShiftedImmediate,
k_ShifterImmediate,
@@ -324,6 +335,8 @@ class ARMOperand : public MCParsedAsmOperand {
struct {
unsigned RegNum;
unsigned Count;
+ unsigned LaneIndex;
+ bool isDoubleSpaced;
} VectorList;
struct {
@@ -409,6 +422,8 @@ public:
Registers = o.Registers;
break;
case k_VectorList:
+ case k_VectorListAllLanes:
+ case k_VectorListIndexed:
VectorList = o.VectorList;
break;
case k_CoprocNum:
@@ -562,6 +577,22 @@ public:
int64_t Value = CE->getValue();
return Value >= 0 && Value < 256;
}
+ bool isImm0_1() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 2;
+ }
+ bool isImm0_3() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 4;
+ }
bool isImm0_7() const {
if (Kind != k_Immediate)
return false;
@@ -586,6 +617,94 @@ public:
int64_t Value = CE->getValue();
return Value >= 0 && Value < 32;
}
+ bool isImm0_63() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 64;
+ }
+ bool isImm8() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 8;
+ }
+ bool isImm16() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 16;
+ }
+ bool isImm32() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 32;
+ }
+ bool isShrImm8() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 8;
+ }
+ bool isShrImm16() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 16;
+ }
+ bool isShrImm32() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 32;
+ }
+ bool isShrImm64() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 64;
+ }
+ bool isImm1_7() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 8;
+ }
+ bool isImm1_15() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 16;
+ }
+ bool isImm1_31() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 32;
+ }
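+
+ // Note: the immediate predicates above are all instances of one pattern.
+ // A hypothetical refactoring sketch (not part of this patch) showing how
+ // the shared range check could collapse into a single helper:
+ //
+ //   template <int64_t Lo, int64_t Hi>
+ //   static bool isImmInRange(int64_t Value) {
+ //     return Value >= Lo && Value <= Hi;
+ //   }
+ //
+ // so isImm0_63() checks isImmInRange<0, 63> and isShrImm32() checks
+ // isImmInRange<1, 32> after the usual k_Immediate/MCConstantExpr guards.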
bool isImm1_16() const {
if (Kind != k_Immediate)
return false;
@@ -676,6 +795,14 @@ public:
int64_t Value = CE->getValue();
return ARM_AM::getSOImmVal(~Value) != -1;
}
+ bool isARMSOImmNeg() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getSOImmVal(-Value) != -1;
+ }
bool isT2SOImm() const {
if (Kind != k_Immediate)
return false;
@@ -692,6 +819,14 @@ public:
int64_t Value = CE->getValue();
return ARM_AM::getT2SOImmVal(~Value) != -1;
}
+ bool isT2SOImmNeg() const {
+ if (Kind != k_Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getT2SOImmVal(-Value) != -1;
+ }
bool isSetEndImm() const {
if (Kind != k_Immediate)
return false;
@@ -892,9 +1027,9 @@ public:
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset in range [-255, -1]. INT32_MIN encodes #-0 (negative zero).
- if (!Memory.OffsetImm) return true;
+ if (!Memory.OffsetImm) return false;
int64_t Val = Memory.OffsetImm->getValue();
- return Val > -256 && Val < 0;
+ return (Val == INT32_MIN) || (Val > -256 && Val < 0);
}
bool isMemUImm12Offset() const {
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
@@ -940,31 +1075,75 @@ public:
bool isProcIFlags() const { return Kind == k_ProcIFlags; }
// NEON operands.
+ bool isSingleSpacedVectorList() const {
+ return Kind == k_VectorList && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorList() const {
+ return Kind == k_VectorList && VectorList.isDoubleSpaced;
+ }
bool isVecListOneD() const {
- if (Kind != k_VectorList) return false;
+ if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 1;
}
bool isVecListTwoD() const {
- if (Kind != k_VectorList) return false;
+ if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 2;
}
bool isVecListThreeD() const {
- if (Kind != k_VectorList) return false;
+ if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 3;
}
bool isVecListFourD() const {
- if (Kind != k_VectorList) return false;
+ if (!isSingleSpacedVectorList()) return false;
return VectorList.Count == 4;
}
bool isVecListTwoQ() const {
- if (Kind != k_VectorList) return false;
- //FIXME: We haven't taught the parser to handle by-two register lists
- // yet, so don't pretend to know one.
- return VectorList.Count == 2 && false;
+ if (!isDoubleSpacedVectorList()) return false;
+ return VectorList.Count == 2;
+ }
+
+ bool isVecListOneDAllLanes() const {
+ if (Kind != k_VectorListAllLanes) return false;
+ return VectorList.Count == 1;
+ }
+
+ bool isVecListTwoDAllLanes() const {
+ if (Kind != k_VectorListAllLanes) return false;
+ return VectorList.Count == 2;
+ }
+
+ bool isVecListOneDByteIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListOneDHWordIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListOneDWordIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListTwoDByteIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListTwoDHWordIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListTwoDWordIndexed() const {
+ if (Kind != k_VectorListIndexed) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 1;
}
bool isVectorIndex8() const {
@@ -1233,6 +1412,14 @@ public:
Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
}
+ void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a t2_so_imm, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
+ }
+
void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The operand is actually a so_imm, but we have its bitwise
@@ -1241,6 +1428,14 @@ public:
Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
}
+ void addARMSOImmNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a so_imm, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
+ }
+
void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
@@ -1527,37 +1722,15 @@ public:
Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
}
- void addVecListOneDOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
- }
-
- void addVecListTwoDOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // Only the first register actually goes on the instruction. The rest
- // are implied by the opcode.
- Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
- }
-
- void addVecListThreeDOperands(MCInst &Inst, unsigned N) const {
+ void addVecListOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- // Only the first register actually goes on the instruction. The rest
- // are implied by the opcode.
Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
}
- void addVecListFourDOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // Only the first register actually goes on the instruction. The rest
- // are implied by the opcode.
- Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
- }
-
- void addVecListTwoQOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // Only the first register actually goes on the instruction. The rest
- // are implied by the opcode.
+ void addVecListIndexedOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
+ Inst.addOperand(MCOperand::CreateImm(VectorList.LaneIndex));
}
void addVectorIndex8Operands(MCInst &Inst, unsigned N) const {
@@ -1780,10 +1953,32 @@ public:
}
static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count,
- SMLoc S, SMLoc E) {
+ bool isDoubleSpaced, SMLoc S, SMLoc E) {
ARMOperand *Op = new ARMOperand(k_VectorList);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorListAllLanes);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count,
+ unsigned Index, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorListIndexed);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.LaneIndex = Index;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
@@ -1982,6 +2177,14 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << "<vector_list " << VectorList.Count << " * "
<< VectorList.RegNum << ">";
break;
+ case k_VectorListAllLanes:
+ OS << "<vector_list(all lanes) " << VectorList.Count << " * "
+ << VectorList.RegNum << ">";
+ break;
+ case k_VectorListIndexed:
+ OS << "<vector_list(lane " << VectorList.LaneIndex << ") "
+ << VectorList.Count << " * " << VectorList.RegNum << ">";
+ break;
case k_Token:
OS << "'" << getToken() << "'";
break;
@@ -2000,7 +2203,9 @@ static unsigned MatchRegisterName(StringRef Name);
bool ARMAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
+ StartLoc = Parser.getTok().getLoc();
RegNo = tryParseRegister();
+ EndLoc = Parser.getTok().getLoc();
return (RegNo == (unsigned)-1);
}
@@ -2013,8 +2218,6 @@ int ARMAsmParser::tryParseRegister() {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) return -1;
- // FIXME: Validate register for the current architecture; we have to do
- // validation later, so maybe there is no need for this here.
std::string lowerCase = Tok.getString().lower();
unsigned RegNum = MatchRegisterName(lowerCase);
if (!RegNum) {
@@ -2023,9 +2226,34 @@ int ARMAsmParser::tryParseRegister() {
.Case("r14", ARM::LR)
.Case("r15", ARM::PC)
.Case("ip", ARM::R12)
+ // Additional register name aliases for 'gas' compatibility.
+ .Case("a1", ARM::R0)
+ .Case("a2", ARM::R1)
+ .Case("a3", ARM::R2)
+ .Case("a4", ARM::R3)
+ .Case("v1", ARM::R4)
+ .Case("v2", ARM::R5)
+ .Case("v3", ARM::R6)
+ .Case("v4", ARM::R7)
+ .Case("v5", ARM::R8)
+ .Case("v6", ARM::R9)
+ .Case("v7", ARM::R10)
+ .Case("v8", ARM::R11)
+ .Case("sb", ARM::R9)
+ .Case("sl", ARM::R10)
+ .Case("fp", ARM::R11)
.Default(0);
}
- if (!RegNum) return -1;
+ if (!RegNum) {
+ // Check for aliases registered via .req.
+ StringMap<unsigned>::const_iterator Entry =
+ RegisterReqs.find(Tok.getIdentifier());
+ // If no match, return failure.
+ if (Entry == RegisterReqs.end())
+ return -1;
+ Parser.Lex(); // Eat identifier token.
+ return Entry->getValue();
+ }
Parser.Lex(); // Eat identifier token.
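
The .req support added above boils down to a fallback name table consulted after the builtin register names fail. A toy model with std::map standing in for llvm::StringMap; lookupReg and the example binding are illustrative only:

    #include <map>
    #include <string>

    static std::map<std::string, unsigned> RegisterReqs;

    // After builtin names fail, fall back to the .req table; -1 means
    // "not a register", matching tryParseRegister's convention.
    static int lookupReg(const std::string &Name) {
      std::map<std::string, unsigned>::const_iterator It =
          RegisterReqs.find(Name);
      return It == RegisterReqs.end() ? -1 : static_cast<int>(It->second);
    }

    // e.g. after "fpscratch .req r12": RegisterReqs["fpscratch"] = 12,
    // so lookupReg("fpscratch") == 12.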
@@ -2045,6 +2273,7 @@ int ARMAsmParser::tryParseShiftRegister(
std::string lowerCase = Tok.getString().lower();
ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
+ .Case("asl", ARM_AM::lsl)
.Case("lsl", ARM_AM::lsl)
.Case("lsr", ARM_AM::lsr)
.Case("asr", ARM_AM::asr)
@@ -2073,7 +2302,8 @@ int ARMAsmParser::tryParseShiftRegister(
ShiftReg = SrcReg;
} else {
// Figure out if this is shifted by a constant or a register (for non-RRX).
- if (Parser.getTok().is(AsmToken::Hash)) {
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
Parser.Lex(); // Eat hash (or dollar).
SMLoc ImmLoc = Parser.getTok().getLoc();
const MCExpr *ShiftExpr = 0;
@@ -2446,6 +2676,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the comma.
RegLoc = Parser.getTok().getLoc();
int OldReg = Reg;
+ const AsmToken RegTok = Parser.getTok();
Reg = tryParseRegister();
if (Reg == -1)
return Error(RegLoc, "register expected");
@@ -2459,8 +2690,13 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (!RC->contains(Reg))
return Error(RegLoc, "invalid register in register list");
// List must be monotonically increasing.
- if (getARMRegisterNumbering(Reg) <= getARMRegisterNumbering(OldReg))
+ if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg))
return Error(RegLoc, "register list not in ascending order");
+ if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) {
+ Warning(RegLoc, "duplicated register (" + RegTok.getString() +
+ ") in register list");
+ continue;
+ }
// VFP register lists must also be contiguous.
// It's OK to use the enumeration values directly here, as the
// VFP register classes have the enum sorted properly.
@@ -2477,13 +2713,55 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return Error(E, "'}' expected");
Parser.Lex(); // Eat '}' token.
+ // Push the register list operand.
Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
+
+ // The ARM system instruction variants for LDM/STM have a '^' token here.
+ if (Parser.getTok().is(AsmToken::Caret)) {
+ Operands.push_back(ARMOperand::CreateToken("^",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat '^' token.
+ }
+
return false;
}
+// Helper function to parse the lane index for vector lists.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
+ Index = 0; // Always return a defined index value.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ Parser.Lex(); // Eat the '['.
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ // "Dn[]" is the 'all lanes' syntax.
+ LaneKind = AllLanes;
+ Parser.Lex(); // Eat the ']'.
+ return MatchOperand_Success;
+ }
+ if (Parser.getTok().is(AsmToken::Integer)) {
+ int64_t Val = Parser.getTok().getIntVal();
+ // FIXME: Make this range check context sensitive for .8, .16, .32.
+ if (Val < 0 || Val > 7)
+ Error(Parser.getTok().getLoc(), "lane index out of range");
+ Index = Val;
+ LaneKind = IndexedLane;
+ Parser.Lex(); // Eat the lane index token.
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ Error(Parser.getTok().getLoc(), "']' expected");
+ Parser.Lex(); // Eat the ']'.
+ return MatchOperand_Success;
+ }
+ Error(Parser.getTok().getLoc(), "lane index must be empty or an integer");
+ return MatchOperand_ParseFail;
+ }
+ LaneKind = NoLanes;
+ return MatchOperand_Success;
+}
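+
+// parseVectorLane distinguishes three surface forms. A toy string-based
+// classifier illustrating the same three outcomes (names and the
+// string-suffix interface are illustrative, not the real token-based API):
+//
+//   enum LaneTy { NoLanes, AllLanes, IndexedLane };
+//   static LaneTy classifyLane(const std::string &Suffix, unsigned &Index) {
+//     Index = 0;                            // always a defined index value
+//     if (Suffix.empty()) return NoLanes;   // "d0"   -> whole register
+//     if (Suffix == "[]") return AllLanes;  // "d0[]" -> all-lanes form
+//     assert(Suffix.front() == '[' && Suffix.back() == ']');
+//     Index = std::stoul(Suffix.substr(1, Suffix.size() - 2)); // "d0[2]"
+//     return IndexedLane;
+//   }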
+
// parse a vector register list
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ VectorLaneTy LaneKind;
+ unsigned LaneIndex;
SMLoc S = Parser.getTok().getLoc();
// As an extension (to match gas), support a plain D register or Q register
// (without enclosing curly braces) as a single or double entry list,
@@ -2494,12 +2772,48 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_NoMatch;
SMLoc E = Parser.getTok().getLoc();
if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
- Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, S, E));
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ default:
+ assert(0 && "unexpected lane kind!");
+ case NoLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E));
+ break;
+ case AllLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 1,
+ LaneIndex, S,E));
+ break;
+ }
return MatchOperand_Success;
}
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
Reg = getDRegFromQReg(Reg);
- Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, S, E));
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ default:
+ assert(0 && "unexpected lane kind!");
+ case NoLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
+ break;
+ case AllLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 2,
+ LaneIndex, S,E));
+ break;
+ }
return MatchOperand_Success;
}
Error(S, "vector register expected");
@@ -2518,18 +2832,30 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
unsigned Count = 1;
+ int Spacing = 0;
unsigned FirstReg = Reg;
// The list is of D registers, but we also allow Q regs and just interpret
// them as the two D sub-registers.
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
FirstReg = Reg = getDRegFromQReg(Reg);
+ Spacing = 1; // double-spacing requires explicit D registers, otherwise
+ // it's ambiguous with four-register single spaced.
++Reg;
++Count;
}
+ if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
while (Parser.getTok().is(AsmToken::Comma) ||
Parser.getTok().is(AsmToken::Minus)) {
if (Parser.getTok().is(AsmToken::Minus)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(Parser.getTok().getLoc(),
+ "sequential registers in double spaced list");
+ return MatchOperand_ParseFail;
+ }
Parser.Lex(); // Eat the minus.
SMLoc EndLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
@@ -2554,6 +2880,16 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Error(EndLoc, "bad range in register list");
return MatchOperand_ParseFail;
}
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ EndLoc = Parser.getTok().getLoc();
// Add all the registers in the range to the register list.
Count += EndReg - Reg;
@@ -2575,6 +2911,13 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// The list is of D registers, but we also allow Q regs and just interpret
// them as the two D sub-registers.
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(RegLoc,
+ "invalid register in double-spaced list (must be 'D' register')");
+ return MatchOperand_ParseFail;
+ }
Reg = getDRegFromQReg(Reg);
if (Reg != OldReg + 1) {
Error(RegLoc, "non-contiguous register range");
@@ -2582,14 +2925,45 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
++Reg;
Count += 2;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
continue;
}
- // Normal D register. Just check that it's contiguous and keep going.
- if (Reg != OldReg + 1) {
+ // Normal D register.
+ // Figure out the register spacing (single or double) of the list if
+ // we don't know it already.
+ if (!Spacing)
+ Spacing = 1 + (Reg == OldReg + 2);
+
+ // Just check that it's contiguous and keep going.
+ if (Reg != OldReg + Spacing) {
Error(RegLoc, "non-contiguous register range");
return MatchOperand_ParseFail;
}
++Count;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ if (Spacing == 2 && LaneKind != NoLanes) {
+ Error(EndLoc,
+ "lane index specfier invalid in double spaced register list");
+ return MatchOperand_ParseFail;
+ }
}
SMLoc E = Parser.getTok().getLoc();
@@ -2599,7 +2973,22 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
Parser.Lex(); // Eat '}' token.
- Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count, S, E));
+ switch (LaneKind) {
+ default:
+ assert(0 && "unexpected lane kind in register list.");
+ case NoLanes:
+ Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count,
+ (Spacing == 2), S, E));
+ break;
+ case AllLanes:
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count,
+ LaneIndex, S, E));
+ break;
+ }
return MatchOperand_Success;
}
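
The spacing inference above keys off the gap between the first two D registers in the list. Distilled into a standalone helper (name is descriptive, not from the patch):

    // {d0,d1,...} is single-spaced; {d0,d2,...} is double-spaced.
    static int inferSpacing(unsigned FirstReg, unsigned SecondReg) {
      return SecondReg == FirstReg + 2 ? 2 : 1;
    }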
@@ -2786,7 +3175,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
Parser.Lex(); // Eat shift type token.
// There must be a '#' and a shift amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2864,7 +3254,8 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the operator.
// A '#' and a shift amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2924,7 +3315,8 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the operator.
// A '#' and a rotate amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2961,7 +3353,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
// The bitfield descriptor is really two operands, the LSB and the width.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2993,7 +3386,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat hash token.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -3087,7 +3481,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Tok.getLoc();
// Do immediates first, as we always parse those if we have a '#'.
- if (Parser.getTok().is(AsmToken::Hash)) {
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
Parser.Lex(); // Eat the '#' or '$'.
// Explicitly look for a '-', as we need to encode negative zero
// differently.
@@ -3444,7 +3839,7 @@ bool ARMAsmParser::
cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Vd
- ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
// Vn
@@ -3458,7 +3853,7 @@ bool ARMAsmParser::
cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Vd
- ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// Create a writeback register dummy placeholder.
Inst.addOperand(MCOperand::CreateImm(0));
// Vn
@@ -3478,7 +3873,7 @@ cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
// Vn
((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
// Vt
- ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
return true;
@@ -3494,7 +3889,7 @@ cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
// Vm
((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
// Vt
- ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1);
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
// pred
((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
return true;
@@ -3591,8 +3986,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// offset. Be friendly and also accept a plain integer (without a leading
// hash) for gas compatibility.
if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar) ||
Parser.getTok().is(AsmToken::Integer)) {
- if (Parser.getTok().is(AsmToken::Hash))
+ if (Parser.getTok().isNot(AsmToken::Integer))
Parser.Lex(); // Eat the '#' or '$'.
E = Parser.getTok().getLoc();
@@ -3690,7 +4086,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
if (Tok.isNot(AsmToken::Identifier))
return true;
StringRef ShiftName = Tok.getString();
- if (ShiftName == "lsl" || ShiftName == "LSL")
+ if (ShiftName == "lsl" || ShiftName == "LSL" ||
+ ShiftName == "asl" || ShiftName == "ASL")
St = ARM_AM::lsl;
else if (ShiftName == "lsr" || ShiftName == "LSR")
St = ARM_AM::lsr;
@@ -3710,7 +4107,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
Loc = Parser.getTok().getLoc();
// A '#' and a shift amount.
const AsmToken &HashTok = Parser.getTok();
- if (HashTok.isNot(AsmToken::Hash))
+ if (HashTok.isNot(AsmToken::Hash) &&
+ HashTok.isNot(AsmToken::Dollar))
return Error(HashTok.getLoc(), "'#' expected");
Parser.Lex(); // Eat hash token.
@@ -3739,7 +4137,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::Hash))
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
// Disambiguate the VMOV forms that can accept an FP immediate.
@@ -3852,6 +4251,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return parseMemory(Operands);
case AsmToken::LCurly:
return parseRegisterList(Operands);
+ case AsmToken::Dollar:
case AsmToken::Hash: {
// #42 -> immediate.
// TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
@@ -3990,7 +4390,9 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" ||
Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" ||
Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" ||
- Mnemonic == "vrsqrts" || Mnemonic == "srs" ||
+ Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" ||
+ Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" ||
+ Mnemonic == "fsts" ||
(Mnemonic == "movs" && isThumb()))) {
Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
CarrySetting = true;
@@ -4206,9 +4608,27 @@ static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
}
+static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);
/// Parse an arm instruction mnemonic followed by its operands.
bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Apply mnemonic aliases before doing anything else, as the destination
+ // mnemnonic may include suffices and we want to handle them normally.
+ // The generic tblgen'erated code does this later, at the start of
+ // MatchInstructionImpl(), but that's too late for aliases that include
+ // any sort of suffix.
+ unsigned AvailableFeatures = getAvailableFeatures();
+ applyMnemonicAliases(Name, AvailableFeatures);
+
+ // First check for the ARM-specific .req directive.
+ if (Parser.getTok().is(AsmToken::Identifier) &&
+ Parser.getTok().getIdentifier() == ".req") {
+ parseDirectiveReq(Name, NameLoc);
+ // We always return 'error' for this, as we're done with this
+ // statement and don't need to match the 'instruction."
+ return true;
+ }
+
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
StringRef Mnemonic = Name.slice(Start, Next);
@@ -4400,12 +4820,21 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
// Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the
- // end. Convert it to a token here.
+ // end. Convert it to a token here. Take care not to convert those
+ // that should hit the Thumb2 encoding.
if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
static_cast<ARMOperand*>(Operands[5])->isImm()) {
ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (CE && CE->getValue() == 0) {
+ if (CE && CE->getValue() == 0 &&
+ (isThumbOne() ||
+ // The cc_out operand matches the IT block.
+ ((inITBlock() != CarrySetting) &&
+ // Neither register operand is a high register.
+ (isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
+ isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()))))){
Operands.erase(Operands.begin() + 5);
Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
delete Op;
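
The guard added above can be restated as a small predicate; a sketch with descriptive argument names (not from the patch), mirroring the condition on the operand registers and the IT-block/flag-setting check:

    // Narrow "rsb ..., #0" to the 16-bit Thumb1 form only when targeting
    // Thumb1 outright, or when the flag setting matches the IT-block
    // context and both register operands are low registers.
    static bool shouldUseThumb1RSB(bool IsThumbOne, bool InITBlock,
                                   bool CarrySetting, bool DstLow,
                                   bool SrcLow) {
      return IsThumbOne || ((InITBlock != CarrySetting) && DstLow && SrcLow);
    }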
@@ -4605,11 +5034,495 @@ validateInstruction(MCInst &Inst,
return false;
}
+static unsigned getRealVSTLNOpcode(unsigned Opc) {
+ switch(Opc) {
+ default: assert(0 && "unexpected opcode!");
+ // VST1LN
+ case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8:
+ case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8:
+ case ARM::VST1LNdWB_fixed_Asm_U8:
+ return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_16: case ARM::VST1LNdWB_fixed_Asm_P16:
+ case ARM::VST1LNdWB_fixed_Asm_I16: case ARM::VST1LNdWB_fixed_Asm_S16:
+ case ARM::VST1LNdWB_fixed_Asm_U16:
+ return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F:
+ case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32:
+ case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32:
+ return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8:
+ case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8:
+ case ARM::VST1LNdWB_register_Asm_U8:
+ return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_register_Asm_16: case ARM::VST1LNdWB_register_Asm_P16:
+ case ARM::VST1LNdWB_register_Asm_I16: case ARM::VST1LNdWB_register_Asm_S16:
+ case ARM::VST1LNdWB_register_Asm_U16:
+ return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F:
+ case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32:
+ case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32:
+ return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8:
+ case ARM::VST1LNdAsm_I8: case ARM::VST1LNdAsm_S8:
+ case ARM::VST1LNdAsm_U8:
+ return ARM::VST1LNd8;
+ case ARM::VST1LNdAsm_16: case ARM::VST1LNdAsm_P16:
+ case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16:
+ case ARM::VST1LNdAsm_U16:
+ return ARM::VST1LNd16;
+ case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F:
+ case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32:
+ case ARM::VST1LNdAsm_S32: case ARM::VST1LNdAsm_U32:
+ return ARM::VST1LNd32;
+
+ // VST2LN
+ case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8:
+ case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8:
+ case ARM::VST2LNdWB_fixed_Asm_U8:
+ return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_16: case ARM::VST2LNdWB_fixed_Asm_P16:
+ case ARM::VST2LNdWB_fixed_Asm_I16: case ARM::VST2LNdWB_fixed_Asm_S16:
+ case ARM::VST2LNdWB_fixed_Asm_U16:
+ return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F:
+ case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32:
+ case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32:
+ return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8:
+ case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8:
+ case ARM::VST2LNdWB_register_Asm_U8:
+ return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_register_Asm_16: case ARM::VST2LNdWB_register_Asm_P16:
+ case ARM::VST2LNdWB_register_Asm_I16: case ARM::VST2LNdWB_register_Asm_S16:
+ case ARM::VST2LNdWB_register_Asm_U16:
+ return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F:
+ case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32:
+ case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32:
+ return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8:
+ case ARM::VST2LNdAsm_I8: case ARM::VST2LNdAsm_S8:
+ case ARM::VST2LNdAsm_U8:
+ return ARM::VST2LNd8;
+ case ARM::VST2LNdAsm_16: case ARM::VST2LNdAsm_P16:
+ case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16:
+ case ARM::VST2LNdAsm_U16:
+ return ARM::VST2LNd16;
+ case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F:
+ case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32:
+ case ARM::VST2LNdAsm_S32: case ARM::VST2LNdAsm_U32:
+ return ARM::VST2LNd32;
+ }
+}
+
+static unsigned getRealVLDLNOpcode(unsigned Opc) {
+ switch(Opc) {
+ default: assert(0 && "unexpected opcode!");
+ // VLD1LN
+ case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8:
+ case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8:
+ case ARM::VLD1LNdWB_fixed_Asm_U8:
+ return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_16: case ARM::VLD1LNdWB_fixed_Asm_P16:
+ case ARM::VLD1LNdWB_fixed_Asm_I16: case ARM::VLD1LNdWB_fixed_Asm_S16:
+ case ARM::VLD1LNdWB_fixed_Asm_U16:
+ return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F:
+ case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32:
+ case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32:
+ return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8:
+ case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8:
+ case ARM::VLD1LNdWB_register_Asm_U8:
+ return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_register_Asm_16: case ARM::VLD1LNdWB_register_Asm_P16:
+ case ARM::VLD1LNdWB_register_Asm_I16: case ARM::VLD1LNdWB_register_Asm_S16:
+ case ARM::VLD1LNdWB_register_Asm_U16:
+ return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F:
+ case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32:
+ case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32:
+ return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8:
+ case ARM::VLD1LNdAsm_I8: case ARM::VLD1LNdAsm_S8:
+ case ARM::VLD1LNdAsm_U8:
+ return ARM::VLD1LNd8;
+ case ARM::VLD1LNdAsm_16: case ARM::VLD1LNdAsm_P16:
+ case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16:
+ case ARM::VLD1LNdAsm_U16:
+ return ARM::VLD1LNd16;
+ case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F:
+ case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32:
+ case ARM::VLD1LNdAsm_S32: case ARM::VLD1LNdAsm_U32:
+ return ARM::VLD1LNd32;
+
+ // VLD2LN
+ case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8:
+ case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8:
+ case ARM::VLD2LNdWB_fixed_Asm_U8:
+ return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_16: case ARM::VLD2LNdWB_fixed_Asm_P16:
+ case ARM::VLD2LNdWB_fixed_Asm_I16: case ARM::VLD2LNdWB_fixed_Asm_S16:
+ case ARM::VLD2LNdWB_fixed_Asm_U16:
+ return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F:
+ case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32:
+ case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32:
+ return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8:
+ case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8:
+ case ARM::VLD2LNdWB_register_Asm_U8:
+ return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_register_Asm_16: case ARM::VLD2LNdWB_register_Asm_P16:
+ case ARM::VLD2LNdWB_register_Asm_I16: case ARM::VLD2LNdWB_register_Asm_S16:
+ case ARM::VLD2LNdWB_register_Asm_U16:
+ return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F:
+ case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32:
+ case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32:
+ return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8:
+ case ARM::VLD2LNdAsm_I8: case ARM::VLD2LNdAsm_S8:
+ case ARM::VLD2LNdAsm_U8:
+ return ARM::VLD2LNd8;
+ case ARM::VLD2LNdAsm_16: case ARM::VLD2LNdAsm_P16:
+ case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16:
+ case ARM::VLD2LNdAsm_U16:
+ return ARM::VLD2LNd16;
+ case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F:
+ case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32:
+ case ARM::VLD2LNdAsm_S32: case ARM::VLD2LNdAsm_U32:
+ return ARM::VLD2LNd32;
+ }
+}
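+
+// The case bodies in processInstruction below all perform the same kind of
+// operand permutation. A sketch of the register-writeback VST1LN shuffle
+// as a plain index mapping, mirroring the comments in the first case
+// (helper name and std::vector representation are illustrative only):
+//
+//   // Parsed pseudo order: 0=Vd 1=lane 2=Rn 3=align 4=Rm 5,6=pred.
+//   // Real VST1LN*_UPD order: Rn_wb, Rn, align, Rm, Vd, lane, pred.
+//   static std::vector<int> shuffleForVST1LNWB(const std::vector<int> &Ops) {
+//     return {Ops[2], Ops[2], Ops[3], Ops[4], Ops[0], Ops[1], Ops[5], Ops[6]};
+//   }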
+
bool ARMAsmParser::
processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
switch (Inst.getOpcode()) {
- // Handle the MOV complex aliases.
+ // Handle NEON VST complex aliases.
+ case ARM::VST1LNdWB_register_Asm_8: case ARM::VST1LNdWB_register_Asm_P8:
+ case ARM::VST1LNdWB_register_Asm_I8: case ARM::VST1LNdWB_register_Asm_S8:
+ case ARM::VST1LNdWB_register_Asm_U8: case ARM::VST1LNdWB_register_Asm_16:
+ case ARM::VST1LNdWB_register_Asm_P16: case ARM::VST1LNdWB_register_Asm_I16:
+ case ARM::VST1LNdWB_register_Asm_S16: case ARM::VST1LNdWB_register_Asm_U16:
+ case ARM::VST1LNdWB_register_Asm_32: case ARM::VST1LNdWB_register_Asm_F:
+ case ARM::VST1LNdWB_register_Asm_F32: case ARM::VST1LNdWB_register_Asm_I32:
+ case ARM::VST1LNdWB_register_Asm_S32: case ARM::VST1LNdWB_register_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_register_Asm_8: case ARM::VST2LNdWB_register_Asm_P8:
+ case ARM::VST2LNdWB_register_Asm_I8: case ARM::VST2LNdWB_register_Asm_S8:
+ case ARM::VST2LNdWB_register_Asm_U8: case ARM::VST2LNdWB_register_Asm_16:
+ case ARM::VST2LNdWB_register_Asm_P16: case ARM::VST2LNdWB_register_Asm_I16:
+ case ARM::VST2LNdWB_register_Asm_S16: case ARM::VST2LNdWB_register_Asm_U16:
+ case ARM::VST2LNdWB_register_Asm_32: case ARM::VST2LNdWB_register_Asm_F:
+ case ARM::VST2LNdWB_register_Asm_F32: case ARM::VST2LNdWB_register_Asm_I32:
+ case ARM::VST2LNdWB_register_Asm_S32: case ARM::VST2LNdWB_register_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::VST1LNdWB_fixed_Asm_8: case ARM::VST1LNdWB_fixed_Asm_P8:
+ case ARM::VST1LNdWB_fixed_Asm_I8: case ARM::VST1LNdWB_fixed_Asm_S8:
+ case ARM::VST1LNdWB_fixed_Asm_U8: case ARM::VST1LNdWB_fixed_Asm_16:
+ case ARM::VST1LNdWB_fixed_Asm_P16: case ARM::VST1LNdWB_fixed_Asm_I16:
+ case ARM::VST1LNdWB_fixed_Asm_S16: case ARM::VST1LNdWB_fixed_Asm_U16:
+ case ARM::VST1LNdWB_fixed_Asm_32: case ARM::VST1LNdWB_fixed_Asm_F:
+ case ARM::VST1LNdWB_fixed_Asm_F32: case ARM::VST1LNdWB_fixed_Asm_I32:
+ case ARM::VST1LNdWB_fixed_Asm_S32: case ARM::VST1LNdWB_fixed_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_fixed_Asm_8: case ARM::VST2LNdWB_fixed_Asm_P8:
+ case ARM::VST2LNdWB_fixed_Asm_I8: case ARM::VST2LNdWB_fixed_Asm_S8:
+ case ARM::VST2LNdWB_fixed_Asm_U8: case ARM::VST2LNdWB_fixed_Asm_16:
+ case ARM::VST2LNdWB_fixed_Asm_P16: case ARM::VST2LNdWB_fixed_Asm_I16:
+ case ARM::VST2LNdWB_fixed_Asm_S16: case ARM::VST2LNdWB_fixed_Asm_U16:
+ case ARM::VST2LNdWB_fixed_Asm_32: case ARM::VST2LNdWB_fixed_Asm_F:
+ case ARM::VST2LNdWB_fixed_Asm_F32: case ARM::VST2LNdWB_fixed_Asm_I32:
+ case ARM::VST2LNdWB_fixed_Asm_S32: case ARM::VST2LNdWB_fixed_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::VST1LNdAsm_8: case ARM::VST1LNdAsm_P8: case ARM::VST1LNdAsm_I8:
+ case ARM::VST1LNdAsm_S8: case ARM::VST1LNdAsm_U8: case ARM::VST1LNdAsm_16:
+ case ARM::VST1LNdAsm_P16: case ARM::VST1LNdAsm_I16: case ARM::VST1LNdAsm_S16:
+ case ARM::VST1LNdAsm_U16: case ARM::VST1LNdAsm_32: case ARM::VST1LNdAsm_F:
+ case ARM::VST1LNdAsm_F32: case ARM::VST1LNdAsm_I32: case ARM::VST1LNdAsm_S32:
+ case ARM::VST1LNdAsm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdAsm_8: case ARM::VST2LNdAsm_P8: case ARM::VST2LNdAsm_I8:
+ case ARM::VST2LNdAsm_S8: case ARM::VST2LNdAsm_U8: case ARM::VST2LNdAsm_16:
+ case ARM::VST2LNdAsm_P16: case ARM::VST2LNdAsm_I16: case ARM::VST2LNdAsm_S16:
+ case ARM::VST2LNdAsm_U16: case ARM::VST2LNdAsm_32: case ARM::VST2LNdAsm_F:
+ case ARM::VST2LNdAsm_F32: case ARM::VST2LNdAsm_I32: case ARM::VST2LNdAsm_S32:
+ case ARM::VST2LNdAsm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVSTLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle NEON VLD complex aliases.
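+ // E.g. "vld1.8 {d0[2]}, [r1], r2" arrives as one of the *_Asm pseudo
+ // opcodes below and is rewritten into the corresponding real lane-load
+ // instruction via getRealVLDLNOpcode().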
+ case ARM::VLD1LNdWB_register_Asm_8: case ARM::VLD1LNdWB_register_Asm_P8:
+ case ARM::VLD1LNdWB_register_Asm_I8: case ARM::VLD1LNdWB_register_Asm_S8:
+ case ARM::VLD1LNdWB_register_Asm_U8: case ARM::VLD1LNdWB_register_Asm_16:
+ case ARM::VLD1LNdWB_register_Asm_P16: case ARM::VLD1LNdWB_register_Asm_I16:
+ case ARM::VLD1LNdWB_register_Asm_S16: case ARM::VLD1LNdWB_register_Asm_U16:
+ case ARM::VLD1LNdWB_register_Asm_32: case ARM::VLD1LNdWB_register_Asm_F:
+ case ARM::VLD1LNdWB_register_Asm_F32: case ARM::VLD1LNdWB_register_Asm_I32:
+ case ARM::VLD1LNdWB_register_Asm_S32: case ARM::VLD1LNdWB_register_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_register_Asm_8: case ARM::VLD2LNdWB_register_Asm_P8:
+ case ARM::VLD2LNdWB_register_Asm_I8: case ARM::VLD2LNdWB_register_Asm_S8:
+ case ARM::VLD2LNdWB_register_Asm_U8: case ARM::VLD2LNdWB_register_Asm_16:
+ case ARM::VLD2LNdWB_register_Asm_P16: case ARM::VLD2LNdWB_register_Asm_I16:
+ case ARM::VLD2LNdWB_register_Asm_S16: case ARM::VLD2LNdWB_register_Asm_U16:
+ case ARM::VLD2LNdWB_register_Asm_32: case ARM::VLD2LNdWB_register_Asm_F:
+ case ARM::VLD2LNdWB_register_Asm_F32: case ARM::VLD2LNdWB_register_Asm_I32:
+ case ARM::VLD2LNdWB_register_Asm_S32: case ARM::VLD2LNdWB_register_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdWB_fixed_Asm_8: case ARM::VLD1LNdWB_fixed_Asm_P8:
+ case ARM::VLD1LNdWB_fixed_Asm_I8: case ARM::VLD1LNdWB_fixed_Asm_S8:
+ case ARM::VLD1LNdWB_fixed_Asm_U8: case ARM::VLD1LNdWB_fixed_Asm_16:
+ case ARM::VLD1LNdWB_fixed_Asm_P16: case ARM::VLD1LNdWB_fixed_Asm_I16:
+ case ARM::VLD1LNdWB_fixed_Asm_S16: case ARM::VLD1LNdWB_fixed_Asm_U16:
+ case ARM::VLD1LNdWB_fixed_Asm_32: case ARM::VLD1LNdWB_fixed_Asm_F:
+ case ARM::VLD1LNdWB_fixed_Asm_F32: case ARM::VLD1LNdWB_fixed_Asm_I32:
+ case ARM::VLD1LNdWB_fixed_Asm_S32: case ARM::VLD1LNdWB_fixed_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_fixed_Asm_8: case ARM::VLD2LNdWB_fixed_Asm_P8:
+ case ARM::VLD2LNdWB_fixed_Asm_I8: case ARM::VLD2LNdWB_fixed_Asm_S8:
+ case ARM::VLD2LNdWB_fixed_Asm_U8: case ARM::VLD2LNdWB_fixed_Asm_16:
+ case ARM::VLD2LNdWB_fixed_Asm_P16: case ARM::VLD2LNdWB_fixed_Asm_I16:
+ case ARM::VLD2LNdWB_fixed_Asm_S16: case ARM::VLD2LNdWB_fixed_Asm_U16:
+ case ARM::VLD2LNdWB_fixed_Asm_32: case ARM::VLD2LNdWB_fixed_Asm_F:
+ case ARM::VLD2LNdWB_fixed_Asm_F32: case ARM::VLD2LNdWB_fixed_Asm_I32:
+ case ARM::VLD2LNdWB_fixed_Asm_S32: case ARM::VLD2LNdWB_fixed_Asm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdAsm_8: case ARM::VLD1LNdAsm_P8: case ARM::VLD1LNdAsm_I8:
+ case ARM::VLD1LNdAsm_S8: case ARM::VLD1LNdAsm_U8: case ARM::VLD1LNdAsm_16:
+ case ARM::VLD1LNdAsm_P16: case ARM::VLD1LNdAsm_I16: case ARM::VLD1LNdAsm_S16:
+ case ARM::VLD1LNdAsm_U16: case ARM::VLD1LNdAsm_32: case ARM::VLD1LNdAsm_F:
+ case ARM::VLD1LNdAsm_F32: case ARM::VLD1LNdAsm_I32: case ARM::VLD1LNdAsm_S32:
+ case ARM::VLD1LNdAsm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdAsm_8: case ARM::VLD2LNdAsm_P8: case ARM::VLD2LNdAsm_I8:
+ case ARM::VLD2LNdAsm_S8: case ARM::VLD2LNdAsm_U8: case ARM::VLD2LNdAsm_16:
+ case ARM::VLD2LNdAsm_P16: case ARM::VLD2LNdAsm_I16: case ARM::VLD2LNdAsm_S16:
+ case ARM::VLD2LNdAsm_U16: case ARM::VLD2LNdAsm_32: case ARM::VLD2LNdAsm_F:
+ case ARM::VLD2LNdAsm_F32: case ARM::VLD2LNdAsm_I32: case ARM::VLD2LNdAsm_S32:
+ case ARM::VLD2LNdAsm_U32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ TmpInst.setOpcode(getRealVLDLNOpcode(Inst.getOpcode()));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg()+1));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle the Thumb2 mode MOV complex aliases.
+ case ARM::t2MOVsi:
+ case ARM::t2MOVSsi: {
+ // Which instruction to expand to depends on the CCOut operand and,
+ // when the register operands are low registers, on whether we're
+ // inside an IT block.
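+ // For example, "mov r0, r1, lsl #2" can use the narrow tLSLri encoding
+ // only inside an IT block (where it leaves the flags alone), while
+ // "movs r0, r1, lsl #2" can use it only outside an IT block (where it
+ // sets the flags).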
+ bool isNarrow = false;
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi))
+ isNarrow = true;
+ MCInst TmpInst;
+ unsigned newOpc;
+ switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break;
+ case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
+ }
+ unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
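+ // The ARM ARM encodes an ASR or LSR shift amount of 32 as immediate 0.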
+ if (Amount == 32) Amount = 0;
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ if (isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(Amount));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ if (!isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ Inst = TmpInst;
+ return true;
+ }
+ // Handle the ARM mode MOV complex aliases.
case ARM::ASRr:
case ARM::LSRr:
case ARM::LSLr:
@@ -4743,6 +5656,24 @@ processInstruction(MCInst &Inst,
Inst = TmpInst;
}
break;
+ case ARM::t2ADDri12:
+ // If the immediate fits for encoding T3 (t2ADDri) and the generic "add"
+ // mnemonic was used (not "addw"), encoding T3 is preferred.
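+ // E.g. "add r0, r0, #16" (a valid modified immediate) becomes t2ADDri,
+ // while "addw r0, r0, #16", or an immediate such as #4095 that has no
+ // modified-immediate form, stays t2ADDri12.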
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2ADDri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
+ case ARM::t2SUBri12:
+ // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub"
+ // mnemonic was used (not "subw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2SUBri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
case ARM::tADDi8:
// If the immediate is in the range 0-7, we want tADDi3 iff Rd was
// explicitly specified. From the ARM ARM: "Encoding T1 is preferred
@@ -4763,6 +5694,26 @@ processInstruction(MCInst &Inst,
return true;
}
break;
+ case ARM::t2ADDrr: {
+ // If the destination and first source operand are the same, and
+ // there's no setting of the flags, use encoding T2 instead of T3.
+ // Note that this is only for ADD, not SUB. This mirrors the system
+ // 'as' behaviour. Make sure the wide encoding wasn't explicit.
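+ // E.g. "add r0, r0, r8" becomes the 16-bit tADDhirr, while
+ // "add.w r0, r0, r8" or "adds r0, r0, r8" keeps the 32-bit t2ADDrr.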
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
+ Inst.getOperand(5).getReg() != 0 ||
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ break;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tADDhirr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
case ARM::tB:
// A Thumb conditional branch outside of an IT block is a tBcc.
if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) {
@@ -5079,12 +6030,16 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveWord(4, DirectiveID.getLoc());
else if (IDVal == ".thumb")
return parseDirectiveThumb(DirectiveID.getLoc());
+ else if (IDVal == ".arm")
+ return parseDirectiveARM(DirectiveID.getLoc());
else if (IDVal == ".thumb_func")
return parseDirectiveThumbFunc(DirectiveID.getLoc());
else if (IDVal == ".code")
return parseDirectiveCode(DirectiveID.getLoc());
else if (IDVal == ".syntax")
return parseDirectiveSyntax(DirectiveID.getLoc());
+ else if (IDVal == ".unreq")
+ return parseDirectiveUnreq(DirectiveID.getLoc());
return true;
}
@@ -5120,9 +6075,22 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
return Error(L, "unexpected token in directive");
Parser.Lex();
- // TODO: set thumb mode
- // TODO: tell the MC streamer the mode
- // getParser().getStreamer().Emit???();
+ if (!isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ return false;
+}
+
+/// parseDirectiveARM
+/// ::= .arm
+bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ if (isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
return false;
}
@@ -5212,6 +6180,45 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
return false;
}
+/// parseDirectiveReq
+/// ::= name .req registername
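+/// For example, "acc .req r4" lets "acc" be written wherever r4 is
+/// expected, until ".unreq acc" removes the alias.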
+bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ Parser.Lex(); // Eat the '.req' token.
+ unsigned Reg;
+ SMLoc SRegLoc, ERegLoc;
+ if (ParseRegister(Reg, SRegLoc, ERegLoc)) {
+ Parser.EatToEndOfStatement();
+ return Error(SRegLoc, "register name expected");
+ }
+
+ // Shouldn't be anything else.
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
+ Parser.EatToEndOfStatement();
+ return Error(Parser.getTok().getLoc(),
+ "unexpected input in .req directive.");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg)
+ return Error(SRegLoc, "redefinition of '" + Name +
+ "' does not match original.");
+
+ return false;
+}
+
+/// parseDirectiveUnreq
+/// ::= .unreq registername
+bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Parser.EatToEndOfStatement();
+ return Error(L, "unexpected input in .unreq directive.");
+ }
+ RegisterReqs.erase(Parser.getTok().getIdentifier());
+ Parser.Lex(); // Eat the identifier.
+ return false;
+}
+
extern "C" void LLVMInitializeARMAsmLexer();
/// Force static initialization.
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt
index 3f5ad39..e24a1b1 100644
--- a/lib/Target/ARM/AsmParser/CMakeLists.txt
+++ b/lib/Target/ARM/AsmParser/CMakeLists.txt
@@ -6,11 +6,3 @@ add_llvm_library(LLVMARMAsmParser
)
add_dependencies(LLVMARMAsmParser ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMAsmParser
- LLVMARMDesc
- LLVMARMInfo
- LLVMMC
- LLVMMCParser
- LLVMSupport
- )
diff --git a/lib/Target/ARM/AsmParser/LLVMBuild.txt b/lib/Target/ARM/AsmParser/LLVMBuild.txt
index cbf9b4b..f0184b6 100644
--- a/lib/Target/ARM/AsmParser/LLVMBuild.txt
+++ b/lib/Target/ARM/AsmParser/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMAsmParser
parent = ARM
required_libraries = ARMDesc ARMInfo MC MCParser Support
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 511932e..04cdf55 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -48,20 +48,6 @@ add_llvm_target(ARMCodeGen
Thumb2SizeReduction.cpp
)
-add_llvm_library_dependencies(LLVMARMCodeGen
- LLVMARMAsmPrinter
- LLVMARMDesc
- LLVMARMInfo
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
# workaround for hanging compilation on MSVC9, 10
if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 )
set_property(
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index ad250ab..49c64fd 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -2085,15 +2085,24 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::VLD1d32Qwb_register:
case ARM::VLD1d64Qwb_fixed:
case ARM::VLD1d64Qwb_register:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
- case ARM::VLD2b8_UPD:
- case ARM::VLD2b16_UPD:
- case ARM::VLD2b32_UPD:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2b8wb_register:
+ case ARM::VLD2b16wb_register:
+ case ARM::VLD2b32wb_register:
case ARM::VLD3d8_UPD:
case ARM::VLD3d16_UPD:
case ARM::VLD3d32_UPD:
@@ -2196,23 +2205,40 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::VST1q16wb_register:
case ARM::VST1q32wb_register:
case ARM::VST1q64wb_register:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2d8_UPD:
- case ARM::VST2d16_UPD:
- case ARM::VST2d32_UPD:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
- case ARM::VST2b8_UPD:
- case ARM::VST2b16_UPD:
- case ARM::VST2b32_UPD:
+ case ARM::VST1d8Twb_fixed:
+ case ARM::VST1d16Twb_fixed:
+ case ARM::VST1d32Twb_fixed:
+ case ARM::VST1d64Twb_fixed:
+ case ARM::VST1d8Twb_register:
+ case ARM::VST1d16Twb_register:
+ case ARM::VST1d32Twb_register:
+ case ARM::VST1d64Twb_register:
+ case ARM::VST1d8Qwb_fixed:
+ case ARM::VST1d16Qwb_fixed:
+ case ARM::VST1d32Qwb_fixed:
+ case ARM::VST1d64Qwb_fixed:
+ case ARM::VST1d8Qwb_register:
+ case ARM::VST1d16Qwb_register:
+ case ARM::VST1d32Qwb_register:
+ case ARM::VST1d64Qwb_register:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2d8wb_register:
+ case ARM::VST2d16wb_register:
+ case ARM::VST2d32wb_register:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2q8wb_register:
+ case ARM::VST2q16wb_register:
+ case ARM::VST2q32wb_register:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
+ case ARM::VST2b8wb_register:
+ case ARM::VST2b16wb_register:
+ case ARM::VST2b32wb_register:
case ARM::VST3d8_UPD:
case ARM::VST3d16_UPD:
case ARM::VST3d32_UPD:
@@ -2264,34 +2290,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Second input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8T:
- case ARM::VST1d16T:
- case ARM::VST1d32T:
- case ARM::VST1d64T:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2d8:
- case ARM::VST2d16:
- case ARM::VST2d32:
- case ARM::VST2d8_UPD:
- case ARM::VST2d16_UPD:
- case ARM::VST2d32_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST3d8:
case ARM::VST3d16:
case ARM::VST3d32:
@@ -2307,12 +2305,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
return MCDisassembler::Fail;
break;
- case ARM::VST2b8:
- case ARM::VST2b16:
- case ARM::VST2b32:
- case ARM::VST2b8_UPD:
- case ARM::VST2b16_UPD:
- case ARM::VST2b32_UPD:
case ARM::VST3q8:
case ARM::VST3q16:
case ARM::VST3q32:
@@ -2334,28 +2326,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Third input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8T:
- case ARM::VST1d16T:
- case ARM::VST1d32T:
- case ARM::VST1d64T:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST3d8:
case ARM::VST3d16:
case ARM::VST3d32:
@@ -2392,20 +2362,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Fourth input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST4d8:
case ARM::VST4d16:
case ARM::VST4d32:
@@ -2441,16 +2397,11 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
unsigned align = fieldFromInstruction32(Insn, 4, 1);
unsigned size = fieldFromInstruction32(Insn, 6, 2);
- unsigned regs = fieldFromInstruction32(Insn, 5, 1) + 1;
align *= (1 << size);
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
- if (regs == 2) {
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
- return MCDisassembler::Fail;
- }
if (Rm != 0xF) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2460,12 +2411,12 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(align));
- if (Rm == 0xD)
- Inst.addOperand(MCOperand::CreateReg(0));
- else if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
- return MCDisassembler::Fail;
- }
+ // The fixed offset post-increment encodes Rm == 0xd. The no-writeback
+ // variant encodes Rm == 0xf. Anything else is a register offset post-
+ // increment and we need to add the register operand to the instruction.
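+ // E.g. "vld1.8 {d0[]}, [r1]!" encodes Rm == 0xd, "vld1.8 {d0[]}, [r1]"
+ // encodes Rm == 0xf, and "vld1.8 {d0[]}, [r1], r2" encodes Rm == r2.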
+ if (Rm != 0xD && Rm != 0xF &&
+ !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
return S;
}
@@ -2693,7 +2644,6 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
unsigned op = fieldFromInstruction32(Insn, 6, 1);
- unsigned length = fieldFromInstruction32(Insn, 8, 2) + 1;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2702,10 +2652,8 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail; // Writeback
}
- for (unsigned i = 0; i < length; ++i) {
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rn+i)%32, Address, Decoder)))
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- }
if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
@@ -4138,4 +4086,3 @@ static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-
diff --git a/lib/Target/ARM/Disassembler/CMakeLists.txt b/lib/Target/ARM/Disassembler/CMakeLists.txt
index da87751..9de6e5c 100644
--- a/lib/Target/ARM/Disassembler/CMakeLists.txt
+++ b/lib/Target/ARM/Disassembler/CMakeLists.txt
@@ -11,11 +11,3 @@ set_property(
)
endif()
add_dependencies(LLVMARMDisassembler ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMDisassembler
- LLVMARMCodeGen
- LLVMARMDesc
- LLVMARMInfo
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt
index baa9bc3..94075a9 100644
--- a/lib/Target/ARM/Disassembler/LLVMBuild.txt
+++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMDisassembler
parent = ARM
required_libraries = ARMCodeGen ARMDesc ARMInfo MC Support
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 6c6c021..662097a 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -1029,3 +1029,29 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
<< getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
<< getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "}";
}
+
+void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
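+ // E.g. for d0 this prints "{d0[], d1[]}".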
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get a following register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
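+ // E.g. for d0 this prints "{d0, d2}".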
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}";
+}
+
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 3f38f1a..05db2d2 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -133,6 +133,12 @@ public:
void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
};
} // end namespace llvm
diff --git a/lib/Target/ARM/InstPrinter/CMakeLists.txt b/lib/Target/ARM/InstPrinter/CMakeLists.txt
index fa0b495..e2d4819 100644
--- a/lib/Target/ARM/InstPrinter/CMakeLists.txt
+++ b/lib/Target/ARM/InstPrinter/CMakeLists.txt
@@ -5,8 +5,3 @@ add_llvm_library(LLVMARMAsmPrinter
)
add_dependencies(LLVMARMAsmPrinter ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMAsmPrinter
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/InstPrinter/LLVMBuild.txt b/lib/Target/ARM/InstPrinter/LLVMBuild.txt
index b34aab4..6f4fa36 100644
--- a/lib/Target/ARM/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/ARM/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMAsmPrinter
parent = ARM
required_libraries = MC Support
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/LLVMBuild.txt b/lib/Target/ARM/LLVMBuild.txt
index 9082539..fd4b3a3 100644
--- a/lib/Target/ARM/LLVMBuild.txt
+++ b/lib/Target/ARM/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = ARM
@@ -30,4 +33,3 @@ name = ARMCodeGen
parent = ARM
required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 62d04c4..bf1f0e8 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -102,6 +102,11 @@ public:
bool MayNeedRelaxation(const MCInst &Inst) const;
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
+
void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
@@ -124,14 +129,49 @@ public:
};
} // end anonymous namespace
+static unsigned getRelaxedOpcode(unsigned Op) {
+ switch (Op) {
+ default: return Op;
+ case ARM::tBcc: return ARM::t2Bcc;
+ }
+}
+
bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
- // FIXME: Thumb targets, different move constant targets..
+ if (getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode())
+ return true;
return false;
}
+bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the
+ // low bit being an implied zero. There's an implied +4 offset for the
+ // branch, so we adjust the other way here to determine what's
+ // encodable.
+ //
+ // Relax if the adjusted offset is outside the even, signed 9-bit range
+ // [-256, 254] that tBcc can encode.
+ int64_t Offset = int64_t(Value) - 4;
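+ // E.g. a forward branch with a fixup value of 260 gives Offset == 256,
+ // which exceeds 254, so the tBcc is relaxed to the 32-bit t2Bcc form.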
+ return Offset > 254 || Offset < -256;
+}
+
void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
- assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented");
- return;
+ unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
+
+ // Sanity check with a diagnostic if we get here with a bogus instruction.
+ if (RelaxedOp == Inst.getOpcode()) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ Inst.dump_pretty(OS);
+ OS << "\n";
+ report_fatal_error("unexpected instruction to relax: " + OS.str());
+ }
+
+ // The instructions we're relaxing have (so far) the same operands.
+ // We just need to update to the proper opcode.
+ Res = Inst;
+ Res.setOpcode(RelaxedOp);
}
bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 865c3e2..c38a882 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1412,7 +1412,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(Op);
if (MO.getReg() == 0) return 0x0D;
- return MO.getReg();
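+ // MO.getReg() is an LLVM register enum value, not the hardware encoding;
+ // getARMRegisterNumbering() maps it to the actual register number.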
+ return getARMRegisterNumbering(MO.getReg());
}
unsigned ARMMCCodeEmitter::
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 352c73e..f394b4f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
@@ -178,9 +179,16 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
MovtBit = 1;
+ // The thumb bit shouldn't be set in the 'other-half' bit of the
+ // relocation, but it will be set in FixedValue if the base symbol
+ // is a thumb function. Clear it out here.
+ if (A_SD->getFlags() & SF_ThumbFunc)
+ FixedValue &= 0xfffffffe;
break;
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
+ if (A_SD->getFlags() & SF_ThumbFunc)
+ FixedValue &= 0xfffffffe;
MovtBit = 1;
// Fallthrough
case ARM::fixup_t2_movw_lo16:
@@ -189,7 +197,6 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
break;
}
-
if (Type == macho::RIT_ARM_HalfDifference) {
uint32_t OtherHalf = MovtBit
? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index f529314..f2cf78a 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -10,10 +10,3 @@ add_dependencies(LLVMARMDesc ARMCommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
-
-add_llvm_library_dependencies(LLVMARMDesc
- LLVMARMAsmPrinter
- LLVMARMInfo
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
index 46b11c7..2a7fe61 100644
--- a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMDesc
parent = ARM
required_libraries = ARMAsmPrinter ARMInfo MC Support
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 2df0053..000a37f 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -139,7 +139,7 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
- if (MCID.mayStore())
+ if (MI->mayStore())
return false;
unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
@@ -222,14 +222,14 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg)
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
.addReg(Src1Reg, getKillRegState(Src1Kill))
.addReg(Src2Reg, getKillRegState(Src2Kill));
if (HasLane)
MIB.addImm(LaneImm);
MIB.addImm(Pred).addReg(PredReg);
- MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2)
+ MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
if (NegAcc) {
@@ -274,7 +274,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
}
const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.isBarrier()) {
+ if (MI->isBarrier()) {
clearStack();
Skip = 0;
++MII;
diff --git a/lib/Target/ARM/TargetInfo/CMakeLists.txt b/lib/Target/ARM/TargetInfo/CMakeLists.txt
index 8b38b13..533e747 100644
--- a/lib/Target/ARM/TargetInfo/CMakeLists.txt
+++ b/lib/Target/ARM/TargetInfo/CMakeLists.txt
@@ -5,9 +5,3 @@ add_llvm_library(LLVMARMInfo
)
add_dependencies(LLVMARMInfo ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/Target/ARM/TargetInfo/LLVMBuild.txt b/lib/Target/ARM/TargetInfo/LLVMBuild.txt
index 046c1fc..a07a940 100644
--- a/lib/Target/ARM/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/ARM/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = ARMInfo
parent = ARM
required_libraries = MC Support Target
add_to_library_groups = ARM
-
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index e8ed482..e61c0a7 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -643,14 +643,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(Offset && "This code isn't needed if offset already handled!");
unsigned Opcode = MI.getOpcode();
- const MCInstrDesc &Desc = MI.getDesc();
// Remove predicate first.
int PIdx = MI.findFirstPredOperandIdx();
if (PIdx != -1)
removeOperands(MI, PIdx);
- if (Desc.mayLoad()) {
+ if (MI.mayLoad()) {
// Use the destination register to materialize sp + offset.
unsigned TmpReg = MI.getOperand(0).getReg();
bool UseRR = false;
@@ -673,7 +672,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
- } else if (Desc.mayStore()) {
+ } else if (MI.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
bool UseRR = false;
@@ -699,7 +698,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
// Add predicate back if it's needed.
- if (MI.getDesc().isPredicable()) {
+ if (MI.isPredicable()) {
MachineInstrBuilder MIB(&MI);
AddDefaultPred(MIB);
}
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index b627400..55b4d30 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -13,6 +13,7 @@
#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -141,7 +142,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
// rsb r2, 0
//
const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.hasOptionalDef() &&
+ if (MI->hasOptionalDef() &&
MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR)
return false;
@@ -198,7 +199,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Branches, including tricky ones like LDM_RET, need to end an IT
// block so check the instruction we just put in the block.
for (; MBBI != E && Pos &&
- (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
+ (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
if (MBBI->isDebugValue())
continue;
@@ -237,6 +238,9 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Last instruction in IT block kills ITSTATE.
LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+ // Finalize the bundle.
+ FinalizeBundle(MBB, InsertPos.getInstrIterator(), LastITMI);
+
Modified = true;
++NumITs;
}
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index e5fc8b4..e206288 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -452,7 +452,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit load / store instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
if (!isLdStMul) {
MIB.addOperand(MI->getOperand(0));
MIB.addOperand(MI->getOperand(1));
@@ -478,7 +478,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
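+ // erase_instr, unlike erase, also handles instructions that live inside
+ // a bundle, which matters now that the pass walks instr_iterators.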
+ MBB.erase_instr(MI);
++NumLdSts;
return true;
}
@@ -513,7 +513,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
return false;
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(),
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(ARM::tADDrSPi))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
@@ -525,7 +525,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumNarrows;
return true;
}
@@ -533,8 +533,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
if (Entry.LowRegs1 && !VerifyLowRegs(MI))
return false;
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayLoad() || MCID.mayStore())
+ if (MI->mayLoad() || MI->mayStore())
return ReduceLoadStore(MBB, MI, Entry);
switch (Opc) {
@@ -654,7 +653,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
MIB.addOperand(MI->getOperand(0));
if (NewMCID.hasOptionalDef()) {
if (HasCC)
@@ -678,7 +677,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++Num2Addrs;
return true;
}
@@ -745,7 +744,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
MIB.addOperand(MI->getOperand(0));
if (NewMCID.hasOptionalDef()) {
if (HasCC)
@@ -785,7 +784,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumNarrows;
return true;
}
@@ -830,16 +829,22 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Yes, CPSR could be livein.
bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
MachineInstr *CPSRDef = 0;
+ MachineInstr *BundleMI = 0;
// If this BB loops back to itself, conservatively avoid narrowing the
// first instruction that does partial flag update.
bool IsSelfLoop = MBB.isSuccessor(&MBB);
- MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
- MachineBasicBlock::iterator NextMII;
+ MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator NextMII;
for (; MII != E; MII = NextMII) {
NextMII = llvm::next(MII);
MachineInstr *MI = &*MII;
+ if (MI->isBundle()) {
+ BundleMI = MI;
+ continue;
+ }
+
LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
unsigned Opcode = MI->getOpcode();
@@ -850,7 +855,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
if (Entry.Special) {
if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
}
goto ProcessNext;
@@ -860,7 +865,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
if (Entry.NarrowOpc2 &&
ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
goto ProcessNext;
}
@@ -869,15 +874,24 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
if (Entry.NarrowOpc1 &&
ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
}
}
ProcessNext:
+ if (LiveCPSR &&
+ NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle() &&
+ BundleMI->killsRegister(ARM::CPSR))
+ // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
+ // marker is only on the BUNDLE instruction. Process the BUNDLE
+ // instruction as we finish with the bundled instruction to work around
+ // the inconsistency.
+ LiveCPSR = false;
+
bool DefCPSR = false;
LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
- if (MI->getDesc().isCall()) {
+ if (MI->isCall()) {
// Calls don't really set CPSR.
CPSRDef = 0;
IsSelfLoop = false;
diff --git a/lib/Target/CBackend/CMakeLists.txt b/lib/Target/CBackend/CMakeLists.txt
index edf8ee7..fa819a4 100644
--- a/lib/Target/CBackend/CMakeLists.txt
+++ b/lib/Target/CBackend/CMakeLists.txt
@@ -2,16 +2,4 @@ add_llvm_target(CBackendCodeGen
CBackend.cpp
)
-add_llvm_library_dependencies(LLVMCBackendCodeGen
- LLVMAnalysis
- LLVMCBackendInfo
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMScalarOpts
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
-
add_subdirectory(TargetInfo)
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
index ca346af..8b2286e 100644
--- a/lib/Target/CBackend/CTargetMachine.h
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -21,10 +21,10 @@ namespace llvm {
struct CTargetMachine : public TargetMachine {
CTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : TargetMachine(T, TT, CPU, FS) {}
+ : TargetMachine(T, TT, CPU, FS, Options) { }
virtual bool addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
diff --git a/lib/Target/CBackend/LLVMBuild.txt b/lib/Target/CBackend/LLVMBuild.txt
index 851ded9..e64feb0 100644
--- a/lib/Target/CBackend/LLVMBuild.txt
+++ b/lib/Target/CBackend/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = TargetInfo
+
[component_0]
type = TargetGroup
name = CBackend
@@ -26,4 +29,3 @@ name = CBackendCodeGen
parent = CBackend
required_libraries = Analysis CBackendInfo CodeGen Core MC Scalar Support Target TransformUtils
add_to_library_groups = CBackend
-
diff --git a/lib/Target/CBackend/TargetInfo/CMakeLists.txt b/lib/Target/CBackend/TargetInfo/CMakeLists.txt
index 8e616be..6203616 100644
--- a/lib/Target/CBackend/TargetInfo/CMakeLists.txt
+++ b/lib/Target/CBackend/TargetInfo/CMakeLists.txt
@@ -3,9 +3,3 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMCBackendInfo
CBackendTargetInfo.cpp
)
-
-add_llvm_library_dependencies(LLVMCBackendInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CBackend/TargetInfo/LLVMBuild.txt
index 35752b7..1b47d8e 100644
--- a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/CBackend/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = CBackendInfo
parent = CBackend
required_libraries = MC Support Target
add_to_library_groups = CBackend
-
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 60e2189..22d8c76 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -3,7 +3,6 @@ add_llvm_library(LLVMTarget
Target.cpp
TargetData.cpp
TargetELFWriterInfo.cpp
- TargetFrameLowering.cpp
TargetInstrInfo.cpp
TargetIntrinsicInfo.cpp
TargetLibraryInfo.cpp
@@ -13,12 +12,6 @@ add_llvm_library(LLVMTarget
TargetSubtargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMTarget
- LLVMCore
- LLVMMC
- LLVMSupport
- )
-
foreach(t ${LLVM_TARGETS_TO_BUILD})
message(STATUS "Targeting ${t}")
add_subdirectory(${t})
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index b442a5c..6c67c2d 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -23,17 +23,5 @@ add_llvm_target(CellSPUCodeGen
SPUNopFiller.cpp
)
-add_llvm_library_dependencies(LLVMCellSPUCodeGen
- LLVMAsmPrinter
- LLVMCellSPUDesc
- LLVMCellSPUInfo
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/CellSPU/LLVMBuild.txt b/lib/Target/CellSPU/LLVMBuild.txt
index 4ae26b2..277620b 100644
--- a/lib/Target/CellSPU/LLVMBuild.txt
+++ b/lib/Target/CellSPU/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = CellSPU
@@ -27,4 +30,3 @@ name = CellSPUCodeGen
parent = CellSPU
required_libraries = AsmPrinter CellSPUDesc CellSPUInfo CodeGen Core MC SelectionDAG Support Target
add_to_library_groups = CellSPU
-
diff --git a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
index d41fe93..0027bdb 100644
--- a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
@@ -3,9 +3,4 @@ add_llvm_library(LLVMCellSPUDesc
SPUMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMCellSPUDesc
- LLVMCellSPUInfo
- LLVMMC
- )
-
add_dependencies(LLVMCellSPUDesc CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
index abc44a2..71e5bbc 100644
--- a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = CellSPUDesc
parent = CellSPU
required_libraries = CellSPUInfo MC
add_to_library_groups = CellSPU
-
diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp
index 093f99f..916f9ba 100644
--- a/lib/Target/CellSPU/SPUFrameLowering.cpp
+++ b/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -47,7 +47,8 @@ bool SPUFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return MFI->getStackSize() &&
- (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects());
+ (MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects());
}
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index d58e49b..dc0d5a6 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -296,12 +296,22 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
setOperationAction(ISD::CTTZ , MVT::i64, Expand);
setOperationAction(ISD::CTTZ , MVT::i128, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand);
setOperationAction(ISD::CTLZ , MVT::i8, Promote);
setOperationAction(ISD::CTLZ , MVT::i16, Promote);
setOperationAction(ISD::CTLZ , MVT::i32, Legal);
setOperationAction(ISD::CTLZ , MVT::i64, Expand);
setOperationAction(ISD::CTLZ , MVT::i128, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand);
// SPU has a version of select that implements (a&~c)|(b&c), just like
// select ought to work:
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 6940316..1e922a4 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -34,9 +34,10 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
DataLayout(Subtarget.getTargetDataString()),
InstrInfo(*this),
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 909f12e..0841fee 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -39,7 +39,7 @@ class SPUTargetMachine : public LLVMTargetMachine {
InstrItineraryData InstrItins;
public:
SPUTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
index 3f2d6b09..6a98f95 100644
--- a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
+++ b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMCellSPUInfo
CellSPUTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMCellSPUInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMCellSPUInfo CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
index 0710cc3..6937e70 100644
--- a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = CellSPUInfo
parent = CellSPU
required_libraries = MC Support Target
add_to_library_groups = CellSPU
-
diff --git a/lib/Target/CppBackend/CMakeLists.txt b/lib/Target/CppBackend/CMakeLists.txt
index 53f6868..515e1dd 100644
--- a/lib/Target/CppBackend/CMakeLists.txt
+++ b/lib/Target/CppBackend/CMakeLists.txt
@@ -2,11 +2,4 @@ add_llvm_target(CppBackendCodeGen
CPPBackend.cpp
)
-add_llvm_library_dependencies(LLVMCppBackendCodeGen
- LLVMCore
- LLVMCppBackendInfo
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index a3613b4..92bca6c 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -23,10 +23,10 @@ class formatted_raw_ostream;
struct CPPTargetMachine : public TargetMachine {
CPPTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : TargetMachine(T, TT, CPU, FS) {}
+ : TargetMachine(T, TT, CPU, FS, Options) {}
virtual bool addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
diff --git a/lib/Target/CppBackend/LLVMBuild.txt b/lib/Target/CppBackend/LLVMBuild.txt
index 77e31c7..122b5e7 100644
--- a/lib/Target/CppBackend/LLVMBuild.txt
+++ b/lib/Target/CppBackend/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = TargetInfo
+
[component_0]
type = TargetGroup
name = CppBackend
@@ -26,4 +29,3 @@ name = CppBackendCodeGen
parent = CppBackend
required_libraries = Core CppBackendInfo Support Target
add_to_library_groups = CppBackend
-
diff --git a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
index 738b215..f82d72e 100644
--- a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
+++ b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
@@ -3,9 +3,3 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMCppBackendInfo
CppBackendTargetInfo.cpp
)
-
-add_llvm_library_dependencies(LLVMCppBackendInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt
index 67a23ba..d4dfc3e 100644
--- a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = CppBackendInfo
parent = CppBackend
required_libraries = MC Support Target
add_to_library_groups = CppBackend
-
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
new file mode 100644
index 0000000..f8705ee
--- /dev/null
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -0,0 +1,35 @@
+set(LLVM_TARGET_DEFINITIONS Hexagon.td)
+
+tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM HexagonGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM HexagonGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM HexagonGenIntrinsics.inc -gen-tgt-intrinsic)
+add_public_tablegen_target(HexagonCommonTableGen)
+
+add_llvm_target(HexagonCodeGen
+ HexagonAsmPrinter.cpp
+ HexagonCallingConvLower.cpp
+ HexagonCFGOptimizer.cpp
+ HexagonExpandPredSpillCode.cpp
+ HexagonFrameLowering.cpp
+ HexagonHardwareLoops.cpp
+ HexagonInstrInfo.cpp
+ HexagonISelDAGToDAG.cpp
+ HexagonISelLowering.cpp
+ HexagonMCAsmInfo.cpp
+ HexagonOptimizeSZExtends.cpp
+ HexagonRegisterInfo.cpp
+ HexagonRemoveSZExtArgs.cpp
+ HexagonSelectionDAGInfo.cpp
+ HexagonSplitTFRCondSets.cpp
+ HexagonSubtarget.cpp
+ HexagonTargetMachine.cpp
+ HexagonTargetObjectFile.cpp
+ )
+
+add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
+
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
new file mode 100644
index 0000000..a5f2279
--- /dev/null
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -0,0 +1,54 @@
+//=-- Hexagon.h - Top-level interface for Hexagon representation --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Hexagon back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_Hexagon_H
+#define TARGET_Hexagon_H
+
+#include <cassert>
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ class FunctionPass;
+ class TargetMachine;
+ class HexagonTargetMachine;
+ class raw_ostream;
+
+ FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM);
+ FunctionPass *createHexagonFPMoverPass(TargetMachine &TM);
+ FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonCFGOptimizer(HexagonTargetMachine &TM);
+
+ FunctionPass* createHexagonSplitTFRCondSets(HexagonTargetMachine &TM);
+ FunctionPass* createHexagonExpandPredSpillCode(HexagonTargetMachine &TM);
+
+ FunctionPass *createHexagonHardwareLoops();
+ FunctionPass *createHexagonOptimizeSZExtends();
+ FunctionPass *createHexagonFixupHwLoops();
+
+} // end namespace llvm
+
+#define Hexagon_POINTER_SIZE 4
+
+#define Hexagon_PointerSize (Hexagon_POINTER_SIZE)
+#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8)
+#define Hexagon_WordSize Hexagon_PointerSize
+#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits
+
+// allocframe saves LR and FP on stack before allocating
+// a new stack frame. This takes 8 bytes.
+#define HEXAGON_LRFP_SIZE 8
+
+#endif
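
The macros above fix two constants the rest of the backend leans on: a
4-byte pointer/word size, and the 8 bytes allocframe consumes for the saved
LR/FP pair. A standalone sketch of the resulting stack arithmetic (the
24-byte locals figure is invented for illustration):

    #include <cstdio>

    // Mirrored from Hexagon.h so the sketch compiles on its own.
    #define Hexagon_POINTER_SIZE 4
    #define HEXAGON_LRFP_SIZE    8

    int main() {
      // A callee needing 24 bytes of locals executes allocframe(#24): the
      // hardware first pushes LR and FP (HEXAGON_LRFP_SIZE bytes), then
      // drops SP by the requested amount.
      int Locals = 24;
      printf("stack delta: %d bytes (%d-byte LR/FP area + %d locals)\n",
             HEXAGON_LRFP_SIZE + Locals, HEXAGON_LRFP_SIZE, Locals);
      printf("pointer width: %d bytes\n", Hexagon_POINTER_SIZE);
      return 0;
    }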
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
new file mode 100644
index 0000000..72939e6
--- /dev/null
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -0,0 +1,66 @@
+//===- Hexagon.td - Describe the Hexagon Target Machine ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Hexagon Subtarget features.
+//
+
+
+// Hexagon Architectures
+def ArchV2 : SubtargetFeature<"v2", "HexagonArchVersion", "V2",
+ "Hexagon v2">;
+def ArchV3 : SubtargetFeature<"v3", "HexagonArchVersion", "V3",
+ "Hexagon v3">;
+def ArchV4 : SubtargetFeature<"v4", "HexagonArchVersion", "V4",
+ "Hexagon v4">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+include "HexagonSchedule.td"
+include "HexagonRegisterInfo.td"
+include "HexagonCallingConv.td"
+include "HexagonInstrInfo.td"
+include "HexagonIntrinsics.td"
+include "HexagonIntrinsicsDerived.td"
+
+
+def HexagonInstrInfo : InstrInfo {
+ // Define how we want to layout our target-specific information field.
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, ProcessorItineraries Itin,
+ list<SubtargetFeature> Features>
+ : Processor<Name, Itin, Features>;
+
+def : Proc<"hexagonv2", HexagonItineraries, [ArchV2]>;
+def : Proc<"hexagonv3", HexagonItineraries, [ArchV2, ArchV3]>;
+def : Proc<"hexagonv4", HexagonItinerariesV4, [ArchV2, ArchV3, ArchV4]>;
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Hexagon : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = HexagonInstrInfo;
+}
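
Each Proc definition above hands the subtarget a cumulative feature list:
hexagonv4 enables the v2 and v3 architecture features alongside v4. A
standalone sketch of that accumulation (the set contents mirror the Proc
lines; everything else is invented for illustration):

    #include <cassert>
    #include <set>
    #include <string>

    int main() {
      // hexagonv4 = [ArchV2, ArchV3, ArchV4]: newer CPUs keep older features.
      std::set<std::string> V4Features;
      V4Features.insert("v2");
      V4Features.insert("v3");
      V4Features.insert("v4");
      assert(V4Features.count("v2") && "a v4 part retains v2 features");
      return 0;
    }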
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
new file mode 100644
index 0000000..8f8e804
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -0,0 +1,555 @@
+//===-- HexagonAsmPrinter.cpp - Print machine instrs to Hexagon assembly ----=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Hexagon assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+// Documentation at http://developer.apple.com/documentation/DeveloperTools/
+// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
+//
+//===----------------------------------------------------------------------===//
+
+
+#define DEBUG_TYPE "asm-printer"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> AlignCalls(
+ "hexagon-align-calls", cl::Hidden, cl::init(true),
+ cl::desc("Insert falign after call instruction for Hexagon target"));
+
+
+namespace {
+ class HexagonAsmPrinter : public AsmPrinter {
+ const HexagonSubtarget *Subtarget;
+
+ public:
+ explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<HexagonSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Hexagon Assembly Printer";
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+    /// from the instruction set description. It prints the given machine
+    /// instruction to the supplied output stream.
+ void printInstruction(const MachineInstr *MI, raw_ostream &O);
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ void printOp(const MachineOperand &MO, raw_ostream &O);
+
+ /// printRegister - Print register according to target requirements.
+ ///
+ void printRegister(const MachineOperand &MO, bool R0AsZero,
+ raw_ostream &O) {
+ unsigned RegNo = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??");
+ O << getRegisterName(RegNo);
+ }
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.isReg()) {
+ printRegister(MO, false, OS);
+ } else if (MO.isImm()) {
+ OS << MO.getImm();
+ } else {
+ printOp(MO, OS);
+ }
+ }
+
+
+ bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const;
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS);
+
+
+ void printHexagonImmOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int value = MI->getOperand(OpNo).getImm();
+ O << value;
+ }
+
+
+ void printHexagonNegImmOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int value = MI->getOperand(OpNo).getImm();
+ O << -value;
+ }
+
+ void printHexagonMEMriOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+ O << getRegisterName(MO1.getReg())
+ << " + #"
+ << (int) MO2.getImm();
+ }
+
+
+ void printHexagonFrameIndexOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+ O << getRegisterName(MO1.getReg())
+ << ", #"
+ << MO2.getImm();
+ }
+
+ void printBranchOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print $+8, an eight byte displacement from the PC.
+ if (MI->getOperand(OpNo).isImm()) {
+ O << "$+" << MI->getOperand(OpNo).getImm()*4;
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ }
+ }
+
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ }
+
+ void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ }
+
+
+ void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ O << "#HI(";
+ if (MI->getOperand(OpNo).isImm()) {
+ printHexagonImmOperand(MI, OpNo, O);
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ }
+ O << ")";
+ }
+
+ void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ O << "#HI(";
+ if (MI->getOperand(OpNo).isImm()) {
+ printHexagonImmOperand(MI, OpNo, O);
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ }
+ O << ")";
+ }
+
+ void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O);
+
+ void printAddrModeBasePlusOffset(const MachineInstr *MI, int OpNo,
+ raw_ostream &O);
+
+ void printGlobalOperand(const MachineInstr *MI, int OpNo, raw_ostream &O);
+ void printJumpTable(const MachineInstr *MI, int OpNo, raw_ostream &O);
+
+ void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const;
+
+ static const char *getRegisterName(unsigned RegNo);
+ };
+
+} // end of anonymous namespace
+
+// Include the auto-generated portion of the assembly writer.
+#include "HexagonGenAsmWriter.inc"
+
+
+void HexagonAsmPrinter::EmitAlignment(unsigned NumBits,
+ const GlobalValue *GV) const {
+
+ // For basic block level alignment, use falign.
+ if (!GV) {
+ OutStreamer.EmitRawText(StringRef("\t.falign"));
+ return;
+ }
+
+ AsmPrinter::EmitAlignment(NumBits, GV);
+}
+
+void HexagonAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ dbgs() << "printOp() does not handle immediate values\n";
+ abort();
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << *GetJTISymbol(MO.getIndex());
+ // FIXME: PIC relocation model.
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << *GetCPISymbol(MO.getIndex());
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ // Computing the address of a global symbol, not calling it.
+ O << *Mang->getSymbol(MO.getGlobal());
+ printOffset(MO.getOffset(), O);
+ return;
+ }
+
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+
+//
+// isBlockOnlyReachableByFallthrough - We need to override this since the
+// default AsmPrinter does not print labels for any basic block that is
+// only reachable by a fall-through. That works for all cases except when
+// the block is also reachable via an indirect branch through a jump table;
+// the jump table would then contain a label that AsmPrinter never defines.
+//
+bool HexagonAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ if (MBB->hasAddressTaken()) {
+ return false;
+ }
+ return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
+}
+
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &OS) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'c': // Don't print "$" before a global var name or constant.
+ // Hexagon never has a prefix.
+ printOperand(MI, OpNo, OS);
+ return false;
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ OS << "i";
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo, OS);
+ return false;
+}
+
+bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ const MachineOperand &Base = MI->getOperand(OpNo);
+ const MachineOperand &Offset = MI->getOperand(OpNo+1);
+
+ if (Base.isReg())
+ printOperand(MI, OpNo, O);
+ else
+ assert(0 && "Unimplemented");
+
+ if (Offset.isImm()) {
+ if (Offset.getImm())
+ O << " + #" << Offset.getImm();
+ }
+ else
+ assert(0 && "Unimplemented");
+
+ return false;
+}
+
+void HexagonAsmPrinter::printPredicateOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
+ assert(0 && "Unimplemented");
+}
+
+
+/// EmitInstruction - Print out a single Hexagon MI to the current output
+/// stream.
+///
+void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream O(Str);
+
+ const MachineFunction* MF = MI->getParent()->getParent();
+ const HexagonMachineFunctionInfo* MFI =
+ (const HexagonMachineFunctionInfo*)
+ MF->getInfo<HexagonMachineFunctionInfo>();
+
+
+
+ // Print a brace for the beginning of the packet.
+ if (MFI->isStartPacket(MI)) {
+ O << "\t{" << '\n';
+ }
+
+ DEBUG( O << "// MI = " << *MI << '\n';);
+
+ // Indent
+ O << "\t";
+
+
+ if (MI->getOpcode() == Hexagon::ENDLOOP0) {
+ if (MFI->isEndPacket(MI) && MFI->isStartPacket(MI)) {
+ O << "\t{ nop }";
+ } else {
+ O << "}";
+ }
+ printInstruction(MI, O);
+ } else if (MI->getOpcode() == Hexagon::STriwt) {
+ //
+ // Handle truncated store on Hexagon.
+ //
+ O << "\tmemw(";
+ printHexagonMEMriOperand(MI, 0, O);
+
+ O << ") = ";
+ unsigned SubRegNum =
+ TM.getRegisterInfo()->getSubReg(MI->getOperand(2)
+ .getReg(), Hexagon::subreg_loreg);
+ const char *SubRegName = getRegisterName(SubRegNum);
+ O << SubRegName << '\n';
+ } else if (MI->getOpcode() == Hexagon::MPYI_rin) {
+    // Handle multiply with a negative constant on Hexagon:
+ // "$dst =- mpyi($src1, #$src2)"
+ printOperand(MI, 0, O);
+ O << " =- mpyi(";
+ printOperand(MI, 1, O);
+ O << ", #";
+ printHexagonNegImmOperand(MI, 2, O);
+ O << ")";
+ } else if (MI->getOpcode() == Hexagon::MEMw_ADDSUBi_indexed_MEM_V4) {
+ //
+ // Handle memw(Rs+u6:2) [+-]= #U5
+ //
+ O << "\tmemw("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::MEMw_ADDSUBi_MEM_V4) {
+ //
+ // Handle memw(Rs+u6:2) [+-]= #U5
+ //
+ O << "\tmemw("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::MEMh_ADDSUBi_indexed_MEM_V4) {
+ //
+ // Handle memh(Rs+u6:1) [+-]= #U5
+ //
+ O << "\tmemh("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::MEMh_ADDSUBi_MEM_V4) {
+ //
+ // Handle memh(Rs+u6:1) [+-]= #U5
+ //
+ O << "\tmemh("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::MEMb_ADDSUBi_indexed_MEM_V4) {
+ //
+ // Handle memb(Rs+u6:1) [+-]= #U5
+ //
+ O << "\tmemb("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::MEMb_ADDSUBi_MEM_V4) {
+ //
+ // Handle memb(Rs+u6:1) [+-]= #U5
+ //
+ O << "\tmemb("; printHexagonMEMriOperand(MI, 0, O); O << ") ";
+ int addend = MI->getOperand(2).getImm();
+ if (addend < 0)
+ O << "-= " << "#" << -addend << '\n';
+ else
+ O << "+= " << "#" << addend << '\n';
+ } else if (MI->getOpcode() == Hexagon::CMPbGTri_V4) {
+ //
+ // Handle Pd=cmpb.gt(Rs,#s8)
+ //
+ O << "\t";
+ printRegister(MI->getOperand(0), false, O);
+ O << " = cmpb.gt(";
+ printRegister(MI->getOperand(1), false, O);
+ O << ", ";
+ int val = MI->getOperand(2).getImm() >> 24;
+ O << "#" << val << ")" << '\n';
+ } else if (MI->getOpcode() == Hexagon::CMPhEQri_V4) {
+ //
+    // Handle Pd=cmph.eq(Rs,#s8)
+ //
+ O << "\t";
+ printRegister(MI->getOperand(0), false, O);
+ O << " = cmph.eq(";
+ printRegister(MI->getOperand(1), false, O);
+ O << ", ";
+ int val = MI->getOperand(2).getImm();
+ assert((((0 <= val) && (val <= 127)) ||
+ ((65408 <= val) && (val <= 65535))) &&
+ "Not in correct range!");
+ if (val >= 65408) val -= 65536;
+ O << "#" << val << ")" << '\n';
+ } else if (MI->getOpcode() == Hexagon::CMPhGTri_V4) {
+ //
+    // Handle Pd=cmph.gt(Rs,#s8)
+ //
+ O << "\t";
+ printRegister(MI->getOperand(0), false, O);
+ O << " = cmph.gt(";
+ printRegister(MI->getOperand(1), false, O);
+ O << ", ";
+ int val = MI->getOperand(2).getImm() >> 16;
+ O << "#" << val << ")" << '\n';
+ } else {
+ printInstruction(MI, O);
+ }
+
+ // Print a brace for the end of the packet.
+ if (MFI->isEndPacket(MI) && MI->getOpcode() != Hexagon::ENDLOOP0) {
+ O << "\n\t}" << '\n';
+ }
+
+ if (AlignCalls && MI->getDesc().isCall()) {
+ O << "\n\t.falign" << "\n";
+ }
+
+ OutStreamer.EmitRawText(O.str());
+ return;
+}
+
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \n or \0.
+// static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+// for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+// Name != E; ++Name)
+// if (isprint(*Name))
+// OS << *Name;
+// }
+
+
+void HexagonAsmPrinter::printAddrModeBasePlusOffset(const MachineInstr *MI,
+ int OpNo, raw_ostream &O) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+ O << getRegisterName(MO1.getReg())
+ << " + #"
+ << MO2.getImm();
+}
+
+
+void HexagonAsmPrinter::printGlobalOperand(const MachineInstr *MI, int OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ assert( (MO.getType() == MachineOperand::MO_GlobalAddress) &&
+ "Expecting global address");
+
+ O << *Mang->getSymbol(MO.getGlobal());
+ if (MO.getOffset() != 0) {
+ O << " + ";
+ O << MO.getOffset();
+ }
+}
+
+void HexagonAsmPrinter::printJumpTable(const MachineInstr *MI, int OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ assert( (MO.getType() == MachineOperand::MO_JumpTableIndex) &&
+ "Expecting jump table index");
+
+ // Hexagon_TODO: Do we need name mangling?
+ O << *GetJTISymbol(MO.getIndex());
+}
+
+extern "C" void LLVMInitializeHexagonAsmPrinter() {
+ RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget);
+}
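
The brace handling in EmitInstruction is the core of this printer: an
instruction that starts a packet opens "{", one that ends a packet closes
"}", and everything between shares the packet. A simplified standalone
sketch of just that rule (it omits the ENDLOOP0 and .falign special cases;
the instruction text is invented):

    #include <cstdio>

    // Wrap an instruction in packet braces the way EmitInstruction does
    // for isStartPacket/isEndPacket instructions.
    static void emit(bool StartPacket, bool EndPacket, const char *Insn) {
      if (StartPacket)
        printf("\t{\n");
      printf("\t%s\n", Insn);
      if (EndPacket)
        printf("\n\t}\n");
    }

    int main() {
      emit(true, false, "r0 = add(r1, r2)");
      emit(false, true, "memw(r29 + #0) = r0");
      return 0;
    }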
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
new file mode 100644
index 0000000..38000e7
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -0,0 +1,240 @@
+//===---- HexagonCFGOptimizer.cpp - CFG optimizations ---------------------===//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#define DEBUG_TYPE "hexagon_cfg"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include <iostream>
+
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+namespace {
+
+class HexagonCFGOptimizer : public MachineFunctionPass {
+
+private:
+ HexagonTargetMachine& QTM;
+ const HexagonSubtarget &QST;
+
+ void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*);
+
+ public:
+ static char ID;
+ HexagonCFGOptimizer(HexagonTargetMachine& TM) : MachineFunctionPass(ID),
+ QTM(TM),
+ QST(*TM.getSubtargetImpl()) {}
+
+ const char *getPassName() const {
+ return "Hexagon CFG Optimizer";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonCFGOptimizer::ID = 0;
+
+static bool IsConditionalBranch(int Opc) {
+ return (Opc == Hexagon::JMP_Pred) || (Opc == Hexagon::JMP_PredNot)
+ || (Opc == Hexagon::JMP_PredPt) || (Opc == Hexagon::JMP_PredNotPt);
+}
+
+
+static bool IsUnconditionalJump(int Opc) {
+ return (Opc == Hexagon::JMP);
+}
+
+
+void
+HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI,
+ MachineBasicBlock* NewTarget) {
+ const HexagonInstrInfo *QII = QTM.getInstrInfo();
+ int NewOpcode = 0;
+ switch(MI->getOpcode()) {
+ case Hexagon::JMP_Pred:
+ NewOpcode = Hexagon::JMP_PredNot;
+ break;
+
+ case Hexagon::JMP_PredNot:
+ NewOpcode = Hexagon::JMP_Pred;
+ break;
+
+ case Hexagon::JMP_PredPt:
+ NewOpcode = Hexagon::JMP_PredNotPt;
+ break;
+
+ case Hexagon::JMP_PredNotPt:
+ NewOpcode = Hexagon::JMP_PredPt;
+ break;
+
+ default:
+ assert(0 && "Cannot handle this case");
+ }
+
+ MI->setDesc(QII->get(NewOpcode));
+ MI->getOperand(1).setMBB(NewTarget);
+}
+
+
+bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+
+ // Traverse the basic block.
+ MachineBasicBlock::iterator MII = MBB->getFirstTerminator();
+ if (MII != MBB->end()) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (IsConditionalBranch(Opc)) {
+
+ //
+ // (Case 1) Transform the code if the following condition occurs:
+ // BB1: if (p0) jump BB3
+ // ...falls-through to BB2 ...
+ // BB2: jump BB4
+ // ...next block in layout is BB3...
+ // BB3: ...
+ //
+ // Transform this to:
+ // BB1: if (!p0) jump BB4
+ // Remove BB2
+ // BB3: ...
+ //
+ // (Case 2) A variation occurs when BB3 contains a JMP to BB4:
+ // BB1: if (p0) jump BB3
+ // ...falls-through to BB2 ...
+ // BB2: jump BB4
+ // ...other basic blocks ...
+ // BB4:
+ // ...not a fall-thru
+ // BB3: ...
+ // jump BB4
+ //
+ // Transform this to:
+ // BB1: if (!p0) jump BB4
+ // Remove BB2
+ // BB3: ...
+ // BB4: ...
+ //
+ unsigned NumSuccs = MBB->succ_size();
+ MachineBasicBlock::succ_iterator SI = MBB->succ_begin();
+ MachineBasicBlock* FirstSucc = *SI;
+ MachineBasicBlock* SecondSucc = *(++SI);
+ MachineBasicBlock* LayoutSucc = NULL;
+ MachineBasicBlock* JumpAroundTarget = NULL;
+
+ if (MBB->isLayoutSuccessor(FirstSucc)) {
+ LayoutSucc = FirstSucc;
+ JumpAroundTarget = SecondSucc;
+ } else if (MBB->isLayoutSuccessor(SecondSucc)) {
+ LayoutSucc = SecondSucc;
+ JumpAroundTarget = FirstSucc;
+ } else {
+ // Odd case...cannot handle.
+ }
+
+ // The target of the unconditional branch must be JumpAroundTarget.
+ // TODO: If not, we should not invert the unconditional branch.
+ MachineBasicBlock* CondBranchTarget = NULL;
+ if ((MI->getOpcode() == Hexagon::JMP_Pred) ||
+ (MI->getOpcode() == Hexagon::JMP_PredNot)) {
+ CondBranchTarget = MI->getOperand(1).getMBB();
+ }
+
+ if (!LayoutSucc || (CondBranchTarget != JumpAroundTarget)) {
+ continue;
+ }
+
+ if ((NumSuccs == 2) && LayoutSucc && (LayoutSucc->pred_size() == 1)) {
+
+ // Ensure that BB2 has one instruction -- an unconditional jump.
+ if ((LayoutSucc->size() == 1) &&
+ IsUnconditionalJump(LayoutSucc->front().getOpcode())) {
+ MachineBasicBlock* UncondTarget =
+ LayoutSucc->front().getOperand(0).getMBB();
+ // Check if the layout successor of BB2 is BB3.
+ bool case1 = LayoutSucc->isLayoutSuccessor(JumpAroundTarget);
+ bool case2 = JumpAroundTarget->isSuccessor(UncondTarget) &&
+ JumpAroundTarget->size() >= 1 &&
+ IsUnconditionalJump(JumpAroundTarget->back().getOpcode()) &&
+ JumpAroundTarget->pred_size() == 1 &&
+ JumpAroundTarget->succ_size() == 1;
+
+ if (case1 || case2) {
+ InvertAndChangeJumpTarget(MI, UncondTarget);
+ MBB->removeSuccessor(JumpAroundTarget);
+ MBB->addSuccessor(UncondTarget);
+
+ // Remove the unconditional branch in LayoutSucc.
+ LayoutSucc->erase(LayoutSucc->begin());
+ LayoutSucc->removeSuccessor(UncondTarget);
+ LayoutSucc->addSuccessor(JumpAroundTarget);
+
+ // This code performs the conversion for case 2, which moves
+ // the block to the fall-thru case (BB3 in the code above).
+ if (case2 && !case1) {
+ JumpAroundTarget->moveAfter(LayoutSucc);
+                // Only move a block if it doesn't have a fall-through;
+                // otherwise the CFG will be incorrect.
+ if (!UncondTarget->canFallThrough()) {
+ UncondTarget->moveAfter(JumpAroundTarget);
+ }
+ }
+
+ //
+              // Correct the live-in information, which the post-RA
+              // scheduler uses. The live-ins of LayoutSucc are now exactly
+              // the values live-in to JumpAroundTarget.
+ //
+ std::vector<unsigned> OrigLiveIn(LayoutSucc->livein_begin(),
+ LayoutSucc->livein_end());
+ std::vector<unsigned> NewLiveIn(JumpAroundTarget->livein_begin(),
+ JumpAroundTarget->livein_end());
+ for (unsigned i = 0; i < OrigLiveIn.size(); ++i) {
+ LayoutSucc->removeLiveIn(OrigLiveIn[i]);
+ }
+ for (unsigned i = 0; i < NewLiveIn.size(); ++i) {
+ LayoutSucc->addLiveIn(NewLiveIn[i]);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+} // end anonymous namespace
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonCFGOptimizer(HexagonTargetMachine &TM) {
+ return new HexagonCFGOptimizer(TM);
+}
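
The transformation hinges on InvertAndChangeJumpTarget mapping each
predicated jump to its negated form, which makes the rewrite reversible. A
standalone sketch with mock opcodes standing in for the Hexagon::JMP_Pred*
values:

    #include <cassert>

    enum Opc { JMP_Pred, JMP_PredNot, JMP_PredPt, JMP_PredNotPt };

    static Opc invert(Opc O) {
      switch (O) {
      case JMP_Pred:      return JMP_PredNot;
      case JMP_PredNot:   return JMP_Pred;
      case JMP_PredPt:    return JMP_PredNotPt;
      case JMP_PredNotPt: return JMP_PredPt;
      }
      return O; // unreachable for the values above
    }

    int main() {
      // Inverting twice restores the original opcode.
      assert(invert(invert(JMP_PredPt)) == JMP_PredPt);
      return 0;
    }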
diff --git a/lib/Target/Hexagon/HexagonCallingConv.td b/lib/Target/Hexagon/HexagonCallingConv.td
new file mode 100644
index 0000000..bd9608b
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCallingConv.td
@@ -0,0 +1,35 @@
+//===- HexagonCallingConv.td - Calling Conventions Hexagon -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Hexagon architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Hexagon 32-bit C return-value convention.
+def RetCC_Hexagon32 : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
+ CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>,
+
+ // Alternatively, they are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
+
+// Hexagon 32-bit C Calling convention.
+def CC_Hexagon32 : CallingConv<[
+ // All arguments get passed in integer registers if there is space.
+ CCIfType<[i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
+ CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>,
+
+ // Alternatively, they are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
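
Concretely, CC_Hexagon32 places the first six word-sized arguments in R0-R5
and pushes the remainder into 4-byte-aligned stack slots. A standalone
sketch of that assignment order (an eight-argument call is invented for
illustration):

    #include <cstdio>

    int main() {
      const char *GPRs[6] = {"R0", "R1", "R2", "R3", "R4", "R5"};
      unsigned StackOffset = 0;
      for (unsigned Arg = 0; Arg < 8; ++Arg) {
        if (Arg < 6) {
          printf("arg%u -> %s\n", Arg, GPRs[Arg]);
        } else {
          // CCAssignToStack<4, 4>: 4-byte slots, 4-byte aligned.
          printf("arg%u -> stack+%u\n", Arg, StackOffset);
          StackOffset += 4;
        }
      }
      return 0;
    }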
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
new file mode 100644
index 0000000..2e51dbf
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
@@ -0,0 +1,207 @@
+//===-- llvm/CallingConvLower.cpp - Calling Convention lowering -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Hexagon_CCState class, used for lowering and
+// implementing calling conventions. It is adapted from the machine-independent
+// version of the class (CCState), but also handles calls to varargs functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonCallingConvLower.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "Hexagon.h"
+using namespace llvm;
+
+Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
+ const TargetMachine &tm,
+ SmallVector<CCValAssign, 16> &locs,
+ LLVMContext &c)
+ : CallingConv(CC), IsVarArg(isVarArg), TM(tm),
+ TRI(*TM.getRegisterInfo()), Locs(locs), Context(c) {
+ // No stack is used.
+ StackOffset = 0;
+
+ UsedRegs.resize((TRI.getNumRegs()+31)/32);
+}
+
+// HandleByVal - Allocate a stack slot large enough to pass an argument by
+// value. The size and alignment information of the argument is encoded in its
+// parameter attribute.
+void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign,
+ ISD::ArgFlagsTy ArgFlags) {
+ unsigned Align = ArgFlags.getByValAlign();
+ unsigned Size = ArgFlags.getByValSize();
+ if (MinSize > (int)Size)
+ Size = MinSize;
+ if (MinAlign > (int)Align)
+ Align = MinAlign;
+ unsigned Offset = AllocateStack(Size, Align);
+
+ addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset,
+ LocVT.getSimpleVT(), LocInfo));
+}
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
+void Hexagon_CCState::MarkAllocated(unsigned Reg) {
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+
+ if (const unsigned *RegAliases = TRI.getAliasSet(Reg))
+ for (; (Reg = *RegAliases); ++RegAliases)
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+}
+
+/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+/// incorporating info about the formals into this state.
+void
+Hexagon_CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ Hexagon_CCAssignFn Fn,
+ unsigned SretValueInRegs) {
+ unsigned NumArgs = Ins.size();
+ unsigned i = 0;
+
+ // If the function returns a small struct in registers, skip
+ // over the first (dummy) argument.
+ if (SretValueInRegs != 0) {
+ ++i;
+ }
+
+
+ for (; i != NumArgs; ++i) {
+ EVT ArgVT = Ins[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, 0, 0, false)) {
+ dbgs() << "Formal argument #" << i << " has unhandled type "
+ << ArgVT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+/// incorporating info about the result values into this state.
+void
+Hexagon_CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ Hexagon_CCAssignFn Fn,
+ unsigned SretValueInRegs) {
+
+  // For Hexagon, return small structures in registers.
+ if (SretValueInRegs != 0) {
+ if (SretValueInRegs <= 32) {
+ unsigned Reg = Hexagon::R0;
+ addLoc(CCValAssign::getReg(0, MVT::i32, Reg, MVT::i32,
+ CCValAssign::Full));
+ return;
+ }
+ if (SretValueInRegs <= 64) {
+ unsigned Reg = Hexagon::D0;
+ addLoc(CCValAssign::getReg(0, MVT::i64, Reg, MVT::i64,
+ CCValAssign::Full));
+ return;
+ }
+ }
+
+
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ EVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this, -1, -1, false)){
+ dbgs() << "Return operand #" << i << " has unhandled type "
+ << VT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+
+/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+/// about the passed values into this state.
+void
+Hexagon_CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ Hexagon_CCAssignFn Fn,
+ int NonVarArgsParams,
+ unsigned SretValueSize) {
+ unsigned NumOps = Outs.size();
+
+ unsigned i = 0;
+ // If the called function returns a small struct in registers, skip
+ // the first actual parameter. We do not want to pass a pointer to
+ // the stack location.
+ if (SretValueSize != 0) {
+ ++i;
+ }
+
+ for (; i != NumOps; ++i) {
+ EVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this,
+ NonVarArgsParams, i+1, false)) {
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallOperands - Same as above except it takes vectors of types
+/// and argument flags.
+void
+Hexagon_CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ Hexagon_CCAssignFn Fn) {
+ unsigned NumOps = ArgVTs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ EVT ArgVT = ArgVTs[i];
+ ISD::ArgFlagsTy ArgFlags = Flags[i];
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, -1, -1,
+ false)) {
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+/// incorporating info about the passed values into this state.
+void
+Hexagon_CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ Hexagon_CCAssignFn Fn,
+ unsigned SretValueInRegs) {
+
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ EVT VT = Ins[i].VT;
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this, -1, -1, false)) {
+ dbgs() << "Call result #" << i << " has unhandled type "
+ << VT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallResult - Same as above except it's specialized for calls which
+/// produce a single value.
+void Hexagon_CCState::AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn) {
+ if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this, -1, -1,
+ false)) {
+ dbgs() << "Call result has unhandled type "
+ << VT.getEVTString() << "\n";
+ abort();
+ }
+}
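
The small-struct return rule in AnalyzeReturn reduces to a size check: up
to 32 bits of struct come back in R0, up to 64 bits in the D0 pair, and
anything larger takes the normal path. A standalone sketch of that decision
(the function and names are invented for illustration):

    #include <cstdio>

    static const char *sretLocation(unsigned Bits) {
      if (Bits == 0)  return "memory (ordinary sret pointer)";
      if (Bits <= 32) return "R0";
      if (Bits <= 64) return "D0";
      return "memory";
    }

    int main() {
      printf("struct{int}     -> %s\n", sretLocation(32));
      printf("struct{int,int} -> %s\n", sretLocation(64));
      printf("struct{int[3]}  -> %s\n", sretLocation(96));
      return 0;
    }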
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.h b/lib/Target/Hexagon/HexagonCallingConvLower.h
new file mode 100644
index 0000000..1f601e8
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.h
@@ -0,0 +1,189 @@
+//===-- HexagonCallingConvLower.h - Calling Conventions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon_CCState class, used for lowering
+// and implementing calling conventions. It is adapted from the target-independent
+// version, but also handles calls to varargs functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
+#define LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+
+//
+// Need to handle varargs.
+//
+namespace llvm {
+ class TargetRegisterInfo;
+ class TargetMachine;
+ class Hexagon_CCState;
+ class SDNode;
+
+
+/// Hexagon_CCAssignFn - This function assigns a location for Val, updating
+/// State to reflect the change.
+typedef bool Hexagon_CCAssignFn(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem);
+
+
+/// Hexagon_CCState - This class holds information needed while lowering arguments and
+/// return values. It captures which registers are already assigned and which
+/// stack slots are used. It provides accessors to allocate these values.
+class Hexagon_CCState {
+ CallingConv::ID CallingConv;
+ bool IsVarArg;
+ const TargetMachine &TM;
+ const TargetRegisterInfo &TRI;
+ SmallVector<CCValAssign, 16> &Locs;
+ LLVMContext &Context;
+
+ unsigned StackOffset;
+ SmallVector<uint32_t, 16> UsedRegs;
+public:
+ Hexagon_CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM,
+ SmallVector<CCValAssign, 16> &locs, LLVMContext &c);
+
+ void addLoc(const CCValAssign &V) {
+ Locs.push_back(V);
+ }
+
+ LLVMContext &getContext() const { return Context; }
+ const TargetMachine &getTarget() const { return TM; }
+ unsigned getCallingConv() const { return CallingConv; }
+ bool isVarArg() const { return IsVarArg; }
+
+ unsigned getNextStackOffset() const { return StackOffset; }
+
+ /// isAllocated - Return true if the specified register (or an alias) is
+ /// allocated.
+ bool isAllocated(unsigned Reg) const {
+ return UsedRegs[Reg/32] & (1 << (Reg&31));
+ }
+
+ /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+ /// incorporating info about the formals into this state.
+ void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+ /// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+ /// incorporating info about the result values into this state.
+ void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+ /// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+ /// about the passed values into this state.
+ void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ Hexagon_CCAssignFn Fn, int NonVarArgsParams,
+ unsigned SretValueSize);
+
+ /// AnalyzeCallOperands - Same as above except it takes vectors of types
+ /// and argument flags.
+ void AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ Hexagon_CCAssignFn Fn);
+
+ /// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+ /// incorporating info about the passed values into this state.
+ void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+ /// AnalyzeCallResult - Same as above except it's specialized for calls which
+ /// produce a single value.
+ void AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn);
+
+ /// getFirstUnallocated - Return the first unallocated register in the set, or
+ /// NumRegs if they are all allocated.
+ unsigned getFirstUnallocated(const unsigned *Regs, unsigned NumRegs) const {
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if (!isAllocated(Regs[i]))
+ return i;
+ return NumRegs;
+ }
+
+ /// AllocateReg - Attempt to allocate one register. If it is not available,
+ /// return zero. Otherwise, return the register, marking it and any aliases
+ /// as allocated.
+ unsigned AllocateReg(unsigned Reg) {
+ if (isAllocated(Reg)) return 0;
+ MarkAllocated(Reg);
+ return Reg;
+ }
+
+ /// Version of AllocateReg with extra register to be shadowed.
+ unsigned AllocateReg(unsigned Reg, unsigned ShadowReg) {
+ if (isAllocated(Reg)) return 0;
+ MarkAllocated(Reg);
+ MarkAllocated(ShadowReg);
+ return Reg;
+ }
+
+ /// AllocateReg - Attempt to allocate one of the specified registers. If none
+ /// are available, return zero. Otherwise, return the first one available,
+ /// marking it and any aliases as allocated.
+ unsigned AllocateReg(const unsigned *Regs, unsigned NumRegs) {
+ unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
+ if (FirstUnalloc == NumRegs)
+ return 0; // Didn't find the reg.
+
+ // Mark the register and any aliases as allocated.
+ unsigned Reg = Regs[FirstUnalloc];
+ MarkAllocated(Reg);
+ return Reg;
+ }
+
+ /// Version of AllocateReg with list of registers to be shadowed.
+ unsigned AllocateReg(const unsigned *Regs, const unsigned *ShadowRegs,
+ unsigned NumRegs) {
+ unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
+ if (FirstUnalloc == NumRegs)
+ return 0; // Didn't find the reg.
+
+ // Mark the register and any aliases as allocated.
+ unsigned Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc];
+ MarkAllocated(Reg);
+ MarkAllocated(ShadowReg);
+ return Reg;
+ }
+
+ /// AllocateStack - Allocate a chunk of stack space with the specified size
+ /// and alignment.
+ unsigned AllocateStack(unsigned Size, unsigned Align) {
+ assert(Align && ((Align-1) & Align) == 0); // Align is power of 2.
+ StackOffset = ((StackOffset + Align-1) & ~(Align-1));
+ unsigned Result = StackOffset;
+ StackOffset += Size;
+ return Result;
+ }
+
+ // HandleByVal - Allocate a stack slot large enough to pass an argument by
+ // value. The size and alignment information of the argument is encoded in its
+ // parameter attribute.
+ void HandleByVal(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags);
+
+private:
+ /// MarkAllocated - Mark a register and all of its aliases as allocated.
+ void MarkAllocated(unsigned Reg);
+};
+
+
+
+} // end namespace llvm
+
+#endif
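
Register-allocation state in Hexagon_CCState is a plain bitvector: one bit
per physical register, packed 32 to a word, sized to (getNumRegs()+31)/32
words. A standalone sketch of the MarkAllocated/isAllocated bit arithmetic
(the register count and number are invented):

    #include <cassert>
    #include <vector>

    int main() {
      unsigned NumRegs = 100;
      std::vector<unsigned> UsedRegs((NumRegs + 31) / 32);

      unsigned Reg = 37;
      UsedRegs[Reg / 32] |= 1u << (Reg & 31);                 // MarkAllocated
      assert((UsedRegs[Reg / 32] & (1u << (Reg & 31))) != 0); // isAllocated
      return 0;
    }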
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
new file mode 100644
index 0000000..cb73ae0
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -0,0 +1,184 @@
+//===--- HexagonExpandPredSpillCode.cpp - Expand Predicate Spill Code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The Hexagon processor has no instructions that load or store predicate
+// registers directly. So, when these registers must be spilled a general
+// purpose register must be found and the value copied to/from it from/to
+// the predicate register. This code currently does not use the register
+// scavenger mechanism available in the allocator. There are two registers
+// reserved to allow spilling/restoring predicate registers. One is used to
+// hold the predicate value. The other is used when stack frame offsets are
+// too large.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include <map>
+#include <iostream>
+
+#include "llvm/Support/CommandLine.h"
+
+
+using namespace llvm;
+
+
+namespace {
+
+class HexagonExpandPredSpillCode : public MachineFunctionPass {
+ HexagonTargetMachine& QTM;
+ const HexagonSubtarget &QST;
+
+ public:
+ static char ID;
+ HexagonExpandPredSpillCode(HexagonTargetMachine& TM) :
+ MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+
+ const char *getPassName() const {
+ return "Hexagon Expand Predicate Spill Code";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonExpandPredSpillCode::ID = 0;
+
+
+bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
+
+ const HexagonInstrInfo *TII = QTM.getInstrInfo();
+ const HexagonRegisterInfo *RegInfo = QTM.getRegisterInfo();
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (Opc == Hexagon::STriw_pred) {
+ // STriw_pred [R30], ofst, SrcReg;
+ unsigned FP = MI->getOperand(0).getReg();
+ assert(FP == RegInfo->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
+ assert(MI->getOperand(1).isImm() && "Not an offset");
+ int Offset = MI->getOperand(1).getImm();
+ int SrcReg = MI->getOperand(2).getReg();
+ assert(Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ "Not a predicate register");
+ if (!TII->isValidOffset(Hexagon::STriw, Offset)) {
+ if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr),
+ HEXAGON_RESERVED_REG_1)
+ .addReg(FP).addReg(HEXAGON_RESERVED_REG_1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::STriw))
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0).addReg(HEXAGON_RESERVED_REG_2);
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
+ HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw))
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0)
+ .addReg(HEXAGON_RESERVED_REG_2);
+ }
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw)).
+ addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::LDriw_pred) {
+ // DstReg = LDriw_pred [R30], ofst.
+ int DstReg = MI->getOperand(0).getReg();
+ assert(Hexagon::PredRegsRegClass.contains(DstReg) &&
+ "Not a predicate register");
+ unsigned FP = MI->getOperand(1).getReg();
+ assert(FP == RegInfo->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
+ assert(MI->getOperand(2).isImm() && "Not an offset");
+ int Offset = MI->getOperand(2).getImm();
+ if (!TII->isValidOffset(Hexagon::LDriw, Offset)) {
+ if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr),
+ HEXAGON_RESERVED_REG_1)
+ .addReg(FP)
+ .addReg(HEXAGON_RESERVED_REG_1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2)
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
+ HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2)
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ }
+ }
+ }
+
+ return true;
+}
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonExpandPredSpillCode(HexagonTargetMachine &TM) {
+ return new HexagonExpandPredSpillCode(TM);
+}
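
For the common case where the frame offset fits STriw's immediate field,
the pass expands a predicate spill into a predicate-to-GPR transfer plus an
ordinary word store through one of the two reserved registers. A standalone
sketch that prints the expanded sequence (the register names and offset are
invented; the actual scratch registers are the HEXAGON_RESERVED_REG_*
values):

    #include <cstdio>

    int main() {
      int Offset = 16; // assumed to fit STriw's offset field
      printf("r10 = p0               // TFR_RsPd into a reserved scratch reg\n");
      printf("memw(r30 + #%d) = r10  // ordinary word store (STriw)\n", Offset);
      return 0;
    }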
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
new file mode 100644
index 0000000..78e0b1c
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -0,0 +1,333 @@
+//==-- HexagonFrameLowering.cpp - Define frame lowering --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//
+//===----------------------------------------------------------------------===//
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonFrameLowering.h"
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <iostream>
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+static cl::opt<bool> DisableDeallocRet(
+ "disable-hexagon-dealloc-ret",
+ cl::Hidden,
+ cl::desc("Disable Dealloc Return for Hexagon target"));
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void HexagonFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target.
+ unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = RoundUpToAlignment(maxCallFrameSize, TargetAlign);
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = RoundUpToAlignment(FrameSize, TargetAlign);
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
+
+
+void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ const HexagonRegisterInfo *QRI =
+ static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ determineFrameLayout(MF);
+
+ // Check if frame moves are needed for EH.
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ !MF.getFunction()->needsUnwindTableEntry();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ int NumBytes = (int) MFI->getStackSize();
+
+ // LLVM expects allocframe not to be the first instruction in the
+ // basic block.
+ MachineBasicBlock::iterator InsertPt = MBB.begin();
+
+ //
+ // ALLOCA adjust regs. Iterate over ADJDYNALLOC nodes and change the offset.
+ //
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+ const std::vector<MachineInstr*>& AdjustRegs =
+ FuncInfo->getAllocaAdjustInsts();
+ for (std::vector<MachineInstr*>::const_iterator i = AdjustRegs.begin(),
+ e = AdjustRegs.end();
+ i != e; ++i) {
+ MachineInstr* MI = *i;
+ assert((MI->getOpcode() == Hexagon::ADJDYNALLOC) &&
+ "Expected adjust alloca node");
+
+ MachineOperand& MO = MI->getOperand(2);
+ assert(MO.isImm() && "Expected immediate");
+ MO.setImm(MFI->getMaxCallFrameSize());
+ }
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ if (needsFrameMoves) {
+ // Advance CFA. DW_CFA_def_cfa
+ unsigned FPReg = QRI->getFrameRegister();
+ unsigned RAReg = QRI->getRARegister();
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(FPReg, -8);
+ Moves.push_back(MachineMove(0, Dst, Src));
+
+ // R31 = (R31 - #4)
+ MachineLocation LRDst(RAReg, -4);
+ MachineLocation LRSrc(RAReg);
+ Moves.push_back(MachineMove(0, LRDst, LRSrc));
+
+ // R30 = (R30 - #8)
+ MachineLocation SPDst(FPReg, -8);
+ MachineLocation SPSrc(FPReg);
+ Moves.push_back(MachineMove(0, SPDst, SPSrc));
+ }
+
+ //
+ // Only insert ALLOCFRAME if we need to.
+ //
+ if (hasFP(MF)) {
+ // Check for overflow.
+ // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
+ const int ALLOCFRAME_MAX = 16384;
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ if (NumBytes >= ALLOCFRAME_MAX) {
+ // Emit allocframe(#0).
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(0);
+
+ // Subtract offset from frame pointer.
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(NumBytes);
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::SUB_rr),
+ QRI->getStackRegister()).
+ addReg(QRI->getStackRegister()).
+ addReg(HEXAGON_RESERVED_REG_1);
+ } else {
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(NumBytes);
+ }
+ }
+}
+// Returns true if MBB contains a machine instruction that indicates a tail
+// call in the block.
+bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ unsigned RetOpcode = MBBI->getOpcode();
+
+  return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;
+}
+
+void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ DebugLoc dl = MBBI->getDebugLoc();
+ //
+ // Only insert deallocframe if we need to.
+ //
+ if (hasFP(MF)) {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ MachineBasicBlock::iterator MBBI_end = MBB.end();
+ //
+ // For Hexagon, we don't need the frame size.
+ //
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int NumBytes = (int) MFI->getStackSize();
+
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher
+ // versions.
+ if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPR
+ && !DisableDeallocRet) {
+ // Remove jumpr node.
+ MBB.erase(MBBI);
+ // Add dealloc_return.
+ BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::DEALLOC_RET_V4))
+ .addImm(NumBytes);
+ } else { // Add deallocframe for V2 and V3.
+ BuildMI(MBB, MBBI, dl, TII.get(Hexagon::DEALLOCFRAME)).addImm(NumBytes);
+ }
+ }
+}
+
+bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+ return (MFI->hasCalls() || (MFI->getStackSize() > 0) ||
+ FuncInfo->hasClobberLR() );
+}
+
+bool
+HexagonFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ if (CSI.empty()) {
+ return false;
+ }
+
+ // We can only schedule double-word stores if we spill contiguous
+ // callee-saved regs. For instance, we cannot schedule double-word stores
+ // if we spill r24, r26, and r27.
+ // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
+ // above.
+ bool ContiguousRegs = true;
+
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ //
+ // Check if we can use a double-word store.
+ //
+ const unsigned* SuperReg = TRI->getSuperRegisters(Reg);
+
+ // Assume that there is exactly one superreg.
+ assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg");
+ bool CanUseDblStore = false;
+ const TargetRegisterClass* SuperRegClass = 0;
+
+ if (ContiguousRegs && (i < CSI.size()-1)) {
+ const unsigned* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg());
+ assert(SuperRegNext[0] && !SuperRegNext[1] &&
+ "Expected exactly one superreg");
+ SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]);
+ CanUseDblStore = (SuperRegNext[0] == SuperReg[0]);
+ }
+
+
+ if (CanUseDblStore) {
+ TII.storeRegToStackSlot(MBB, MI, SuperReg[0], true,
+ CSI[i+1].getFrameIdx(), SuperRegClass, TRI);
+ MBB.addLiveIn(SuperReg[0]);
+ ++i;
+ } else {
+ // Cannot use a double-word store.
+ ContiguousRegs = false;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC,
+ TRI);
+ MBB.addLiveIn(Reg);
+ }
+ }
+ return true;
+}
+
+
+bool HexagonFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ if (CSI.empty()) {
+ return false;
+ }
+
+ // We can only schedule double-word loads if we spill contiguous
+ // callee-saved regs. For instance, we cannot schedule double-word loads
+ // if we spill r24, r26, and r27.
+ // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
+ // above.
+ bool ContiguousRegs = true;
+
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ //
+ // Check if we can use a double-word load.
+ //
+ const unsigned* SuperReg = TRI->getSuperRegisters(Reg);
+ const TargetRegisterClass* SuperRegClass = 0;
+
+ // Assume that there is exactly one superreg.
+ assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg");
+ bool CanUseDblLoad = false;
+ if (ContiguousRegs && (i < CSI.size()-1)) {
+ const unsigned* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg());
+ assert(SuperRegNext[0] && !SuperRegNext[1] &&
+ "Expected exactly one superreg");
+ SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]);
+ CanUseDblLoad = (SuperRegNext[0] == SuperReg[0]);
+ }
+
+
+ if (CanUseDblLoad) {
+ TII.loadRegFromStackSlot(MBB, MI, SuperReg[0], CSI[i+1].getFrameIdx(),
+ SuperRegClass, TRI);
+ MBB.addLiveIn(SuperReg[0]);
+ ++i;
+ } else {
+ // Cannot use a double-word load.
+ ContiguousRegs = false;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ MBB.addLiveIn(Reg);
+ }
+ }
+ return true;
+}
+
+int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ return MF.getFrameInfo()->getObjectOffset(FI);
+}
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
new file mode 100644
index 0000000..ad87f11
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -0,0 +1,50 @@
+//=- HexagonFrameLowering.h - Define frame lowering for Hexagon --*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGON_FRAMEINFO_H
+#define HEXAGON_FRAMEINFO_H
+
+#include "Hexagon.h"
+#include "HexagonSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class HexagonFrameLowering : public TargetFrameLowering {
+private:
+ const HexagonSubtarget &STI;
+ void determineFrameLayout(MachineFunction &MF) const;
+
+public:
+ explicit HexagonFrameLowering(const HexagonSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) {
+ }
+
+ /// emitPrologue/emitEpilogue - These methods insert prologue and epilogue
+ /// code into the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ virtual bool
+ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ virtual bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasTailCall(MachineBasicBlock &MBB) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
new file mode 100644
index 0000000..c1abc4a
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -0,0 +1,644 @@
+//===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies loops where we can generate the Hexagon hardware
+// loop instruction. The hardware loop can perform loop branches with a
+// zero-cycle overhead.
+//
+// The pattern that defines the induction variable can change depending on
+// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
+// normalizes induction variables, and the Loop Strength Reduction pass
+// run by 'llc' may also make changes to the induction variable.
+// The pattern detected by this phase is due to running Strength Reduction.
+//
+// Criteria for hardware loops:
+// - Countable loops (with an induction variable that determines the trip count)
+// - Assumes loops are normalized by IndVarSimplify
+// - Try inner-most loops first
+// - No nested hardware loops.
+// - No function calls in loops.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hwloops"
+#include "llvm/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+
+namespace {
+ class CountValue;
+ struct HexagonHardwareLoops : public MachineFunctionPass {
+ MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+
+ HexagonHardwareLoops() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Hexagon Hardware Loops"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// getCanonicalInductionVariable - Check to see if the loop has a canonical
+ /// induction variable.
+ /// Should be defined in MachineLoop. Based upon version in class Loop.
+ const MachineInstr *getCanonicalInductionVariable(MachineLoop *L) const;
+
+ /// getTripCount - Return a loop-invariant LLVM register indicating the
+ /// number of times the loop will be executed. If the trip-count cannot
+ /// be determined, this returns null.
+ CountValue *getTripCount(MachineLoop *L) const;
+
+ /// isInductionOperation - Return true if the instruction matches the
+ /// pattern for an operation that defines an induction variable.
+ bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
+
+ /// isInvalidLoopOperation - Return true if the instruction is not valid
+ /// within a hardware loop.
+ bool isInvalidLoopOperation(const MachineInstr *MI) const;
+
+ /// containsInvalidInstruction - Return true if the loop contains an
+ /// instruction that inhibits using the hardware loop.
+ bool containsInvalidInstruction(MachineLoop *L) const;
+
+ /// convertToHardwareLoop - Given a loop, check if we can convert it to a
+ /// hardware loop. If so, then perform the conversion and return true.
+ bool convertToHardwareLoop(MachineLoop *L);
+
+ };
+
+ char HexagonHardwareLoops::ID = 0;
+
+
+ // CountValue class - Abstraction for a trip count of a loop. A
+ // smaller version of the MachineOperand class without the concerns
+ // of changing the operand representation.
+ class CountValue {
+ public:
+ enum CountValueType {
+ CV_Register,
+ CV_Immediate
+ };
+ private:
+ CountValueType Kind;
+ union Values {
+ unsigned RegNum;
+ int64_t ImmVal;
+ Values(unsigned r) : RegNum(r) {}
+ Values(int64_t i) : ImmVal(i) {}
+ } Contents;
+ bool isNegative;
+
+ public:
+ CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
+ isNegative(neg) {}
+ explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
+ isNegative(i < 0) {}
+ CountValueType getType() const { return Kind; }
+ bool isReg() const { return Kind == CV_Register; }
+ bool isImm() const { return Kind == CV_Immediate; }
+ bool isNeg() const { return isNegative; }
+
+ unsigned getReg() const {
+ assert(isReg() && "Wrong CountValue accessor");
+ return Contents.RegNum;
+ }
+ void setReg(unsigned Val) {
+ Contents.RegNum = Val;
+ }
+ int64_t getImm() const {
+ assert(isImm() && "Wrong CountValue accessor");
+ if (isNegative) {
+ return -Contents.ImmVal;
+ }
+ return Contents.ImmVal;
+ }
+ void setImm(int64_t Val) {
+ Contents.ImmVal = Val;
+ }
+
+ void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
+ if (isReg()) { OS << PrintReg(getReg()); }
+ if (isImm()) { OS << getImm(); }
+ }
+ };
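+
+  // Usage sketch (illustrative only; VReg stands for a hypothetical virtual
+  // register, not a name from this patch):
+  //   CountValue Imm(10);           // Imm.isImm() && Imm.getImm() == 10
+  //   CountValue Neg(-10);          // Neg.isNeg() && Neg.getImm() == 10
+  //   CountValue Reg(VReg, false);  // Reg.isReg() && Reg.getReg() == VReg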
+
+ struct HexagonFixupHwLoops : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ HexagonFixupHwLoops() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// Maximum distance between the loop instruction and the basic block.
+ /// Just an estimate.
+ static const unsigned MAX_LOOP_DISTANCE = 200;
+
+ /// fixupLoopInstrs - Check the offset between each loop instruction and
+ /// the loop basic block to determine if we can use the LOOP instruction
+ /// or if we need to set the LC/SA registers explicitly.
+ bool fixupLoopInstrs(MachineFunction &MF);
+
+ /// convertLoopInstr - Add the instruction to set the LC and SA registers
+ /// explicitly.
+ void convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS);
+
+ };
+
+ char HexagonFixupHwLoops::ID = 0;
+
+} // end anonymous namespace
+
+
+/// isHardwareLoop - Returns true if the instruction is a hardware loop
+/// instruction.
+static bool isHardwareLoop(const MachineInstr *MI) {
+ return MI->getOpcode() == Hexagon::LOOP0_r ||
+ MI->getOpcode() == Hexagon::LOOP0_i;
+}
+
+/// isCompareEqualsImm - Returns true if the instruction is a compare-equals
+/// instruction with an immediate operand.
+static bool isCompareEqualsImm(const MachineInstr *MI) {
+ return MI->getOpcode() == Hexagon::CMPEQri;
+}
+
+
+/// createHexagonHardwareLoops - Factory for creating
+/// the hardware loop phase.
+FunctionPass *llvm::createHexagonHardwareLoops() {
+ return new HexagonHardwareLoops();
+}
+
+
+bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
+
+ bool Changed = false;
+
+ // Get the loop information.
+ MLI = &getAnalysis<MachineLoopInfo>();
+ // Get the register information.
+ MRI = &MF.getRegInfo();
+ // Get the target-specific instruction info.
+ TII = MF.getTarget().getInstrInfo();
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+ I != E; ++I) {
+ MachineLoop *L = *I;
+ if (!L->getParentLoop()) {
+ Changed |= convertToHardwareLoop(L);
+ }
+ }
+
+ return Changed;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable. We check for a simple recurrence pattern - an
+/// integer recurrence that decrements by one each time through the loop and
+/// ends at zero. If so, return the phi node that corresponds to it.
+///
+/// Based upon the similar code in LoopInfo except this code is specific to
+/// the machine.
+/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
+///
+const MachineInstr
+*HexagonHardwareLoops::getCanonicalInductionVariable(MachineLoop *L) const {
+ MachineBasicBlock *TopMBB = L->getTopBlock();
+ MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
+ assert(PI != TopMBB->pred_end() &&
+ "Loop must have more than one incoming edge!");
+ MachineBasicBlock *Backedge = *PI++;
+ if (PI == TopMBB->pred_end()) return 0; // dead loop
+ MachineBasicBlock *Incoming = *PI++;
+ if (PI != TopMBB->pred_end()) return 0; // multiple backedges?
+
+ // Make sure there is one incoming edge and one backedge, and determine
+ // which is which.
+ if (L->contains(Incoming)) {
+ if (L->contains(Backedge))
+ return 0;
+ std::swap(Incoming, Backedge);
+ } else if (!L->contains(Backedge))
+ return 0;
+
+ // Loop over all of the PHI nodes, looking for a canonical induction variable:
+ // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
+ // - The recurrence comes from the backedge.
+ // - The definition is an induction operation.
+ for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
+ I != E && I->isPHI(); ++I) {
+ const MachineInstr *MPhi = &*I;
+ unsigned DefReg = MPhi->getOperand(0).getReg();
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ // Check each operand for the value from the backedge.
+ MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
+ if (L->contains(MBB)) { // Operand comes from the backedge.
+ // Check if the definition is an induction operation.
+ const MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
+ if (isInductionOperation(DI, DefReg)) {
+ return MPhi;
+ }
+ }
+ }
+ }
+ return 0;
+}
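+
+// A simplified sketch of the machine-IR shape recognized above (register
+// names are illustrative, not from this patch):
+//   header:  %iv      = PHI %init, <preheader>, %iv.next, <latch>
+//   latch:   %iv.next = ADD_ri %iv, #c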
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the
+/// number of times the loop will be executed. The trip count can
+/// be either a register or a constant value. If the trip-count
+/// cannot be determined, this returns null.
+///
+/// We find the trip count from the phi instruction that defines the
+/// induction variable. We follow the links to the CMP instruction
+/// to get the trip count.
+///
+/// Based upon getTripCount in LoopInfo.
+///
+CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const {
+ // Check that the loop has an induction variable.
+ const MachineInstr *IV_Inst = getCanonicalInductionVariable(L);
+ if (IV_Inst == 0) return 0;
+
+ // Canonical loops will end with a 'cmpeq_ri IV, Imm':
+ //  if Imm is 0, get the count from the PHI operand;
+ //  if Imm is -M, then M is the count;
+ //  otherwise, Imm is the count.
+ const MachineOperand *IV_Opnd;
+ const MachineOperand *InitialValue;
+ if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
+ InitialValue = &IV_Inst->getOperand(1);
+ IV_Opnd = &IV_Inst->getOperand(3);
+ } else {
+ InitialValue = &IV_Inst->getOperand(3);
+ IV_Opnd = &IV_Inst->getOperand(1);
+ }
+
+ // Look for the cmp instruction to determine if we
+ // can get a useful trip count. The trip count can
+ // be either a register or an immediate. The location
+ // of the value depends upon the type (reg or imm).
+ while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) {
+ const MachineInstr *MI = IV_Opnd->getParent();
+ if (L->contains(MI) && isCompareEqualsImm(MI)) {
+ const MachineOperand &MO = MI->getOperand(2);
+ assert(MO.isImm() && "IV Cmp Operand should be an immediate");
+ int64_t ImmVal = MO.getImm();
+
+ const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
+ assert(L->contains(IV_DefInstr->getParent()) &&
+ "IV definition should occurs in loop");
+ int64_t iv_value = IV_DefInstr->getOperand(2).getImm();
+
+ if (ImmVal == 0) {
+ // Make sure the induction variable changes by one on each iteration.
+ if (iv_value != 1 && iv_value != -1) {
+ return 0;
+ }
+ return new CountValue(InitialValue->getReg(), iv_value > 0);
+ } else {
+ assert(InitialValue->isReg() && "Expecting register for init value");
+ const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg());
+ if (DefInstr && DefInstr->getOpcode() == Hexagon::TFRI) {
+ int64_t count = ImmVal - DefInstr->getOperand(1).getImm();
+ if ((count % iv_value) != 0) {
+ return 0;
+ }
+ return new CountValue(count/iv_value);
+ }
+ }
+ }
+ }
+ return 0;
+}
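+
+// Worked example for the logic above (a sketch; names are illustrative):
+//   %iv.next = ADD_ri %iv, #1
+//   CMPEQri %iv.next, #100      ; loop exit compare
+//   %init = TFRI #0
+// yields a trip count of (100 - 0) / 1 = 100 as an immediate CountValue.
+// With a compare immediate of 0, the count is instead taken from %init as
+// a register CountValue.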
+
+/// isInductionOperation - Return true if the operation matches the
+/// pattern that defines an induction variable:
+/// add iv, c
+///
+bool
+HexagonHardwareLoops::isInductionOperation(const MachineInstr *MI,
+ unsigned IVReg) const {
+ return (MI->getOpcode() == Hexagon::ADD_ri &&
+ MI->getOperand(1).getReg() == IVReg);
+}
+
+/// isInvalidLoopOperation - Return true if the operation is invalid within a
+/// hardware loop.
+bool
+HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
+
+ // Calls are not allowed because the callee may use a hardware loop.
+ if (MI->getDesc().isCall()) {
+ return true;
+ }
+ // Do not allow nested hardware loops.
+ if (isHardwareLoop(MI)) {
+ return true;
+ }
+ // Check if the instruction defines a hardware loop register.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ (MO.getReg() == Hexagon::LC0 || MO.getReg() == Hexagon::LC1 ||
+ MO.getReg() == Hexagon::SA0 || MO.getReg() == Hexagon::SA1)) {
+ return true;
+ }
+ }
+ return false;
+}
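+
+// For example (illustrative assembly, not from this patch), any of the
+// following in a loop body rejects it:
+//   call foo          ; the callee may itself use a hardware loop
+//   loop0(...)        ; nested hardware loop
+//   lc0 = r1          ; explicit def of a loop register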
+
+/// containsInvalidInstruction - Return true if the loop contains
+/// an instruction that inhibits the use of the hardware loop function.
+///
+bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
+ const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Blocks[i];
+ for (MachineBasicBlock::iterator
+ MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
+ const MachineInstr *MI = &*MII;
+ if (isInvalidLoopOperation(MI)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// convertToHardwareLoop - Check if the loop is a candidate for
+/// converting to a hardware loop. If so, then perform the
+/// transformation.
+///
+/// This function works on innermost loops first. A loop can
+/// be converted if it is a counting loop whose trip count is
+/// either a register value or an immediate.
+///
+/// The code makes several assumptions about the representation
+/// of the loop in llvm.
+bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
+ bool Changed = false;
+ // Process nested loops first.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ Changed |= convertToHardwareLoop(*I);
+ }
+ // If a nested loop has been converted, then we can't convert this loop.
+ if (Changed) {
+ return Changed;
+ }
+ // Are we able to determine the trip count for the loop?
+ CountValue *TripCount = getTripCount(L);
+ if (TripCount == 0) {
+ return false;
+ }
+ // Does the loop contain any invalid instructions?
+ if (containsInvalidInstruction(L)) {
+ return false;
+ }
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ // No preheader means there's no place for the loop instruction.
+ if (Preheader == 0) {
+ return false;
+ }
+ MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
+
+ MachineBasicBlock *LastMBB = L->getExitingBlock();
+ // Don't generate a hardware loop if the loop has more than one exit.
+ if (LastMBB == 0) {
+ return false;
+ }
+ MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+
+ // Determine the loop start.
+ MachineBasicBlock *LoopStart = L->getTopBlock();
+ if (L->getLoopLatch() != LastMBB) {
+ // When the exit and latch are not the same, use the latch block as the
+ // start.
+ // The loop start address is used only after the first iteration, and the
+ // loop latch may contain instructions that need to be executed after the
+ // first iteration.
+ LoopStart = L->getLoopLatch();
+ // Make sure the latch is a successor of the exit, otherwise it won't work.
+ if (!LastMBB->isSuccessor(LoopStart)) {
+ return false;
+ }
+ }
+
+ // Convert the loop to a hardware loop
+ DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
+
+ if (TripCount->isReg()) {
+ // Create a copy of the loop count register.
+ MachineFunction *MF = LastMBB->getParent();
+ const TargetRegisterClass *RC =
+ MF->getRegInfo().getRegClass(TripCount->getReg());
+ unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC);
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg());
+ if (TripCount->isNeg()) {
+ unsigned CountReg1 = CountReg;
+ CountReg = MF->getRegInfo().createVirtualRegister(RC);
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+ TII->get(Hexagon::NEG), CountReg).addReg(CountReg1);
+ }
+
+ // Add the Loop instruction to the beginning of the loop.
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+ TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg);
+ } else {
+ assert(TripCount->isImm() && "Expecting immediate value for trip count");
+ // Add the Loop immediate instruction to the beginning of the loop.
+ int64_t CountImm = TripCount->getImm();
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+ TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm);
+ }
+
+ // Make sure the loop start always has a reference in the CFG. We need to
+ // create a BlockAddress operand to get this mechanism to work; both the
+ // MachineBasicBlock and BasicBlock objects need the flag set.
+ LoopStart->setHasAddressTaken();
+ // This line is needed to set the hasAddressTaken flag on the BasicBlock
+ // object
+ BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
+
+ // Replace the loop branch with an endloop instruction.
+ DebugLoc dl = LastI->getDebugLoc();
+ BuildMI(*LastMBB, LastI, dl, TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
+
+ // The loop ends with either:
+ // - a conditional branch followed by an unconditional branch, or
+ // - a conditional branch to the loop start.
+ if (LastI->getOpcode() == Hexagon::JMP_Pred ||
+ LastI->getOpcode() == Hexagon::JMP_PredNot) {
+ // Delete the conditional branch and change/add an unconditional branch
+ // out of the loop.
+ MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
+ LastI = LastMBB->erase(LastI);
+ if (!L->contains(BranchTarget)) {
+ if (LastI != LastMBB->end()) {
+ TII->RemoveBranch(*LastMBB);
+ }
+ SmallVector<MachineOperand, 0> Cond;
+ TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, dl);
+ }
+ } else {
+ // Conditional branch to loop start; just delete it.
+ LastMBB->erase(LastI);
+ }
+ delete TripCount;
+
+ ++NumHWLoops;
+ return true;
+}
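+
+// Illustrative before/after of the conversion (a sketch; the actual syntax
+// and operand order may differ):
+//   Before:                           After:
+//     preheader:                        preheader:
+//       ...                               loop0(.LoopStart, #100)
+//     .LoopStart:                       .LoopStart:
+//       <body>                            <body>
+//       p0 = cmp...; if (p0) jump         endloop0 .LoopStart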
+
+/// createHexagonFixupHwLoops - Factory for creating the hardware loop
+/// phase.
+FunctionPass *llvm::createHexagonFixupHwLoops() {
+ return new HexagonFixupHwLoops();
+}
+
+bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "****** Hexagon Hardware Loop Fixup ******\n");
+
+ bool Changed = fixupLoopInstrs(MF);
+ return Changed;
+}
+
+/// fixupLoopInstrs - For Hexagon, if the loop label is too far from the
+/// loop instruction then we need to set the LC0 and SA0 registers
+/// explicitly instead of using LOOP(start,count). This function
+/// checks the distance, and generates register assignments if needed.
+///
+/// This function makes two passes over the basic blocks. The first
+/// pass computes the offset of the basic block from the start.
+/// The second pass checks all the loop instructions.
+bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
+
+ // Offset of the current instruction from the start.
+ unsigned InstOffset = 0;
+ // Map each basic block to the offset of its first instruction.
+ DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
+
+ // First pass - compute the offset of each basic block.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ BlockToInstOffset[MBB] = InstOffset;
+ InstOffset += (MBB->size() * 4);
+ }
+
+ // Second pass - check each loop instruction to see if it needs to
+ // be converted.
+ InstOffset = 0;
+ bool Changed = false;
+ RegScavenger RS;
+
+ // Loop over all the basic blocks.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ InstOffset = BlockToInstOffset[MBB];
+ RS.enterBasicBlock(MBB);
+
+ // Loop over all the instructions.
+ MachineBasicBlock::iterator MIE = MBB->end();
+ MachineBasicBlock::iterator MII = MBB->begin();
+ while (MII != MIE) {
+ if (isHardwareLoop(MII)) {
+ RS.forward(MII);
+ assert(MII->getOperand(0).isMBB() &&
+ "Expect a basic block as loop operand");
+ int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
+ diff = (diff > 0 ? diff : -diff);
+ if ((unsigned)diff > MAX_LOOP_DISTANCE) {
+ // Convert to explicitly setting LC0 and SA0.
+ convertLoopInstr(MF, MII, RS);
+ MII = MBB->erase(MII);
+ Changed = true;
+ } else {
+ ++MII;
+ }
+ } else {
+ ++MII;
+ }
+ InstOffset += 4;
+ }
+ }
+
+ return Changed;
+
+}
+
+/// convertLoopInstr - Convert a loop instruction to a sequence of instructions
+/// that set the LC and SA registers explicitly.
+void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS) {
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ MachineBasicBlock *MBB = MII->getParent();
+ DebugLoc DL = MII->getDebugLoc();
+ unsigned Scratch = RS.scavengeRegister(Hexagon::IntRegsRegisterClass, MII, 0);
+
+ // First, set the LC0 with the trip count.
+ if (MII->getOperand(1).isReg()) {
+ // Trip count is a register
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(MII->getOperand(1).getReg());
+ } else {
+ // Trip count is an immediate.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch)
+ .addImm(MII->getOperand(1).getImm());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(Scratch);
+ }
+ // Then, set the SA0 with the loop start address.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
+ .addMBB(MII->getOperand(0).getMBB());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0).addReg(Scratch);
+}
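+
+// Resulting sequence (a sketch, for an immediate trip count; a register
+// trip count is moved into LC0 directly and skips the TFRI):
+//   scratch = #count         ; TFRI
+//   lc0 = scratch            ; TFCR
+//   scratch = ##loop_label   ; CONST32_Label
+//   sa0 = scratch            ; TFCR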
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
new file mode 100644
index 0000000..4deab9f
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -0,0 +1,1495 @@
+//==-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon ----==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the Hexagon target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-isel"
+#include "HexagonISelLowering.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===--------------------------------------------------------------------===//
+/// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class HexagonDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the Hexagon Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const HexagonSubtarget &Subtarget;
+
+ // Keep a reference to HexagonTargetMachine.
+ HexagonTargetMachine& TM;
+ const HexagonInstrInfo *TII;
+
+public:
+ explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine)
+ : SelectionDAGISel(targetmachine),
+ Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()),
+ TM(targetmachine),
+ TII(static_cast<const HexagonInstrInfo*>(TM.getInstrInfo())) {
+
+ }
+
+ SDNode *Select(SDNode *N);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_2(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectMEMriS11_2(SDValue& Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRriS11_3(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRrr(SDValue &Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRriU6_0(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2);
+
+ virtual const char *getPassName() const {
+ return "Hexagon DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+ bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ SDNode *SelectLoad(SDNode *N);
+ SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl);
+ SDNode *SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl);
+ SDNode *SelectIndexedLoadZeroExtend64(LoadSDNode *LD, unsigned Opcode,
+ DebugLoc dl);
+ SDNode *SelectIndexedLoadSignExtend64(LoadSDNode *LD, unsigned Opcode,
+ DebugLoc dl);
+ SDNode *SelectBaseOffsetStore(StoreSDNode *ST, DebugLoc dl);
+ SDNode *SelectIndexedStore(StoreSDNode *ST, DebugLoc dl);
+ SDNode *SelectStore(SDNode *N);
+ SDNode *SelectSHL(SDNode *N);
+ SDNode *SelectSelect(SDNode *N);
+ SDNode *SelectTruncate(SDNode *N);
+ SDNode *SelectMul(SDNode *N);
+ SDNode *SelectZeroExtend(SDNode *N);
+ SDNode *SelectIntrinsicWOChain(SDNode *N);
+ SDNode *SelectConstant(SDNode *N);
+ SDNode *SelectAdd(SDNode *N);
+
+ // Include the pieces autogenerated from the target description.
+#include "HexagonGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+
+/// createHexagonISelDag - This pass converts a legalized DAG into a
+/// Hexagon-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM) {
+ return new HexagonDAGToDAGISel(TM);
+}
+
+static bool IsS11_0_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // s11_0 predicate - True if the immediate fits in an 11-bit sign-extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<11>(v);
+}
+
+
+static bool IsS11_1_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // s11_1 predicate - True if the immediate is a multiple of 2 and fits in
+ // an 11-bit sign-extended field once shifted right by 1.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,1>(v);
+}
+
+
+static bool IsS11_2_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // s11_2 predicate - True if the immediate is a multiple of 4 and fits in
+ // an 11-bit sign-extended field once shifted right by 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,2>(v);
+}
+
+
+static bool IsS11_3_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // s11_3 predicate - True if the immediate is a multiple of 8 and fits in
+ // an 11-bit sign-extended field once shifted right by 3.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,3>(v);
+}
+
+
+static bool IsU6_0_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // u6_0 predicate - True if the immediate fits in a 6-bit unsigned field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}
+
+
+static bool IsU6_1_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // u6_1 predicate - True if the immediate is a multiple of 2 and fits in a
+ // 6-bit unsigned field once shifted right by 1.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,1>(v);
+}
+
+
+static bool IsU6_2_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // u6_2 predicate - True if the immediate is a multiple of 4 and fits in a
+ // 6-bit unsigned field once shifted right by 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,2>(v);
+}
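+
+// Example for the shifted predicates above: IsS11_1_Offset accepts even
+// values in [-2048, 2046], so 2046 (= 1023 << 1) passes, while 2047 (odd)
+// and 4096 (out of range once shifted) fail.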
+
+
+// Intrinsics that return a predicate.
+static unsigned doesIntrinsicReturnPredicate(unsigned ID)
+{
+ switch (ID) {
+ default:
+ return 0;
+ case Intrinsic::hexagon_C2_cmpeq:
+ case Intrinsic::hexagon_C2_cmpgt:
+ case Intrinsic::hexagon_C2_cmpgtu:
+ case Intrinsic::hexagon_C2_cmpgtup:
+ case Intrinsic::hexagon_C2_cmpgtp:
+ case Intrinsic::hexagon_C2_cmpeqp:
+ case Intrinsic::hexagon_C2_bitsset:
+ case Intrinsic::hexagon_C2_bitsclr:
+ case Intrinsic::hexagon_C2_cmpeqi:
+ case Intrinsic::hexagon_C2_cmpgti:
+ case Intrinsic::hexagon_C2_cmpgtui:
+ case Intrinsic::hexagon_C2_cmpgei:
+ case Intrinsic::hexagon_C2_cmpgeui:
+ case Intrinsic::hexagon_C2_cmplt:
+ case Intrinsic::hexagon_C2_cmpltu:
+ case Intrinsic::hexagon_C2_bitsclri:
+ case Intrinsic::hexagon_C2_and:
+ case Intrinsic::hexagon_C2_or:
+ case Intrinsic::hexagon_C2_xor:
+ case Intrinsic::hexagon_C2_andn:
+ case Intrinsic::hexagon_C2_not:
+ case Intrinsic::hexagon_C2_orn:
+ case Intrinsic::hexagon_C2_pxfer_map:
+ case Intrinsic::hexagon_C2_any8:
+ case Intrinsic::hexagon_C2_all8:
+ case Intrinsic::hexagon_A2_vcmpbeq:
+ case Intrinsic::hexagon_A2_vcmpbgtu:
+ case Intrinsic::hexagon_A2_vcmpheq:
+ case Intrinsic::hexagon_A2_vcmphgt:
+ case Intrinsic::hexagon_A2_vcmphgtu:
+ case Intrinsic::hexagon_A2_vcmpweq:
+ case Intrinsic::hexagon_A2_vcmpwgt:
+ case Intrinsic::hexagon_A2_vcmpwgtu:
+ case Intrinsic::hexagon_C2_tfrrp:
+ case Intrinsic::hexagon_S2_tstbit_i:
+ case Intrinsic::hexagon_S2_tstbit_r:
+ return 1;
+ }
+}
+
+
+// Intrinsics that have predicate operands.
+static unsigned doesIntrinsicContainPredicate(unsigned ID)
+{
+ switch (ID) {
+ default:
+ return 0;
+ case Intrinsic::hexagon_C2_tfrpr:
+ return Hexagon::TFR_RsPd;
+ case Intrinsic::hexagon_C2_and:
+ return Hexagon::AND_pp;
+ case Intrinsic::hexagon_C2_xor:
+ return Hexagon::XOR_pp;
+ case Intrinsic::hexagon_C2_or:
+ return Hexagon::OR_pp;
+ case Intrinsic::hexagon_C2_not:
+ return Hexagon::NOT_pp;
+ case Intrinsic::hexagon_C2_any8:
+ return Hexagon::ANY_pp;
+ case Intrinsic::hexagon_C2_all8:
+ return Hexagon::ALL_pp;
+ case Intrinsic::hexagon_C2_vitpack:
+ return Hexagon::VITPACK_pp;
+ case Intrinsic::hexagon_C2_mask:
+ return Hexagon::MASK_p;
+ case Intrinsic::hexagon_C2_mux:
+ return Hexagon::MUX_rr;
+
+ // Mapping hexagon_C2_muxir to MUX_pri. This is pretty weird - but
+ // that's how it's mapped in q6protos.h.
+ case Intrinsic::hexagon_C2_muxir:
+ return Hexagon::MUX_ri;
+
+ // Mapping hexagon_C2_muxri to MUX_pir. This is pretty weird - but
+ // that's how it's mapped in q6protos.h.
+ case Intrinsic::hexagon_C2_muxri:
+ return Hexagon::MUX_ir;
+
+ case Intrinsic::hexagon_C2_muxii:
+ return Hexagon::MUX_ii;
+ case Intrinsic::hexagon_C2_vmux:
+ return Hexagon::VMUX_prr64;
+ case Intrinsic::hexagon_S2_valignrb:
+ return Hexagon::VALIGN_rrp;
+ case Intrinsic::hexagon_S2_vsplicerb:
+ return Hexagon::VSPLICE_rrp;
+ }
+}
+
+
+static bool OffsetFitsS11(EVT MemType, int64_t Offset) {
+ if (MemType == MVT::i64 && isShiftedInt<11,3>(Offset)) {
+ return true;
+ }
+ if (MemType == MVT::i32 && isShiftedInt<11,2>(Offset)) {
+ return true;
+ }
+ if (MemType == MVT::i16 && isShiftedInt<11,1>(Offset)) {
+ return true;
+ }
+ if (MemType == MVT::i8 && isInt<11>(Offset)) {
+ return true;
+ }
+ return false;
+}
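+
+// Worked example of the checks above: an MVT::i32 access at offset 4092
+// fits (a multiple of 4 within the shifted s11 range), offset 4094 fails
+// the alignment check, and offset 8192 is out of range.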
+
+
+//
+// Try to lower loads of GlobalAddresses into base+offset loads. Custom
+// lowering for GlobalAddress nodes has already turned them into a
+// CONST32.
+//
+SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl) {
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Chain = LD->getChain();
+ SDNode* Const32 = LD->getBasePtr().getNode();
+ unsigned Opcode = 0;
+
+ if (Const32->getOpcode() == HexagonISD::CONST32 &&
+ ISD::isNormalLoad(LD)) {
+ SDValue Base = Const32->getOperand(0);
+ EVT LoadedVT = LD->getMemoryVT();
+ int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
+ if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) {
+ MVT PointerTy = TLI.getPointerTy();
+ const GlobalValue* GV =
+ cast<GlobalAddressSDNode>(Base)->getGlobal();
+ SDValue TargAddr =
+ CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
+ SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
+ dl, PointerTy,
+ TargAddr);
+ // Figure out base + offset opcode
+ if (LoadedVT == MVT::i64) Opcode = Hexagon::LDrid_indexed;
+ else if (LoadedVT == MVT::i32) Opcode = Hexagon::LDriw_indexed;
+ else if (LoadedVT == MVT::i16) Opcode = Hexagon::LDrih_indexed;
+ else if (LoadedVT == MVT::i8) Opcode = Hexagon::LDrib_indexed;
+ else assert (0 && "unknown memory type");
+
+ // Build indexed load.
+ SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy);
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::Other,
+ SDValue(NewBase,0),
+ TargetConstOff,
+ Chain);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ ReplaceUses(LD, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(LD);
+}
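+
+// Sketch of the rewrite above (names are illustrative):
+//   (load (HexagonISD::CONST32 @g, #8))
+//     ==>  %base = CONST32_set @g
+//          %val  = LDriw_indexed %base, #8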
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
+ unsigned Opcode,
+ DebugLoc dl)
+{
+ SDValue Chain = LD->getChain();
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ SDValue N1 = LD->getOperand(1);
+ SDValue CPTmpN1_0;
+ SDValue CPTmpN1_1;
+ if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
+ N1.getNode()->getValueType(0) == MVT::i32) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
+ MVT::Other, Base, TargetConst,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl, MVT::i64,
+ SDValue(Result_1, 0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
+ }
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl,
+ MVT::i64, SDValue(Result_1, 0));
+ SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl,
+ MVT::i32, Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_3, 0),
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
+ }
+ return SelectCode(LD);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
+ unsigned Opcode,
+ DebugLoc dl)
+{
+ SDValue Chain = LD->getChain();
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ SDValue N1 = LD->getOperand(1);
+ SDValue CPTmpN1_0;
+ SDValue CPTmpN1_1;
+ if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
+ N1.getNode()->getValueType(0) == MVT::i32) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2,0),
+ SDValue(Result_1,0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_3, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_3;
+ }
+
+ // Generate an indirect load.
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other,
+ Base, TargetConst0, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2,0),
+ SDValue(Result_1,0));
+ // Add offset to base.
+ SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_3, 0), // Load value.
+ SDValue(Result_4, 0), // New address.
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_3;
+ }
+
+ return SelectCode(LD);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ // Get the constant value.
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ EVT LoadedVT = LD->getMemoryVT();
+ unsigned Opcode = 0;
+
+ // Check for zero ext loads.
+ bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD);
+
+ // Figure out the opcode.
+ if (LoadedVT == MVT::i64) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = Hexagon::POST_LDrid;
+ else
+ Opcode = Hexagon::LDrid;
+ } else if (LoadedVT == MVT::i32) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = Hexagon::POST_LDriw;
+ else
+ Opcode = Hexagon::LDriw;
+ } else if (LoadedVT == MVT::i16) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = zextval ? Hexagon::POST_LDriuh : Hexagon::POST_LDrih;
+ else
+ Opcode = zextval ? Hexagon::LDriuh : Hexagon::LDrih;
+ } else if (LoadedVT == MVT::i8) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = zextval ? Hexagon::POST_LDriub : Hexagon::POST_LDrib;
+ else
+ Opcode = zextval ? Hexagon::LDriub : Hexagon::LDrib;
+ } else
+ assert (0 && "unknown memory type");
+
+ // For zero ext i64 loads, we need to add combine instructions.
+ if (LD->getValueType(0) == MVT::i64 &&
+ LD->getExtensionType() == ISD::ZEXTLOAD) {
+ return SelectIndexedLoadZeroExtend64(LD, Opcode, dl);
+ }
+ if (LD->getValueType(0) == MVT::i64 &&
+ LD->getExtensionType() == ISD::SEXTLOAD) {
+ // Handle sign ext i64 loads.
+ return SelectIndexedLoadSignExtend64(LD, Opcode, dl);
+ }
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Result, 1),
+ SDValue(Result, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result;
+ } else {
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_1, 0),
+ SDValue(Result_2, 0),
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_1;
+ }
+
+ return SelectCode(LD);
+}
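+
+// Summary of the cases above (a sketch): a post-increment i32 load with a
+// valid auto-increment immediate selects to a single POST_LDriw that also
+// produces the updated pointer; otherwise it splits into
+//   %val  = LDriw %base, #0
+//   %next = ADD_ri %base, #offset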
+
+
+SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
+ SDNode *result;
+ DebugLoc dl = N->getDebugLoc();
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+
+ // Handle indexed loads.
+ if (AM != ISD::UNINDEXED) {
+ result = SelectIndexedLoad(LD, dl);
+ } else {
+ result = SelectBaseOffsetLoad(LD, dl);
+ }
+
+ return result;
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
+ SDValue Chain = ST->getChain();
+ SDValue Base = ST->getBasePtr();
+ SDValue Offset = ST->getOffset();
+ SDValue Value = ST->getValue();
+ SDNode *OffsetNode = Offset.getNode();
+ // Get the constant value.
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ EVT StoredVT = ST->getMemoryVT();
+
+ // Offset value must be within representable range
+ // and must have correct alignment properties.
+ if (TII->isValidAutoIncImm(StoredVT, Val)) {
+ SDValue Ops[] = { Value, Base,
+ CurDAG->getTargetConstant(Val, MVT::i32), Chain};
+ unsigned Opcode = 0;
+
+ // Figure out the post inc version of opcode.
+ if (StoredVT == MVT::i64) Opcode = Hexagon::POST_STdri;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::POST_STwri;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::POST_SThri;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::POST_STbri;
+ else assert (0 && "unknown memory type");
+
+ // Build post increment store.
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Ops, 4);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(ST, Result);
+ ReplaceUses(SDValue(ST,1), SDValue(Result,1));
+ return Result;
+ }
+
+ // Note: Order of operands matches the def of instruction:
+ // def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), ...
+ // and it differs for POST_ST* for instance.
+ SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, MVT::i32), Value,
+ Chain};
+ unsigned Opcode = 0;
+
+ // Figure out the opcode.
+ if (StoredVT == MVT::i64) Opcode = Hexagon::STrid;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib;
+ else assert (0 && "unknown memory type");
+
+ // Build regular store.
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops,
+ 4);
+ // Build the split increment instruction.
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base,
+ TargetConstVal,
+ SDValue(Result_1, 0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(SDValue(ST,0), SDValue(Result_2,0));
+ ReplaceUses(SDValue(ST,1), SDValue(Result_1,0));
+ return Result_2;
+}
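+
+// Summary (a sketch): with a valid auto-increment immediate the store
+// selects to a single POST_STwri that also produces the updated pointer;
+// otherwise it splits into
+//   STriw %base, #0, %val
+//   %next = ADD_ri %base, #offset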
+
+
+SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
+ DebugLoc dl) {
+ SDValue Chain = ST->getChain();
+ SDNode* Const32 = ST->getBasePtr().getNode();
+ SDValue Value = ST->getValue();
+ unsigned Opcode = 0;
+
+ // Try to lower stores of GlobalAddresses into indexed stores. Custom
+ // lowering for GlobalAddress nodes has already turned them into a
+ // CONST32. Avoid truncating stores for the moment; post-inc stores
+ // do the same. There doesn't seem to be a reason for this, so a bug
+ // will be filed to fix it.
+ if ((Const32->getOpcode() == HexagonISD::CONST32) &&
+ !(Value.getValueType() == MVT::i64 && ST->isTruncatingStore())) {
+ SDValue Base = Const32->getOperand(0);
+ if (Base.getOpcode() == ISD::TargetGlobalAddress) {
+ EVT StoredVT = ST->getMemoryVT();
+ int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
+ if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) {
+ MVT PointerTy = TLI.getPointerTy();
+ const GlobalValue* GV =
+ cast<GlobalAddressSDNode>(Base)->getGlobal();
+ SDValue TargAddr =
+ CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
+ SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
+ dl, PointerTy,
+ TargAddr);
+
+ // Figure out base + offset opcode
+ if (StoredVT == MVT::i64) Opcode = Hexagon::STrid_indexed;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih_indexed;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib_indexed;
+ else assert (0 && "unknown memory type");
+
+ SDValue Ops[] = {SDValue(NewBase,0),
+ CurDAG->getTargetConstant(Offset,PointerTy),
+ Value, Chain};
+ // Build the indexed store.
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ MVT::Other, Ops, 4);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ ReplaceUses(ST, Result);
+ return Result;
+ }
+ }
+ }
+
+ return SelectCode(ST);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+
+ // Handle indexed stores.
+ if (AM != ISD::UNINDEXED) {
+ return SelectIndexedStore(ST, dl);
+ }
+
+ return SelectBaseOffsetStore(ST, dl);
+}
+
+SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+
+ //
+ // %conv.i = sext i32 %tmp1 to i64
+ // %conv2.i = sext i32 %add to i64
+ // %mul.i = mul nsw i64 %conv2.i, %conv.i
+ //
+ // --- match with the following ---
+ //
+ // %mul.i = mpy (%tmp1, %add)
+ //
+
+ if (N->getValueType(0) == MVT::i64) {
+ // An i64 signed multiply of two sign-extended i32 operands.
+ SDValue MulOp0 = N->getOperand(0);
+ SDValue MulOp1 = N->getOperand(1);
+
+ SDValue OP0;
+ SDValue OP1;
+
+ // Handle sign_extend and sextload.
+ if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext0 = MulOp0.getOperand(0);
+ if (Sext0.getNode()->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ OP0 = Sext0;
+ } else if (MulOp0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Base = LD->getBasePtr();
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(), TargetConst0,
+ Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Same goes for the second operand.
+ if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext1 = MulOp1.getOperand(0);
+ if (Sext1.getNode()->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ OP1 = Sext1;
+ } else if (MulOp1.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Base = LD->getBasePtr();
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(), TargetConst0,
+ Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Generate a mpy instruction.
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY64, dl, MVT::i64,
+ OP0, OP1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+
+ return SelectCode(N);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ SDValue N000 = N00.getOperand(0);
+ SDValue N001 = N00.getOperand(1);
+ if (cast<VTSDNode>(N001)->getVT() == MVT::i16) {
+ SDValue N01 = N0.getOperand(1);
+ SDValue N02 = N0.getOperand(2);
+
+ // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
+ // i16:Other),IntRegs:i32:$src1, SETLT:Other),IntRegs:i32:$src1,
+ // IntRegs:i32:$src2)
+ // Emits: (MAXh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
+ // Pattern complexity = 9 cost = 1 size = 0.
+ if (cast<CondCodeSDNode>(N02)->get() == ISD::SETLT) {
+ SDValue N1 = N->getOperand(1);
+ if (N01 == N1) {
+ SDValue N2 = N->getOperand(2);
+ if (N000 == N2 &&
+ N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
+ N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
+ SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl,
+ MVT::i32, N000);
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MAXw_rr, dl,
+ MVT::i32,
+ SDValue(SextNode, 0),
+ N1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+
+ // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
+ // i16:Other), IntRegs:i32:$src1, SETGT:Other), IntRegs:i32:$src1,
+ // IntRegs:i32:$src2)
+ // Emits: (MINh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
+ // Pattern complexity = 9 cost = 1 size = 0.
+ if (cast<CondCodeSDNode>(N02)->get() == ISD::SETGT) {
+ SDValue N1 = N->getOperand(1);
+ if (N01 == N1) {
+ SDValue N2 = N->getOperand(2);
+ if (N000 == N2 &&
+ N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
+ N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
+ SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl,
+ MVT::i32, N000);
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MINw_rr, dl,
+ MVT::i32,
+ SDValue(SextNode, 0),
+ N1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Shift = N->getOperand(0);
+
+ //
+ // %conv.i = sext i32 %tmp1 to i64
+ // %conv2.i = sext i32 %add to i64
+ // %mul.i = mul nsw i64 %conv2.i, %conv.i
+ // %shr5.i = lshr i64 %mul.i, 32
+ // %conv3.i = trunc i64 %shr5.i to i32
+ //
+ // --- match with the following ---
+ //
+ // %conv3.i = mpy (%tmp1, %add)
+ //
+ // Trunc to i32.
+ if (N->getValueType(0) == MVT::i32) {
+ // Trunc from i64.
+ if (Shift.getNode()->getValueType(0) == MVT::i64) {
+ // Trunc child is logical shift right.
+ if (Shift.getOpcode() != ISD::SRL) {
+ return SelectCode(N);
+ }
+
+ SDValue ShiftOp0 = Shift.getOperand(0);
+ SDValue ShiftOp1 = Shift.getOperand(1);
+
+ // Shift by const 32
+ if (ShiftOp1.getOpcode() != ISD::Constant) {
+ return SelectCode(N);
+ }
+
+ int32_t ShiftConst =
+ cast<ConstantSDNode>(ShiftOp1.getNode())->getSExtValue();
+ if (ShiftConst != 32) {
+ return SelectCode(N);
+ }
+
+      // Shifting an i64 signed multiply.
+ SDValue Mul = ShiftOp0;
+ if (Mul.getOpcode() != ISD::MUL) {
+ return SelectCode(N);
+ }
+
+ SDValue MulOp0 = Mul.getOperand(0);
+ SDValue MulOp1 = Mul.getOperand(1);
+
+ SDValue OP0;
+ SDValue OP1;
+
+ // Handle sign_extend and sextload
+ if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext0 = MulOp0.getOperand(0);
+ if (Sext0.getNode()->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ OP0 = Sext0;
+ } else if (MulOp0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+        SDValue Base = LD->getBasePtr();
+        SDValue Chain = LD->getChain();
+        SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+        OP0 = SDValue(CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+                                             MVT::Other,
+                                             Base, TargetConst0, Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Same goes for the second operand.
+ if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext1 = MulOp1.getOperand(0);
+ if (Sext1.getNode()->getValueType(0) != MVT::i32)
+ return SelectCode(N);
+
+ OP1 = Sext1;
+ } else if (MulOp1.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+        SDValue Base = LD->getBasePtr();
+        SDValue Chain = LD->getChain();
+        SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+        OP1 = SDValue(CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+                                             MVT::Other,
+                                             Base, TargetConst0, Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Generate a mpy instruction.
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY, dl, MVT::i32,
+ OP0, OP1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
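+//
+// Fold a constant shift of a constant multiply into one immediate multiply.
+// A sketch of the two patterns handled below (illustrative; x is a register,
+// C1 and C2 are constants):
+//   (shl (mul x, C1), C2)          -> (MPYI_ri x, C1 << C2)
+//   (shl (sub 0, (shl x, C2)), C1) -> (MPYI_ri x, -(1 << (C1 + C2)))
+// both subject to the combined immediate fitting in a signed 9-bit field.
+//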
+SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) == MVT::i32) {
+ SDValue Shl_0 = N->getOperand(0);
+ SDValue Shl_1 = N->getOperand(1);
+ // RHS is const.
+ if (Shl_1.getOpcode() == ISD::Constant) {
+ if (Shl_0.getOpcode() == ISD::MUL) {
+ SDValue Mul_0 = Shl_0.getOperand(0); // Val
+ SDValue Mul_1 = Shl_0.getOperand(1); // Const
+ // RHS of mul is const.
+ if (Mul_1.getOpcode() == ISD::Constant) {
+ int32_t ShlConst =
+ cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+ int32_t MulConst =
+ cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue();
+ int32_t ValConst = MulConst << ShlConst;
+ SDValue Val = CurDAG->getTargetConstant(ValConst,
+ MVT::i32);
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode()))
+ if (isInt<9>(CN->getSExtValue())) {
+ SDNode* Result =
+ CurDAG->getMachineNode(Hexagon::MPYI_ri, dl,
+ MVT::i32, Mul_0, Val);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+
+ }
+ } else if (Shl_0.getOpcode() == ISD::SUB) {
+ SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
+ SDValue Sub_1 = Shl_0.getOperand(1); // Val
+ if (Sub_0.getOpcode() == ISD::Constant) {
+ int32_t SubConst =
+ cast<ConstantSDNode>(Sub_0.getNode())->getSExtValue();
+ if (SubConst == 0) {
+ if (Sub_1.getOpcode() == ISD::SHL) {
+ SDValue Shl2_0 = Sub_1.getOperand(0); // Val
+ SDValue Shl2_1 = Sub_1.getOperand(1); // Const
+ if (Shl2_1.getOpcode() == ISD::Constant) {
+ int32_t ShlConst =
+ cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+ int32_t Shl2Const =
+ cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue();
+ int32_t ValConst = 1 << (ShlConst+Shl2Const);
+ SDValue Val = CurDAG->getTargetConstant(-ValConst, MVT::i32);
+ if (ConstantSDNode *CN =
+ dyn_cast<ConstantSDNode>(Val.getNode()))
+ if (isInt<9>(CN->getSExtValue())) {
+ SDNode* Result =
+ CurDAG->getMachineNode(Hexagon::MPYI_ri, dl, MVT::i32,
+ Shl2_0, Val);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return SelectCode(N);
+}
+
+
+//
+// If there is a zero_extend following an intrinsic in the DAG (meaning the
+// result of the intrinsic is a predicate), convert the zero_extend into a
+// transfer instruction.
+//
+// The zero_extend -> transfer lowering is done here. Otherwise the
+// zero_extend would be converted into a MUX, because predicate registers are
+// defined as 1 bit in the compiler while the architecture defines them as
+// 8-bit registers. We want to preserve all of the lower 8 bits, not just the
+// LSB.
+//
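+// For example (illustrative, with a hypothetical predicate intrinsic):
+//   %p = call i1 @llvm.hexagon.pred.op(...)
+//   %z = zext i1 %p to i32
+// selects the zext as a register transfer Rs = Pd (TFR_RsPd), so all eight
+// predicate bits survive rather than only the LSB a MUX would produce.
+//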
+SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDNode *IsIntrinsic = N->getOperand(0).getNode();
+  if (IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ unsigned ID =
+ cast<ConstantSDNode>(IsIntrinsic->getOperand(0))->getZExtValue();
+ if (doesIntrinsicReturnPredicate(ID)) {
+ // Now we need to differentiate target data types.
+ if (N->getValueType(0) == MVT::i64) {
+ // Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs).
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl,
+ MVT::i32,
+ SDValue(IsIntrinsic, 0));
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl,
+ MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2, 0),
+ SDValue(Result_1, 0));
+ ReplaceUses(N, Result_3);
+ return Result_3;
+ }
+ if (N->getValueType(0) == MVT::i32) {
+ // Convert the zero_extend to Rs = Pd
+ SDNode* RsPd = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl,
+ MVT::i32,
+ SDValue(IsIntrinsic, 0));
+ ReplaceUses(N, RsPd);
+ return RsPd;
+ }
+ assert(0 && "Unexpected value type");
+ }
+ }
+ return SelectCode(N);
+}
+
+
+//
+// Check for intrinsics that take predicate registers as operands and lower
+// them to the actual machine instruction.
+//
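+// For example (illustrative): an i1 operand destined for a PredRegs slot is
+// first materialized with Pd = Rs (TFR_PdRs), Int/Double register operands
+// pass through unchanged, and constant operands are lowered to target
+// constants before the machine intrinsic is emitted.
+//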
+SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ unsigned ID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IntrinsicWithPred = doesIntrinsicContainPredicate(ID);
+
+ // We are concerned with only those intrinsics that have predicate registers
+ // as at least one of the operands.
+ if (IntrinsicWithPred) {
+ SmallVector<SDValue, 8> Ops;
+ const MCInstrDesc &MCID = TII->get(IntrinsicWithPred);
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+    // Iterate over all the operands of the intrinsic:
+    // - For PredRegs, do the transfer.
+    // - For Double/Int Regs, just preserve the value.
+    // - For immediates, lower them.
+ for (unsigned i = 1; i < N->getNumOperands(); ++i) {
+ SDNode *Arg = N->getOperand(i).getNode();
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
+
+ if (RC == Hexagon::IntRegsRegisterClass ||
+ RC == Hexagon::DoubleRegsRegisterClass) {
+ Ops.push_back(SDValue(Arg, 0));
+ } else if (RC == Hexagon::PredRegsRegisterClass) {
+ // Do the transfer.
+ SDNode *PdRs = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+ SDValue(Arg, 0));
+        Ops.push_back(SDValue(PdRs, 0));
+      } else if (RC == NULL && isa<ConstantSDNode>(Arg)) {
+        // This is an immediate operand. Lower it here, making sure we really
+        // do have a ConstantSDNode for the immediate value.
+ int32_t Val = cast<ConstantSDNode>(Arg)->getSExtValue();
+ SDValue SDVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ Ops.push_back(SDVal);
+ } else {
+ assert(0 && "Unimplemented");
+ }
+ }
+ EVT ReturnValueVT = N->getValueType(0);
+ SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl,
+ ReturnValueVT,
+ Ops.data(), Ops.size());
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ return SelectCode(N);
+}
+
+
+//
+// Map predicate true (encoded as -1 in LLVM) to a XOR.
+//
+SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) == MVT::i1) {
+ SDNode* Result;
+ int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ if (Val == -1) {
+ unsigned NewIntReg = TM.getInstrInfo()->createVR(MF, MVT(MVT::i32));
+ SDValue Reg = CurDAG->getRegister(NewIntReg, MVT::i32);
+
+      // Create the IntReg = 0 node.
+ SDNode* IntRegTFR =
+ CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ CurDAG->getTargetConstant(0, MVT::i32));
+
+ // Pd = IntReg
+ SDNode* Pd = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+ SDValue(IntRegTFR, 0));
+
+ // not(Pd)
+ SDNode* NotPd = CurDAG->getMachineNode(Hexagon::NOT_pp, dl, MVT::i1,
+ SDValue(Pd, 0));
+
+      // xor(Pd, not(Pd))
+      Result = CurDAG->getMachineNode(Hexagon::XOR_pp, dl, MVT::i1,
+                                      SDValue(Pd, 0), SDValue(NotPd, 0));
+
+      // We have just built:
+      //   Rs = 0
+      //   Pd = Rs
+      //   Result = xor(Pd, not(Pd))  -- i.e. all predicate bits set.
+
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+//
+// Map an add followed by an asr -> asr +=.
+//
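+// For example (illustrative):
+//   %s = ashr i32 %a, %b
+//   %r = add i32 %s, %acc
+// selects to the accumulating form Rx += asr(Rs, Rt) (ASR_rr_acc), with Rx
+// constrained to the register holding %acc.
+//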
+SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+ // Identify nodes of the form: add(asr(...)).
+ SDNode* Src1 = N->getOperand(0).getNode();
+ if (Src1->getOpcode() != ISD::SRA || !Src1->hasOneUse()
+ || Src1->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ // Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that
+  // Rd and Rd' are assigned to the same register.
+ SDNode* Result = CurDAG->getMachineNode(Hexagon::ASR_rr_acc, dl, MVT::i32,
+ N->getOperand(1),
+ Src1->getOperand(0),
+ Src1->getOperand(1));
+ ReplaceUses(N, Result);
+
+ return Result;
+}
+
+
+SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+
+ switch (N->getOpcode()) {
+ case ISD::Constant:
+ return SelectConstant(N);
+
+ case ISD::ADD:
+ return SelectAdd(N);
+
+ case ISD::SHL:
+ return SelectSHL(N);
+
+ case ISD::LOAD:
+ return SelectLoad(N);
+
+ case ISD::STORE:
+ return SelectStore(N);
+
+ case ISD::SELECT:
+ return SelectSelect(N);
+
+ case ISD::TRUNCATE:
+ return SelectTruncate(N);
+
+ case ISD::MUL:
+ return SelectMul(N);
+
+ case ISD::ZERO_EXTEND:
+ return SelectZeroExtend(N);
+
+ case ISD::INTRINSIC_WO_CHAIN:
+ return SelectIntrinsicWOChain(N);
+ }
+
+ return SelectCode(N);
+}
+
+
+//
+// Hexagon_TODO: Eight near-identical functions for ADDRri?! Surely there must
+// be a better way to define these instructions.
+//
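+// Each SelectADDRri* variant below has the same shape: reject direct-call
+// addresses, fold a frame index into a (FI, #0) pair, and otherwise use
+// (Addr, #0); they differ only in the immediate-range predicate applied to
+// the offset (IsS11_x / IsU6_x).
+//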
+bool HexagonDAGToDAGISel::SelectADDRri(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_0(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_0_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_0_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_1(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_1_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_1_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_2(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_2_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_2_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriU6_0(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_0_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_0_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriU6_1(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_1_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_1_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriU6_2(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_2_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_2_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectMEMriS11_2(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+
+  // Whether or not the address is an ADD, the selection reduces to the same
+  // reg+imm form, so delegate unconditionally.
+  return SelectADDRriS11_2(Addr, Base, Offset);
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_3(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_3_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_3_Offset(Offset.getNode()));
+}
+
+bool HexagonDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1,
+ SDValue &R2) {
+ if (Addr.getOpcode() == ISD::FrameIndex) return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ if (isInt<13>(CN->getSExtValue()))
+ return false; // Let the reg+imm pattern catch this!
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ return true;
+ }
+
+ R1 = Addr;
+
+ return true;
+}
+
+
+// Handle the generic address case. It is accessed from inline asm '=m'
+// constraints, which may refer to any kind of pointer.
+bool HexagonDAGToDAGISel::SelectAddr(SDNode *Op, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+
+bool HexagonDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+
+ switch (ConstraintCode) {
+ case 'o': // Offsetable.
+ case 'v': // Not offsetable.
+ default: return true;
+ case 'm': // Memory.
+ if (!SelectAddr(Op.getNode(), Op, Op0, Op1))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
new file mode 100644
index 0000000..0ac3cf0
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -0,0 +1,1505 @@
+//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Hexagon uses to lower LLVM code
+// into a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonISelLowering.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonTargetObjectFile.h"
+#include "HexagonSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+const unsigned Hexagon_MAX_RET_SIZE = 64;
+using namespace llvm;
+
+static cl::opt<bool>
+EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
+ cl::desc("Control jump table emission on Hexagon target"));
+
+// Number of named formal arguments of the vararg call currently being
+// lowered; -1 when no such call is in flight. File-local state shared with
+// CC_Hexagon_VarArg below.
+static int NumNamedVarArgParams = -1;
+
+// Implement calling convention for Hexagon.
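+// Argument-passing summary, as implemented by the CC_Hexagon* routines
+// below: 32-bit values are assigned to R0..R5 and then to 4-byte stack
+// slots; 64-bit values are assigned to the D0..D2 register pairs and then
+// to 8-byte stack slots; by-value aggregates larger than 8 bytes always go
+// on the stack.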
+static bool
+CC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+  // NumNamedVarArgParams cannot be zero for a vararg function.
+  assert((NumNamedVarArgParams > 0) &&
+         "NumNamedVarArgParams must be positive for a vararg call");
+
+ if ( (int)ValNo < NumNamedVarArgParams ) {
+ // Deal with named arguments.
+ return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
+ }
+
+ // Deal with un-named arguments.
+ unsigned ofst;
+ if (ArgFlags.isByVal()) {
+ // If pass-by-value, the size allocated on stack is decided
+ // by ArgFlags.getByValSize(), not by the size of LocVT.
+ assert ((ArgFlags.getByValSize() > 8) &&
+ "ByValSize must be bigger than 8 bytes");
+ ofst = State.AllocateStack(ArgFlags.getByValSize(), 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::i32) {
+ ofst = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::i64) {
+ ofst = State.AllocateStack(8, 8);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+  llvm_unreachable("Unsupported LocVT for a vararg argument");
+}
+
+
+static bool
+CC_Hexagon (unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (ArgFlags.isByVal()) {
+ // Passed on stack.
+ assert ((ArgFlags.getByValSize() > 8) &&
+ "ByValSize must be bigger than 8 bytes");
+ unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+
+ if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ if (LocVT == MVT::i32) {
+ if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ if (LocVT == MVT::i64) {
+ if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ return true; // CC didn't match.
+}
+
+
+static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ static const unsigned RegList[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
+ if (unsigned Reg = State.AllocateReg(RegList, 6)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ unsigned Offset = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ static const unsigned RegList1[] = {
+ Hexagon::D1, Hexagon::D2
+ };
+ static const unsigned RegList2[] = {
+ Hexagon::R1, Hexagon::R3
+ };
+ if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ unsigned Offset = State.AllocateStack(8, 8, Hexagon::D2);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+
+ if (LocVT == MVT::i1 ||
+ LocVT == MVT::i8 ||
+ LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ if (LocVT == MVT::i32) {
+ if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ if (LocVT == MVT::i64) {
+ if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ return true; // CC didn't match.
+}
+
+static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (LocVT == MVT::i32) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::R0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ unsigned Offset = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ if (LocVT == MVT::i64) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ unsigned Offset = State.AllocateStack(8, 8);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+SDValue
+HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
+const {
+ return SDValue();
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate from the address
+/// specified by "Src" to the address "Dst". The size and alignment are taken
+/// from the byval parameter attributes in "Flags". The copy will be passed as
+/// a byval function parameter. Sometimes what we are copying is the end of a
+/// larger object, the part that does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
+
+// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
+// passed by value, the function prototype is modified to return void and
+// the value is stored in memory pointed to by a pointer passed by the caller.
+SDValue
+HexagonTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values of ISD::RET
+ CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+
+ return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+
+
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of appropriate physical registers. This assumes that
+/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// being lowered. Returns an SDNode with the same number of values as the
+/// ISD::CALL.
+SDValue
+HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const
+ SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDValue Callee) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl,
+ RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+    InFlag = Chain.getValue(2);  // Result #2 of CopyFromReg is the glue.
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+/// LowerCall - Functions arguments are copied from virtual regs to
+/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
+SDValue
+HexagonTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Check for varargs.
+ NumNamedVarArgParams = -1;
+  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, MVT::i32);
+    if (const Function* CalleeFn = dyn_cast<Function>(GA->getGlobal())) {
+      // A vararg function cannot be declared with zero named arguments, so a
+      // callee with zero parameters must be an undeclared function. Do not
+      // assume varargs if the callee is undefined.
+ if (CalleeFn->isVarArg() &&
+ CalleeFn->getFunctionType()->getNumParams() != 0) {
+ NumNamedVarArgParams = CalleeFn->getFunctionType()->getNumParams();
+ }
+ }
+ }
+
+ if (NumNamedVarArgParams > 0)
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
+ else
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
+
+
+  if (isTailCall) {
+ bool StructAttrFlag =
+ DAG.getMachineFunction().getFunction()->hasStructRetAttr();
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet,
+ StructAttrFlag,
+ Outs, OutVals, Ins, DAG);
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i){
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isMemLoc()) {
+ isTailCall = false;
+ break;
+ }
+ }
+ if (isTailCall) {
+      DEBUG(dbgs() << "Eligible for Tail Call\n");
+    } else {
+      DEBUG(dbgs() <<
+            "Argument must be passed on stack. Not eligible for Tail Call\n");
+ }
+ }
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ SDValue StackPtr =
+ DAG.getCopyFromReg(Chain, dl, TM.getRegisterInfo()->getStackRegister(),
+ getPointerTy());
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default:
+ // Loc info must be one of Full, SExt, ZExt, or AExt.
+ assert(0 && "Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isMemLoc()) {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getConstant(LocMemOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+
+ if (Flags.isByVal()) {
+        // The argument is a struct passed by value. According to LLVM, "Arg"
+        // is a pointer.
+ MemOpChains.push_back(CreateCopyOfByValArgument(Arg, PtrOff, Chain,
+ Flags, DAG, dl));
+ } else {
+        // The argument is not passed by value. "Arg" is a built-in type. It
+        // is not a pointer.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),false, false,
+ 0));
+ }
+ continue;
+ }
+
+ // Arguments that can be passed on register must be kept at RegsToPass
+ // vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty()) {
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0],
+ MemOpChains.size());
+ }
+
+ if (!isTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+ if (!isTailCall) {
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ }
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+ //
+    // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+    InFlag = SDValue();
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (flag_aligned_memcpy) {
+ const char *MemcpyName =
+ "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
+ Callee =
+ DAG.getTargetExternalSymbol(MemcpyName, getPointerTy());
+ flag_aligned_memcpy = false;
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+ } else if (ExternalSymbolSDNode *S =
+ dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+
+ if (InFlag.getNode()) {
+ Ops.push_back(InFlag);
+ }
+
+ if (isTailCall)
+ return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+
+ Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ InVals, OutVals, Callee);
+}
+
+static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD)
+ return false;
+
+ if (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ // Ensure that Offset is a constant.
+ return (isa<ConstantSDNode>(Offset));
+ }
+
+ return false;
+}
+
+// TODO: Put this function along with the other isS* functions in
+// HexagonISelDAGToDAG.cpp into a common file. Or better still, use the
+// functions defined in HexagonImmediates.td.
+static bool Is_PostInc_S4_Offset(SDNode *S, int ShiftAmount) {
+  ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+  // immS4 predicate - True if the immediate is a multiple of the access size
+  // and, after scaling down by ShiftAmount bits, fits in a 4-bit
+  // sign-extended field.
+  int64_t v = (int64_t)N->getSExtValue();
+  int64_t m = 0;
+  if (ShiftAmount > 0) {
+    m = v % (1LL << ShiftAmount);  // Offset must be a multiple of the scale.
+ v = v >> ShiftAmount;
+ }
+ return (v <= 7) && (v >= -8) && (m == 0);
+}
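+// For example (illustrative), with a word access (ShiftAmount == 2): an
+// offset of 28 scales to 7 and is accepted, 36 scales to 9 and is rejected,
+// and 6 is rejected because it is not a multiple of 4.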
+
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
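+/// For example (illustrative), a word load whose pointer is then advanced by
+/// the access size:
+///   %v = load i32* %p
+///   %p.next = getelementptr i32* %p, i32 1
+/// may be selected as the post-incremented form r1 = memw(r0++#4).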
+bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const
+{
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) {
+ return false;
+ }
+ } else {
+ return false;
+ }
+
+ bool isInc;
+ bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+  // ShiftAmount = number of left-shifted bits in the Hexagon instruction,
+  // i.e. log2 of the access size in bytes (0 for i8 up to 3 for i64).
+  int ShiftAmount = Log2_32(VT.getSizeInBits() / 8);
+ if (isLegal && Is_PostInc_S4_Offset(Offset.getNode(), ShiftAmount)) {
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+ }
+
+ return false;
+}
+
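+// Scan an INLINEASM node for an early-clobber register definition of the
+// return-address register (LR); when one is found, record it in
+// HexagonMachineFunctionInfo so later code knows LR is clobbered by the asm
+// and must be saved.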
+SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ MachineFunction &MF = DAG.getMachineFunction();
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+ switch (Node->getOpcode()) {
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the flag operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ if (FuncInfo->hasClobberLR())
+ break;
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default: llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegUse:
+ case InlineAsm::Kind_Imm:
+ case InlineAsm::Kind_Clobber:
+ case InlineAsm::Kind_Mem: {
+ for (; NumVals; --NumVals, ++i) {}
+ break;
+ }
+ case InlineAsm::Kind_RegDefEarlyClobber: {
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg =
+ cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+
+              // Check whether this is the return-address register (LR).
+ if (Reg == TM.getRegisterInfo()->getRARegister()) {
+ FuncInfo->setHasClobberLR(true);
+ break;
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+ } // Node->getOpcode
+ return Op;
+}
+
+
+//
+// Taken from the XCore backend.
+//
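+// The branch is lowered as a computed jump (sketch):
+//   target = load(jump_table_base + (index << 2))
+//   branch-to target
+// with every jump-table destination marked as address-taken.
+//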
+SDValue HexagonTargetLowering::
+LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ unsigned JTI = JT->getIndex();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+
+ // Mark all jump table targets as address taken.
+ const std::vector<MachineJumpTableEntry> &JTE = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JTE[JTI].MBBs;
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ MBB->setHasAddressTaken();
+ // This line is needed to set the hasAddressTaken flag on the BasicBlock
+ // object.
+ BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock()));
+ }
+
+ SDValue JumpTableBase = DAG.getNode(HexagonISD::WrapperJT, dl,
+ getPointerTy(), TargetJT);
+ SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
+ DAG.getConstant(2, MVT::i32));
+ SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase,
+ ShiftIndex);
+ SDValue LoadTarget = DAG.getLoad(MVT::i32, dl, Chain, JTAddress,
+ MachinePointerInfo(), false, false, false,
+ 0);
+ return DAG.getNode(HexagonISD::BR_JT, dl, MVT::Other, Chain, LoadTarget);
+}
+
+
+SDValue
+HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned SPReg = getStackPointerRegisterToSaveRestore();
+
+ // Get a reference to the stack pointer.
+ SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
+
+ // Subtract the dynamic size from the actual stack size to
+ // obtain the new stack size.
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
+
+ //
+ // For Hexagon, the outgoing memory arguments area should be on top of the
+  // alloca area on the stack, i.e., the outgoing memory arguments should be
+ // at a lower address than the alloca area. Move the alloca area down the
+ // stack by adding back the space reserved for outgoing arguments to SP
+ // here.
+ //
+ // We do not know what the size of the outgoing args is at this point.
+ // So, we add a pseudo instruction ADJDYNALLOC that will adjust the
+ // stack pointer. We patch this instruction with the correct, known
+ // offset in emitPrologue().
+ //
+ // Use a placeholder immediate (zero) for now. This will be patched up
+ // by emitPrologue().
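+  //
+  // The emitted sequence is therefore (sketch):
+  //   Sub       = SP - Size
+  //   ArgAdjust = ADJDYNALLOC Sub, #0   (#0 is the placeholder)
+  //   SP        = Sub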
+ SDValue ArgAdjust = DAG.getNode(HexagonISD::ADJDYNALLOC, dl,
+ MVT::i32,
+ Sub,
+ DAG.getConstant(0, MVT::i32));
+
+ // The Sub result contains the new stack start address, so it
+ // must be placed in the stack pointer register.
+ SDValue CopyChain = DAG.getCopyToReg(Chain, dl,
+ TM.getRegisterInfo()->getStackRegister(),
+ Sub);
+
+ SDValue Ops[2] = { ArgAdjust, CopyChain };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue
+HexagonTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const
+ SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
+
+  // For LLVM, in the case when returning a struct by value (>8byte),
+  // the first argument is a pointer that points to the location on the
+  // caller's stack where the return value will be stored. For Hexagon, that
+  // location is passed only when the struct size is bigger than 8 bytes;
+  // otherwise no address is passed into the callee and the callee returns
+  // the result directly through R0/R1.
+
+ SmallVector<SDValue, 4> MemOps;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ unsigned ObjSize;
+ unsigned StackLocation;
+ int FI;
+
+    if ((VA.isRegLoc() && !Flags.isByVal()) ||
+        (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() > 8)) {
+      // Arguments passed in registers:
+      // 1. int, long long, and ptr args that get allocated in a register.
+      // 2. Large structs that get a register to hold their address.
+ EVT RegVT = VA.getLocVT();
+ if (RegVT == MVT::i8 || RegVT == MVT::i16 || RegVT == MVT::i32) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(Hexagon::IntRegsRegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (RegVT == MVT::i64) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(Hexagon::DoubleRegsRegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else {
+        assert(0 && "Unexpected register type for a formal argument");
+ }
+ } else if (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() <= 8) {
+ assert (0 && "ByValSize must be bigger than 8 bytes");
+ } else {
+ // Sanity check.
+ assert(VA.isMemLoc());
+
+ if (Flags.isByVal()) {
+ // If it's a byval parameter, then we need to compute the
+ // "real" size, not the size of the pointer.
+ ObjSize = Flags.getByValSize();
+ } else {
+ ObjSize = VA.getLocVT().getStoreSizeInBits() >> 3;
+ }
+
+ StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
+ // Create the frame index object for this incoming parameter...
+ FI = MFI->CreateFixedObject(ObjSize, StackLocation, true);
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+
+ if (Flags.isByVal()) {
+ // If it's a pass-by-value aggregate, then do not dereference the stack
+ // location. Instead, we should generate a reference to the stack
+ // location.
+ InVals.push_back(FIN);
+ } else {
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo(), false, false,
+ false, 0));
+ }
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0],
+ MemOps.size());
+
+ if (isVarArg) {
+ // This will point to the next argument passed via stack.
+ int FrameIndex = MFI->CreateFixedObject(Hexagon_PointerSize,
+ HEXAGON_LRFP_SIZE +
+ CCInfo.getNextStackOffset(),
+ true);
+ FuncInfo->setVarArgsFrameIndex(FrameIndex);
+ }
+
+ return Chain;
+}
+
+SDValue
+HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ // VASTART stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ MachineFunction &MF = DAG.getMachineFunction();
+ HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
+ SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), Op.getDebugLoc(), Addr,
+ Op.getOperand(1), MachinePointerInfo(SV), false,
+ false, 0);
+}
+
+SDValue
+HexagonTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDNode* OpNode = Op.getNode();
+
+ SDValue Cond = DAG.getNode(ISD::SETCC, Op.getDebugLoc(), MVT::i1,
+ Op.getOperand(2), Op.getOperand(3),
+ Op.getOperand(4));
+ return DAG.getNode(ISD::SELECT, Op.getDebugLoc(), OpNode->getValueType(0),
+ Cond, Op.getOperand(0),
+ Op.getOperand(1));
+}
+
+SDValue
+HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(4, MVT::i32);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ MachinePointerInfo(), false, false, false, 0);
+ }
+
+ // Return LR, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(TRI->getRARegister(), getRegClassFor(MVT::i32));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+}
+
+SDValue
+HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ const HexagonRegisterInfo *TRI = TM.getRegisterInfo();
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ TRI->getFrameRegister(), VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ return FrameAddr;
+}
+
+
+SDValue HexagonTargetLowering::LowerMEMBARRIER(SDValue Op,
+ SelectionDAG& DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
+}
+
+
+SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG& DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
+}
+
+
+SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Result;
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+ DebugLoc dl = Op.getDebugLoc();
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
+
+ HexagonTargetObjectFile &TLOF =
+    static_cast<HexagonTargetObjectFile&>(getObjFileLowering());
+ if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
+ return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result);
+ }
+
+ return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
+ &targetmachine)
+ : TargetLowering(targetmachine, new HexagonTargetObjectFile()),
+ TM(targetmachine) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, Hexagon::IntRegsRegisterClass);
+ addRegisterClass(MVT::i64, Hexagon::DoubleRegsRegisterClass);
+
+ addRegisterClass(MVT::i1, Hexagon::PredRegsRegisterClass);
+
+ computeRegisterProperties();
+
+ // Align loop entry
+ setPrefLoopAlignment(4);
+
+ // Limits for inline expansion of memcpy/memmove
+ maxStoresPerMemcpy = 6;
+ maxStoresPerMemmove = 6;
+
+ //
+ // Library calls for unsupported operations
+ //
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
+
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
+
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
+ setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
+
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
+ setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
+
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
+ setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
+ setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
+
+ setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+  setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
+ setOperationAction(ISD::FDIV, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
+ setOperationAction(ISD::FDIV, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
+ setOperationAction(ISD::FADD, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
+ setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
+ setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
+ setCondCodeAction(ISD::SETOLT, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
+ setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
+  setOperationAction(ISD::FMUL, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
+  setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
+  setOperationAction(ISD::FSUB, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
+  setOperationAction(ISD::FSUB, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
+ setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
+ setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
+ setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
+ setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+
+ setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal);
+
+ setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal);
+
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+ // Turn FP extload into load/fextend.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+  // There is no native i1 sign-extending load; expand it.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom legalize GlobalAddress nodes into CONST32.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+  // Expand 64-bit truncates.
+ setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
+
+ // Hexagon doesn't have sext_inreg, replace them with shl/sra.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+ // Hexagon has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+ // Expand fp<->uint.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ // Hexagon has no select or setcc: expand to SELECT_CC.
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+
+ // Lower SELECT_CC to SETCC and SELECT.
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+  // This is a workaround documented in DAGCombiner.cpp:2892: we don't
+  // support SELECT_CC on every type.
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ if (EmitJumpTables) {
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+ } else {
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ }
+
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Expand);
+
+ if (TM.getSubtargetImpl()->isSubtargetV2()) {
+ setExceptionPointerRegister(Hexagon::R20);
+ setExceptionSelectorRegister(Hexagon::R21);
+ } else {
+ setExceptionPointerRegister(Hexagon::R0);
+ setExceptionSelectorRegister(Hexagon::R1);
+ }
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+
+
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::INLINEASM , MVT::Other, Custom);
+
+ setMinFunctionAlignment(2);
+
+ // Needed for DYNAMIC_STACKALLOC expansion.
+ unsigned StackRegister = TM.getRegisterInfo()->getStackRegister();
+ setStackPointerRegisterToSaveRestore(StackRegister);
+}
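+
+// Note on the pattern above: marking an operation Expand while installing a
+// __hexagon_* libcall name makes legalization emit a runtime call instead of
+// a native instruction. As a rough sketch of the effect (IR on the left,
+// post-legalization form on the right), using the names set above:
+//
+//   %s = fadd double %a, %b   -->   call @__hexagon_adddf3(%a, %b)
+//   %q = sdiv i32 %x, %y      -->   call @__hexagon_divsi3(%x, %y)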
+
+
+const char*
+HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case HexagonISD::CONST32: return "HexagonISD::CONST32";
+ case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC";
+ case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
+ case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
+ case HexagonISD::BRICC: return "HexagonISD::BRICC";
+ case HexagonISD::BRFCC: return "HexagonISD::BRFCC";
+ case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC";
+ case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC";
+ case HexagonISD::Hi: return "HexagonISD::Hi";
+ case HexagonISD::Lo: return "HexagonISD::Lo";
+ case HexagonISD::FTOI: return "HexagonISD::FTOI";
+ case HexagonISD::ITOF: return "HexagonISD::ITOF";
+ case HexagonISD::CALL: return "HexagonISD::CALL";
+ case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
+ case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
+ case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
+ }
+}
+
+bool
+HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ EVT MTy1 = EVT::getEVT(Ty1);
+ EVT MTy2 = EVT::getEVT(Ty2);
+ if (!MTy1.isSimple() || !MTy2.isSimple()) {
+ return false;
+ }
+ return ((MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32));
+}
+
+bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isSimple() || !VT2.isSimple()) {
+ return false;
+ }
+ return ((VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32));
+}
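+
+// In other words, only i64 -> i32 truncation is free: the low word of a
+// 64-bit register pair (e.g. r1:0) is itself a 32-bit register, so the
+// narrowing needs no instruction.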
+
+SDValue
+HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: assert(0 && "Should not custom lower this!");
+ // Frame & Return address. Currently unimplemented.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ assert(0 && "TLS not implemented for Hexagon.");
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
+ case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
+
+ }
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Hexagon Scheduler Hooks
+//===----------------------------------------------------------------------===//
+MachineBasicBlock *
+HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB)
+const {
+ switch (MI->getOpcode()) {
+ case Hexagon::ADJDYNALLOC: {
+ MachineFunction *MF = BB->getParent();
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF->getInfo<HexagonMachineFunctionInfo>();
+ FuncInfo->addAllocaAdjustInst(MI);
+ return BB;
+ }
+ default:
+ assert(false && "Unexpected instr type to insert");
+ } // switch
+ return NULL;
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::pair<unsigned, const TargetRegisterClass*>
+HexagonTargetLowering::getRegForInlineAsmConstraint(const
+ std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r': // R0-R31
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(0 && "getRegForInlineAsmConstraint Unhandled data type");
+ case MVT::i32:
+ case MVT::i16:
+ case MVT::i8:
+ return std::make_pair(0U, Hexagon::IntRegsRegisterClass);
+ case MVT::i64:
+ return std::make_pair(0U, Hexagon::DoubleRegsRegisterClass);
+ }
+ default:
+ assert(0 && "Unknown asm register class");
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented by
+/// AM is legal for this target, for a load/store of the specified type.
+bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+  // Only allow base offsets that the addressing-mode immediate field can
+  // hold; the check below accepts offsets in [-8191, 8190].
+ if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) {
+ return false;
+ }
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV) {
+ return false;
+ }
+
+ int Scale = AM.Scale;
+ if (Scale < 0) Scale = -Scale;
+ switch (Scale) {
+ case 0: // No scale reg, "r+i", "r", or just "i".
+ break;
+ default: // No scaled addressing mode.
+ return false;
+ }
+ return true;
+}
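+
+// For example, under these rules [r0 + #4000] is a legal mode, while
+// [r0 + #20000] (offset out of range), a global-symbol base, or a scaled
+// form such as [r0 + r1 << #2] must instead be formed with explicit
+// address arithmetic.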
+
+/// isLegalICmpImmediate - Return true if the specified immediate is legal
+/// icmp immediate, that is the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
+bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ return Imm >= -512 && Imm <= 511;
+}
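+
+// So a compare like "p0 = cmp.gt(r0, #511)" encodes its constant directly,
+// while comparing against 512 first needs the constant transferred into a
+// register.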
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
+ SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // ***************************************************************************
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes.
+ // ***************************************************************************
+
+ // If this is a tail call via a function pointer, then don't do it!
+ if (!(dyn_cast<GlobalAddressSDNode>(Callee))
+ && !(dyn_cast<ExternalSymbolSDNode>(Callee))) {
+ return false;
+ }
+
+ // Do not optimize if the calling conventions do not match.
+ if (!CCMatch)
+ return false;
+
+ // Do not tail call optimize vararg calls.
+ if (isVarArg)
+ return false;
+
+ // Also avoid tail call optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // In addition to the cases above, we also disable Tail Call Optimization if
+ // the calling convention code that at least one outgoing argument needs to
+ // go on the stack. We cannot check that here because at this point that
+ // information is not available.
+ return true;
+}
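+
+// Under these checks a direct, non-variadic call whose calling convention
+// matches the caller's (e.g. a plain "return helper(x);" in C) remains a
+// tail-call candidate; calls through function pointers and sret calls do not.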
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
new file mode 100644
index 0000000..b327615
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -0,0 +1,162 @@
+//==-- HexagonISelLowering.h - Hexagon DAG Lowering Interface ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Hexagon uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Hexagon_ISELLOWERING_H
+#define Hexagon_ISELLOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "Hexagon.h"
+
+namespace llvm {
+ namespace HexagonISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ CONST32,
+ CONST32_GP, // For marking data present in GP.
+ SETCC,
+ ADJDYNALLOC,
+ ARGEXTEND,
+
+ CMPICC, // Compare two GPR operands, set icc.
+ CMPFCC, // Compare two FP operands, set fcc.
+ BRICC, // Branch to dest on icc condition
+ BRFCC, // Branch to dest on fcc condition
+ SELECT_ICC, // Select between two values using the current ICC flags.
+ SELECT_FCC, // Select between two values using the current FCC flags.
+
+ Hi, Lo, // Hi/Lo operations, typically on a global address.
+
+ FTOI, // FP to Int within a FP register.
+ ITOF, // Int to FP within a FP register.
+
+ CALL, // A call instruction.
+ RET_FLAG, // Return with a flag operand.
+ BR_JT, // Jump table.
+ BARRIER, // Memory barrier.
+ WrapperJT,
+ TC_RETURN
+ };
+ }
+
+ class HexagonTargetLowering : public TargetLowering {
+ int VarArgsFrameOffset; // Frame offset to start of varargs area.
+
+ bool CanReturnSmallStruct(const Function* CalleeFn,
+ unsigned& RetSize) const;
+
+ public:
+ HexagonTargetMachine &TM;
+ explicit HexagonTargetLowering(HexagonTargetMachine &targetmachine);
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool
+ IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const
+ SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDValue Callee) const;
+
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual MachineBasicBlock
+ *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ virtual EVT getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+ }
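+
+    // Returning MVT::i1 means setcc results live in the predicate register
+    // file (p0-p3) rather than in the general registers.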
+
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ // Intrinsics
+ virtual SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const;
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ /// The type may be VoidTy, in which case only return true if the addressing
+ /// mode is legal for a load/store of any legal type.
+ /// TODO: Handle pre/postinc as well.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+ };
+} // end namespace llvm
+
+#endif // Hexagon_ISELLOWERING_H
diff --git a/lib/Target/Hexagon/HexagonImmediates.td b/lib/Target/Hexagon/HexagonImmediates.td
new file mode 100644
index 0000000..1e3fcb8
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonImmediates.td
@@ -0,0 +1,491 @@
+//=- HexagonImmediates.td - Hexagon immediate processing --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// From IA64's InstrInfo file
+def s32Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s16Imm : Operand<i32> {
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s12Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s11_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s10Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s8Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s9Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s8Imm64 : Operand<i64> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s6Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def s4_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u64Imm : Operand<i64> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u32Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u16Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u16_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u16_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u16_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u11_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u10Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u9Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u8Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u7Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u6_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u5Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u4Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def u2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def n8Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+def m6Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printHexagonImmOperand";
+}
+
+//
+// Immediate predicates
+//
+def s32ImmPred : PatLeaf<(i32 imm), [{
+  // s32ImmPred predicate - True if the immediate fits in a 32-bit sign
+  // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<32>(v);
+}]>;
+
+def s32_24ImmPred : PatLeaf<(i32 imm), [{
+ // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field that is a multiple of 0x1000000.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<32,24>(v);
+}]>;
+
+def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
+ // s32_16s8ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field that is a multiple of 0x10000.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<24,16>(v);
+}]>;
+
+def s16ImmPred : PatLeaf<(i32 imm), [{
+ // immS16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<16>(v);
+}]>;
+
+
+def s13ImmPred : PatLeaf<(i32 imm), [{
+ // immS13 predicate - True if the immediate fits in a 13-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<13>(v);
+}]>;
+
+
+def s12ImmPred : PatLeaf<(i32 imm), [{
+  // s12ImmPred predicate - True if the immediate fits in a 12-bit sign
+  // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<12>(v);
+}]>;
+
+def s11_0ImmPred : PatLeaf<(i32 imm), [{
+  // s11_0ImmPred predicate - True if the immediate fits in an 11-bit sign
+  // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<11>(v);
+}]>;
+
+
+def s11_1ImmPred : PatLeaf<(i32 imm), [{
+  // s11_1ImmPred predicate - True if the immediate fits in an 11-bit sign
+  // extended field shifted by 1 (a multiple of 2).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,1>(v);
+}]>;
+
+
+def s11_2ImmPred : PatLeaf<(i32 imm), [{
+  // s11_2ImmPred predicate - True if the immediate fits in an 11-bit sign
+  // extended field shifted by 2 (a multiple of 4).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,2>(v);
+}]>;
+
+
+def s11_3ImmPred : PatLeaf<(i32 imm), [{
+  // s11_3ImmPred predicate - True if the immediate fits in an 11-bit sign
+  // extended field shifted by 3 (a multiple of 8).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,3>(v);
+}]>;
+
+
+def s10ImmPred : PatLeaf<(i32 imm), [{
+ // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<10>(v);
+}]>;
+
+
+def s9ImmPred : PatLeaf<(i32 imm), [{
+ // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<9>(v);
+}]>;
+
+
+def s8ImmPred : PatLeaf<(i32 imm), [{
+  // s8ImmPred predicate - True if the immediate fits in an 8-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+
+def s8Imm64Pred : PatLeaf<(i64 imm), [{
+  // s8Imm64Pred predicate - True if the immediate fits in an 8-bit sign
+  // extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+
+def s6ImmPred : PatLeaf<(i32 imm), [{
+ // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<6>(v);
+}]>;
+
+
+def s4_0ImmPred : PatLeaf<(i32 imm), [{
+ // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<4>(v);
+}]>;
+
+
+def s4_1ImmPred : PatLeaf<(i32 imm), [{
+ // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+  // field that is a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,1>(v);
+}]>;
+
+
+def s4_2ImmPred : PatLeaf<(i32 imm), [{
+ // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,2>(v);
+}]>;
+
+
+def s4_3ImmPred : PatLeaf<(i32 imm), [{
+ // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,3>(v);
+}]>;
+
+
+def u64ImmPred : PatLeaf<(i64 imm), [{
+  // u64ImmPred predicate - Always true; any 64-bit immediate is accepted.
+  // Referencing "N" below suppresses the gcc unused-variable warning.
+ return (N || true);
+}]>;
+
+def u32ImmPred : PatLeaf<(i32 imm), [{
+  // u32ImmPred predicate - True if the immediate fits in a 32-bit unsigned
+  // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+}]>;
+
+def u16ImmPred : PatLeaf<(i32 imm), [{
+ // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<16>(v);
+}]>;
+
+def u16_s8ImmPred : PatLeaf<(i32 imm), [{
+  // u16_s8ImmPred predicate - True if the immediate is a 16-bit unsigned
+  // value shifted left by 8 (a multiple of 0x100).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<16,8>(v);
+}]>;
+
+def u9ImmPred : PatLeaf<(i32 imm), [{
+ // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<9>(v);
+}]>;
+
+
+def u8ImmPred : PatLeaf<(i32 imm), [{
+  // u8ImmPred predicate - True if the immediate fits in an 8-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<8>(v);
+}]>;
+
+def u7ImmPred : PatLeaf<(i32 imm), [{
+  // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<7>(v);
+}]>;
+
+
+def u6ImmPred : PatLeaf<(i32 imm), [{
+ // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_0ImmPred : PatLeaf<(i32 imm), [{
+ // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field. Same as u6ImmPred.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_1ImmPred : PatLeaf<(i32 imm), [{
+ // u6_1ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+  // field that is 1-bit aligned - a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,1>(v);
+}]>;
+
+def u6_2ImmPred : PatLeaf<(i32 imm), [{
+ // u6_2ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+  // field that is 2-bit aligned - a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,2>(v);
+}]>;
+
+def u6_3ImmPred : PatLeaf<(i32 imm), [{
+ // u6_3ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+  // field that is 3-bit aligned - a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,3>(v);
+}]>;
+
+def u5ImmPred : PatLeaf<(i32 imm), [{
+ // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<5>(v);
+}]>;
+
+
+def u3ImmPred : PatLeaf<(i32 imm), [{
+ // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<3>(v);
+}]>;
+
+
+def u2ImmPred : PatLeaf<(i32 imm), [{
+ // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<2>(v);
+}]>;
+
+
+def u1ImmPred : PatLeaf<(i1 imm), [{
+ // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<1>(v);
+}]>;
+
+def m6ImmPred : PatLeaf<(i32 imm), [{
+  // m6ImmPred predicate - True if the immediate fits in a 6-bit signed
+  // field; note the negative range implied by the name is not enforced here.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<6>(v);
+}]>;
+
+// nN means negative integers in [-(2^N - 1), 0].
+def n8ImmPred : PatLeaf<(i32 imm), [{
+  // n8ImmPred predicate - True if the immediate lies in the range [-255, 0].
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-255 <= v && v <= 0);
+}]>;
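+
+// These predicates gate selection patterns. As an illustrative (not actual)
+// use - the instruction name here is hypothetical:
+//
+//   def : Pat<(add IntRegs:$src, s8ImmPred:$imm),
+//             (ADD_ri_sketch IntRegs:$src, imm:$imm)>;
+//
+// i.e. the pattern only matches when the constant satisfies isInt<8>.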
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
new file mode 100644
index 0000000..7e92776
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -0,0 +1,242 @@
+//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr,
+ InstrItinClass itin> : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "Hexagon";
+
+/* Commented out for Hexagon
+ bits<2> op;
+ let Inst{31-30} = op; */ // Top two bits are the 'op' field
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Constraints = cstr;
+ let Itinerary = itin;
+}
+
+//----------------------------------------------------------------------------//
+// Instruction Classes Definitions +
+//----------------------------------------------------------------------------//
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", LD> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+}
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, LD> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<13> imm13;
+}
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+class STInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", ST> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+}
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ST> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<13> imm13;
+}
+
+// ALU32 Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class ALU32Type<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", ALU32> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+ bits<16> imm16_2;
+}
+
+// ALU64 Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
+class ALU64Type<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", ALU64> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+ bits<16> imm16_2;
+}
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", M> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, M> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstHexagon<outs, ins, asmstr, pattern, "", S> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, S> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// J Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JType<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", J> {
+ bits<16> imm16;
+}
+
+// JR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JRType<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", JR> {
+ bits<5> rs;
+ bits<5> pu; // Predicate register
+}
+
+// CR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", CR> {
+ bits<5> rs;
+ bits<10> imm10;
+}
+
+
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", PSEUDO>;
+
+
+//----------------------------------------------------------------------------//
+// Instruction Classes Definitions -
+//----------------------------------------------------------------------------//
+
+
+//
+// ALU32 patterns.
+//
+class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+}
+
+class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
+class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
+class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
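+
+// A concrete instruction derives from one of these classes; as an
+// illustrative (not actual) definition:
+//
+//   def ADD_rr_sketch : ALU32_rr<(outs IntRegs:$dst),
+//                                (ins IntRegs:$src1, IntRegs:$src2),
+//                                "$dst = add($src1, $src2)",
+//                                [(set IntRegs:$dst,
+//                                      (add IntRegs:$src1, IntRegs:$src2))]>;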
+
+//
+// ALU64 patterns.
+//
+class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU64Type<outs, ins, asmstr, pattern> {
+}
+
+// J Type Instructions.
+class JInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : JType<outs, ins, asmstr, pattern> {
+}
+
+// JR type Instructions.
+class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : JRType<outs, ins, asmstr, pattern> {
+}
+
+
+// Post increment ST Instruction.
+class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr>
+ : STInstPost<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+}
+
+// Post increment LD Instruction.
+class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr>
+ : LDInstPost<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// V4 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormatsV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Instruction Format Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
new file mode 100644
index 0000000..bd5e449
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -0,0 +1,46 @@
+//==- HexagonInstrFormatsV4.td - Hexagon V4 Instruction Formats -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V4 instruction classes in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// NV type instructions.
+//
+class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", NV_V4> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+}
+
+// Definition of Post increment new value store.
+class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<13> imm13;
+}
+
+// Post increment ST Instruction.
+class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : NVInstPost_V4<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+}
+
+class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", MEM_V4> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<6> imm6;
+}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
new file mode 100644
index 0000000..69a50d7
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -0,0 +1,1459 @@
+//=- HexagonInstrInfo.cpp - Hexagon Instruction Information -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonRegisterInfo.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "Hexagon.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#define GET_INSTRINFO_CTOR
+#include "HexagonGenInstrInfo.inc"
+
+#include <iostream>
+
+
+using namespace llvm;
+
+///
+/// Constants for Hexagon instructions.
+///
+const int Hexagon_MEMW_OFFSET_MAX = 4095;
+const int Hexagon_MEMW_OFFSET_MIN = -4096;
+const int Hexagon_MEMD_OFFSET_MAX = 8191;
+const int Hexagon_MEMD_OFFSET_MIN = -8192;
+const int Hexagon_MEMH_OFFSET_MAX = 2047;
+const int Hexagon_MEMH_OFFSET_MIN = -2048;
+const int Hexagon_MEMB_OFFSET_MAX = 1023;
+const int Hexagon_MEMB_OFFSET_MIN = -1024;
+const int Hexagon_ADDI_OFFSET_MAX = 32767;
+const int Hexagon_ADDI_OFFSET_MIN = -32768;
+const int Hexagon_MEMD_AUTOINC_MAX = 56;
+const int Hexagon_MEMD_AUTOINC_MIN = -64;
+const int Hexagon_MEMW_AUTOINC_MAX = 28;
+const int Hexagon_MEMW_AUTOINC_MIN = -32;
+const int Hexagon_MEMH_AUTOINC_MAX = 14;
+const int Hexagon_MEMH_AUTOINC_MIN = -16;
+const int Hexagon_MEMB_AUTOINC_MAX = 7;
+const int Hexagon_MEMB_AUTOINC_MIN = -8;
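+
+// These bounds give the reach of base+offset addressing; for example a word
+// access such as "memw(r29 + #4092)" lies inside
+// [Hexagon_MEMW_OFFSET_MIN, Hexagon_MEMW_OFFSET_MAX], whereas an offset of
+// 4096 would require the address to be materialized separately.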
+
+
+
+HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
+ : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
+ RI(ST, *this), Subtarget(ST) {
+}
+
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+
+
+ switch (MI->getOpcode()) {
+ case Hexagon::LDriw:
+ case Hexagon::LDrid:
+ case Hexagon::LDrih:
+ case Hexagon::LDrib:
+ case Hexagon::LDriub:
+ if (MI->getOperand(2).isFI() &&
+ MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stored stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::STriw:
+ case Hexagon::STrid:
+ case Hexagon::STrih:
+ case Hexagon::STrib:
+ if (MI->getOperand(2).isFI() &&
+ MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
+unsigned
+HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const{
+
+ int BOpc = Hexagon::JMP;
+ int BccOpc = Hexagon::JMP_Pred;
+
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ int regPos = 0;
+  // Check if ReverseBranchCondition has asked to reverse this branch.
+ // If we want to reverse the branch an odd number of times, we want
+ // JMP_PredNot.
+ if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
+ BccOpc = Hexagon::JMP_PredNot;
+ regPos = 1;
+ }
+
+ if (FBB == 0) {
+ if (Cond.empty()) {
+      // Due to a bug in TailMerging/CFG Optimization, we need special-case
+      // handling of a predicated jump followed by an unconditional jump;
+      // otherwise, Tail Merging and CFG Optimization go into an infinite
+      // loop.
+ MachineBasicBlock *NewTBB, *NewFBB;
+ SmallVector<MachineOperand, 4> Cond;
+ MachineInstr *Term = MBB.getFirstTerminator();
+ if (isPredicated(Term) && !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond,
+ false)) {
+ MachineBasicBlock *NextBB =
+ llvm::next(MachineFunction::iterator(&MBB));
+ if (NewTBB == NextBB) {
+ ReverseBranchCondition(Cond);
+ RemoveBranch(MBB);
+ return InsertBranch(MBB, TBB, 0, Cond, DL);
+ }
+ }
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ } else {
+ BuildMI(&MBB, DL,
+ get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+ }
+ return 1;
+ }
+
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+
+ return 2;
+}
+
+
+bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ FBB = NULL;
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+
+  // A basic block may look like this:
+ //
+ // [ insn
+ // EH_LABEL
+ // insn
+ // insn
+ // insn
+ // EH_LABEL
+ // insn ]
+ //
+  // It has two successors but no terminator; we don't know how to handle
+  // such a block.
+ do {
+ --I;
+ if (I->isEHLabel())
+ return true;
+ } while (I != MBB.begin());
+
+ I = MBB.end();
+ --I;
+
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (LastInst->getOpcode() == Hexagon::JMP_Pred) {
+ // Block ends with fall-through true condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ if (LastInst->getOpcode() == Hexagon::JMP_PredNot) {
+ // Block ends with fall-through false condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+  // If the block ends with Hexagon::BRCOND and Hexagon::JMP, handle it.
+ if (((SecondLastInst->getOpcode() == Hexagon::BRCOND) ||
+ (SecondLastInst->getOpcode() == Hexagon::JMP_Pred)) &&
+ LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+  // If the block ends with Hexagon::JMP_PredNot and Hexagon::JMP, handle it.
+ if ((SecondLastInst->getOpcode() == Hexagon::JMP_PredNot) &&
+ LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+  // If the block ends with two Hexagon::JMPs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == Hexagon::JMP &&
+ LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
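+
+// The two-terminator shape recognized above looks like:
+//
+//   if (p0) jump TBB      // Hexagon::JMP_Pred (or JMP_PredNot)
+//   jump FBB              // Hexagon::JMP
+//
+// Anything else - an EH_LABEL after the terminators, or three terminator
+// instructions - makes this return true, i.e. "cannot analyze".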
+
+
+unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ int BOpc = Hexagon::JMP;
+ int BccOpc = Hexagon::JMP_Pred;
+ int BccOpcNot = Hexagon::JMP_PredNot;
+
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc &&
+ I->getOpcode() != BccOpcNot)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != BccOpc && I->getOpcode() != BccOpcNot)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+
+void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR), DestReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR_64), DestReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) {
+ // Map Pd = Ps to Pd = or(Ps, Ps).
+ BuildMI(MBB, I, DL, get(Hexagon::OR_pp),
+ DestReg).addReg(SrcReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::DoubleRegsRegClass.contains(DestReg, SrcReg)) {
+ // We can have an overlap between single and double reg: r1:0 = r0.
+    if (SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) {
+ // r1:0 = r0
+ BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg,
+ Hexagon::subreg_hireg))).addImm(0);
+ } else {
+ // r1:0 = r1 or no overlap.
+ BuildMI(MBB, I, DL, get(Hexagon::TFR), (RI.getSubReg(DestReg,
+ Hexagon::subreg_loreg))).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg,
+ Hexagon::subreg_hireg))).addImm(0);
+ }
+ return;
+ }
+ if (Hexagon::CRRegsRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg);
+ return;
+ }
+
+ assert (0 && "Unimplemented");
+}
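+
+// Examples of the copies produced above: an int-to-int copy becomes
+// "r1 = r0" via TFR, a predicate copy "p1 = p0" is modeled as
+// "p1 = or(p0, p0)" via OR_pp, and a 64-bit pair copy uses TFR_64.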
+
+
+void HexagonInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+
+ DebugLoc DL = MBB.findDebugLoc(I);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ Align);
+
+ if (Hexagon::IntRegsRegisterClass->hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::STriw))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else if (Hexagon::DoubleRegsRegisterClass->hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::STrid))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else if (Hexagon::PredRegsRegisterClass->hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::STriw_pred))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else {
+ assert(0 && "Unimplemented");
+ }
+}
+
+
+void HexagonInstrInfo::storeRegToAddr(
+ MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const
+{
+ assert(0 && "Unimplemented");
+ return;
+}
+
+
+void HexagonInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(I);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ Align);
+
+ if (RC == Hexagon::IntRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::LDriw), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else if (RC == Hexagon::DoubleRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::LDrid), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else if (RC == Hexagon::PredRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else {
+    assert(0 && "Can't load this register from stack slot");
+ }
+}
+
+
+void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ assert(0 && "Unimplemented");
+}
+
+
+MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FI) const {
+ // Hexagon_TODO: Implement.
+  return 0;
+}
+
+
+unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
+
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  const TargetRegisterClass *TRC = 0;
+ if (VT == MVT::i1) {
+ TRC = Hexagon::PredRegsRegisterClass;
+ } else if (VT == MVT::i32) {
+ TRC = Hexagon::IntRegsRegisterClass;
+ } else if (VT == MVT::i64) {
+ TRC = Hexagon::DoubleRegsRegisterClass;
+ } else {
+ assert(0 && "Cannot handle this register class");
+ }
+
+ unsigned NewReg = RegInfo.createVirtualRegister(TRC);
+ return NewReg;
+}
+
+
+bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
+ bool isPred = MI->getDesc().isPredicable();
+
+ if (!isPred)
+ return false;
+
+ const int Opc = MI->getOpcode();
+
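+  // Predicated forms carry the predicate as an extra operand, which shrinks
+  // the immediate field (e.g. TFRI takes an s16 immediate but TFRI_cPt only
+  // an s12), so reject instructions whose immediates would no longer fit.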
+ switch(Opc) {
+ case Hexagon::TFRI:
+ return isInt<12>(MI->getOperand(1).getImm());
+
+ case Hexagon::STrid:
+ case Hexagon::STrid_indexed:
+ return isShiftedUInt<6,3>(MI->getOperand(1).getImm());
+
+ case Hexagon::STriw:
+ case Hexagon::STriw_indexed:
+ case Hexagon::STriw_nv_V4:
+ return isShiftedUInt<6,2>(MI->getOperand(1).getImm());
+
+ case Hexagon::STrih:
+ case Hexagon::STrih_indexed:
+ case Hexagon::STrih_nv_V4:
+ return isShiftedUInt<6,1>(MI->getOperand(1).getImm());
+
+ case Hexagon::STrib:
+ case Hexagon::STrib_indexed:
+ case Hexagon::STrib_nv_V4:
+ return isUInt<6>(MI->getOperand(1).getImm());
+
+ case Hexagon::LDrid:
+ case Hexagon::LDrid_indexed:
+ return isShiftedUInt<6,3>(MI->getOperand(2).getImm());
+
+ case Hexagon::LDriw:
+ case Hexagon::LDriw_indexed:
+ return isShiftedUInt<6,2>(MI->getOperand(2).getImm());
+
+ case Hexagon::LDrih:
+ case Hexagon::LDriuh:
+ case Hexagon::LDrih_indexed:
+ case Hexagon::LDriuh_indexed:
+ return isShiftedUInt<6,1>(MI->getOperand(2).getImm());
+
+ case Hexagon::LDrib:
+ case Hexagon::LDriub:
+ case Hexagon::LDrib_indexed:
+ case Hexagon::LDriub_indexed:
+ return isUInt<6>(MI->getOperand(2).getImm());
+
+ case Hexagon::POST_LDrid:
+ return isShiftedInt<4,3>(MI->getOperand(3).getImm());
+
+ case Hexagon::POST_LDriw:
+ return isShiftedInt<4,2>(MI->getOperand(3).getImm());
+
+ case Hexagon::POST_LDrih:
+ case Hexagon::POST_LDriuh:
+ return isShiftedInt<4,1>(MI->getOperand(3).getImm());
+
+ case Hexagon::POST_LDrib:
+ case Hexagon::POST_LDriub:
+ return isInt<4>(MI->getOperand(3).getImm());
+
+ case Hexagon::STrib_imm_V4:
+ case Hexagon::STrih_imm_V4:
+ case Hexagon::STriw_imm_V4:
+ return (isUInt<6>(MI->getOperand(1).getImm()) &&
+ isInt<6>(MI->getOperand(2).getImm()));
+
+ case Hexagon::ADD_ri:
+ return isInt<8>(MI->getOperand(2).getImm());
+
+ case Hexagon::ASLH:
+ case Hexagon::ASRH:
+ case Hexagon::SXTB:
+ case Hexagon::SXTH:
+ case Hexagon::ZXTB:
+ case Hexagon::ZXTH:
+ return Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+
+  case Hexagon::JMPR:
+    return false;
+
+ default:
+ return true;
+ }
+
+ return true;
+}
+
+
+int HexagonInstrInfo::
+getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
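+  // Return the predicated form of Opc: the _cPt variant executes when the
+  // predicate is true, the _cNotPt variant when it is false.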
+ switch(Opc) {
+ case Hexagon::TFR:
+ return !invertPredicate ? Hexagon::TFR_cPt :
+ Hexagon::TFR_cNotPt;
+ case Hexagon::TFRI:
+ return !invertPredicate ? Hexagon::TFRI_cPt :
+ Hexagon::TFRI_cNotPt;
+ case Hexagon::JMP:
+ return !invertPredicate ? Hexagon::JMP_Pred :
+ Hexagon::JMP_PredNot;
+ case Hexagon::ADD_ri:
+ return !invertPredicate ? Hexagon::ADD_ri_cPt :
+ Hexagon::ADD_ri_cNotPt;
+ case Hexagon::ADD_rr:
+ return !invertPredicate ? Hexagon::ADD_rr_cPt :
+ Hexagon::ADD_rr_cNotPt;
+ case Hexagon::XOR_rr:
+ return !invertPredicate ? Hexagon::XOR_rr_cPt :
+ Hexagon::XOR_rr_cNotPt;
+ case Hexagon::AND_rr:
+ return !invertPredicate ? Hexagon::AND_rr_cPt :
+ Hexagon::AND_rr_cNotPt;
+ case Hexagon::OR_rr:
+ return !invertPredicate ? Hexagon::OR_rr_cPt :
+ Hexagon::OR_rr_cNotPt;
+ case Hexagon::SUB_rr:
+ return !invertPredicate ? Hexagon::SUB_rr_cPt :
+ Hexagon::SUB_rr_cNotPt;
+ case Hexagon::COMBINE_rr:
+ return !invertPredicate ? Hexagon::COMBINE_rr_cPt :
+ Hexagon::COMBINE_rr_cNotPt;
+ case Hexagon::ASLH:
+ return !invertPredicate ? Hexagon::ASLH_cPt_V4 :
+ Hexagon::ASLH_cNotPt_V4;
+ case Hexagon::ASRH:
+ return !invertPredicate ? Hexagon::ASRH_cPt_V4 :
+ Hexagon::ASRH_cNotPt_V4;
+ case Hexagon::SXTB:
+ return !invertPredicate ? Hexagon::SXTB_cPt_V4 :
+ Hexagon::SXTB_cNotPt_V4;
+ case Hexagon::SXTH:
+ return !invertPredicate ? Hexagon::SXTH_cPt_V4 :
+ Hexagon::SXTH_cNotPt_V4;
+ case Hexagon::ZXTB:
+ return !invertPredicate ? Hexagon::ZXTB_cPt_V4 :
+ Hexagon::ZXTB_cNotPt_V4;
+ case Hexagon::ZXTH:
+ return !invertPredicate ? Hexagon::ZXTH_cPt_V4 :
+ Hexagon::ZXTH_cNotPt_V4;
+
+ case Hexagon::JMPR:
+ return !invertPredicate ? Hexagon::JMPR_cPt :
+ Hexagon::JMPR_cNotPt;
+
+ // V4 indexed+scaled load.
+ case Hexagon::LDrid_indexed_V4:
+ return !invertPredicate ? Hexagon::LDrid_indexed_cPt_V4 :
+ Hexagon::LDrid_indexed_cNotPt_V4;
+ case Hexagon::LDrid_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDrid_indexed_shl_cPt_V4 :
+ Hexagon::LDrid_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrib_indexed_V4:
+ return !invertPredicate ? Hexagon::LDrib_indexed_cPt_V4 :
+ Hexagon::LDrib_indexed_cNotPt_V4;
+ case Hexagon::LDriub_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
+ Hexagon::LDriub_indexed_cNotPt_V4;
+ case Hexagon::LDriub_ae_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
+ Hexagon::LDriub_indexed_cNotPt_V4;
+ case Hexagon::LDrib_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDrib_indexed_shl_cPt_V4 :
+ Hexagon::LDrib_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriub_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
+ Hexagon::LDriub_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriub_ae_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
+ Hexagon::LDriub_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrih_indexed_V4:
+ return !invertPredicate ? Hexagon::LDrih_indexed_cPt_V4 :
+ Hexagon::LDrih_indexed_cNotPt_V4;
+ case Hexagon::LDriuh_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
+ Hexagon::LDriuh_indexed_cNotPt_V4;
+ case Hexagon::LDriuh_ae_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
+ Hexagon::LDriuh_indexed_cNotPt_V4;
+ case Hexagon::LDrih_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDrih_indexed_shl_cPt_V4 :
+ Hexagon::LDrih_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriuh_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriuh_ae_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriw_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriw_indexed_cPt_V4 :
+ Hexagon::LDriw_indexed_cNotPt_V4;
+ case Hexagon::LDriw_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriw_indexed_shl_cPt_V4 :
+ Hexagon::LDriw_indexed_shl_cNotPt_V4;
+ // Byte.
+ case Hexagon::POST_STbri:
+ return !invertPredicate ? Hexagon::POST_STbri_cPt :
+ Hexagon::POST_STbri_cNotPt;
+ case Hexagon::STrib:
+ return !invertPredicate ? Hexagon::STrib_cPt :
+ Hexagon::STrib_cNotPt;
+ case Hexagon::STrib_indexed:
+ return !invertPredicate ? Hexagon::STrib_indexed_cPt :
+ Hexagon::STrib_indexed_cNotPt;
+ case Hexagon::STrib_imm_V4:
+ return !invertPredicate ? Hexagon::STrib_imm_cPt_V4 :
+ Hexagon::STrib_imm_cNotPt_V4;
+ case Hexagon::STrib_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STrib_indexed_shl_cPt_V4 :
+ Hexagon::STrib_indexed_shl_cNotPt_V4;
+ // Halfword.
+ case Hexagon::POST_SThri:
+ return !invertPredicate ? Hexagon::POST_SThri_cPt :
+ Hexagon::POST_SThri_cNotPt;
+ case Hexagon::STrih:
+ return !invertPredicate ? Hexagon::STrih_cPt :
+ Hexagon::STrih_cNotPt;
+ case Hexagon::STrih_indexed:
+ return !invertPredicate ? Hexagon::STrih_indexed_cPt :
+ Hexagon::STrih_indexed_cNotPt;
+ case Hexagon::STrih_imm_V4:
+ return !invertPredicate ? Hexagon::STrih_imm_cPt_V4 :
+ Hexagon::STrih_imm_cNotPt_V4;
+ case Hexagon::STrih_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STrih_indexed_shl_cPt_V4 :
+ Hexagon::STrih_indexed_shl_cNotPt_V4;
+ // Word.
+ case Hexagon::POST_STwri:
+ return !invertPredicate ? Hexagon::POST_STwri_cPt :
+ Hexagon::POST_STwri_cNotPt;
+ case Hexagon::STriw:
+ return !invertPredicate ? Hexagon::STriw_cPt :
+ Hexagon::STriw_cNotPt;
+ case Hexagon::STriw_indexed:
+ return !invertPredicate ? Hexagon::STriw_indexed_cPt :
+ Hexagon::STriw_indexed_cNotPt;
+ case Hexagon::STriw_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STriw_indexed_shl_cPt_V4 :
+ Hexagon::STriw_indexed_shl_cNotPt_V4;
+ case Hexagon::STriw_imm_V4:
+ return !invertPredicate ? Hexagon::STriw_imm_cPt_V4 :
+ Hexagon::STriw_imm_cNotPt_V4;
+ // Double word.
+ case Hexagon::POST_STdri:
+ return !invertPredicate ? Hexagon::POST_STdri_cPt :
+ Hexagon::POST_STdri_cNotPt;
+ case Hexagon::STrid:
+ return !invertPredicate ? Hexagon::STrid_cPt :
+ Hexagon::STrid_cNotPt;
+ case Hexagon::STrid_indexed:
+ return !invertPredicate ? Hexagon::STrid_indexed_cPt :
+ Hexagon::STrid_indexed_cNotPt;
+ case Hexagon::STrid_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STrid_indexed_shl_cPt_V4 :
+ Hexagon::STrid_indexed_shl_cNotPt_V4;
+ // Load.
+ case Hexagon::LDrid:
+ return !invertPredicate ? Hexagon::LDrid_cPt :
+ Hexagon::LDrid_cNotPt;
+ case Hexagon::LDriw:
+ return !invertPredicate ? Hexagon::LDriw_cPt :
+ Hexagon::LDriw_cNotPt;
+ case Hexagon::LDrih:
+ return !invertPredicate ? Hexagon::LDrih_cPt :
+ Hexagon::LDrih_cNotPt;
+ case Hexagon::LDriuh:
+ return !invertPredicate ? Hexagon::LDriuh_cPt :
+ Hexagon::LDriuh_cNotPt;
+ case Hexagon::LDrib:
+ return !invertPredicate ? Hexagon::LDrib_cPt :
+ Hexagon::LDrib_cNotPt;
+ case Hexagon::LDriub:
+ return !invertPredicate ? Hexagon::LDriub_cPt :
+ Hexagon::LDriub_cNotPt;
+ case Hexagon::LDriubit:
+ return !invertPredicate ? Hexagon::LDriub_cPt :
+ Hexagon::LDriub_cNotPt;
+ // Load Indexed.
+ case Hexagon::LDrid_indexed:
+ return !invertPredicate ? Hexagon::LDrid_indexed_cPt :
+ Hexagon::LDrid_indexed_cNotPt;
+ case Hexagon::LDriw_indexed:
+ return !invertPredicate ? Hexagon::LDriw_indexed_cPt :
+ Hexagon::LDriw_indexed_cNotPt;
+ case Hexagon::LDrih_indexed:
+ return !invertPredicate ? Hexagon::LDrih_indexed_cPt :
+ Hexagon::LDrih_indexed_cNotPt;
+ case Hexagon::LDriuh_indexed:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_cPt :
+ Hexagon::LDriuh_indexed_cNotPt;
+ case Hexagon::LDrib_indexed:
+ return !invertPredicate ? Hexagon::LDrib_indexed_cPt :
+ Hexagon::LDrib_indexed_cNotPt;
+ case Hexagon::LDriub_indexed:
+ return !invertPredicate ? Hexagon::LDriub_indexed_cPt :
+ Hexagon::LDriub_indexed_cNotPt;
+ // Post Increment Load.
+ case Hexagon::POST_LDrid:
+ return !invertPredicate ? Hexagon::POST_LDrid_cPt :
+ Hexagon::POST_LDrid_cNotPt;
+ case Hexagon::POST_LDriw:
+ return !invertPredicate ? Hexagon::POST_LDriw_cPt :
+ Hexagon::POST_LDriw_cNotPt;
+ case Hexagon::POST_LDrih:
+ return !invertPredicate ? Hexagon::POST_LDrih_cPt :
+ Hexagon::POST_LDrih_cNotPt;
+ case Hexagon::POST_LDriuh:
+ return !invertPredicate ? Hexagon::POST_LDriuh_cPt :
+ Hexagon::POST_LDriuh_cNotPt;
+ case Hexagon::POST_LDrib:
+ return !invertPredicate ? Hexagon::POST_LDrib_cPt :
+ Hexagon::POST_LDrib_cNotPt;
+ case Hexagon::POST_LDriub:
+ return !invertPredicate ? Hexagon::POST_LDriub_cPt :
+ Hexagon::POST_LDriub_cNotPt;
+ // DEALLOC_RETURN.
+ case Hexagon::DEALLOC_RET_V4:
+ return !invertPredicate ? Hexagon::DEALLOC_RET_cPt_V4 :
+ Hexagon::DEALLOC_RET_cNotPt_V4;
+ default:
+    assert(false && "Unexpected predicable instruction");
+    return 0;
+ }
+}
+
+
+bool HexagonInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ int Opc = MI->getOpcode();
+  assert(isPredicable(MI) && "Expected predicable instruction");
+ bool invertJump = (!Cond.empty() && Cond[0].isImm() &&
+ (Cond[0].getImm() == 0));
+ MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump)));
+ //
+ // This assumes that the predicate is always the first operand
+ // in the set of inputs.
+ //
+ MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
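+  // Walk the explicit inputs from the back, copying each one slot to the
+  // right; this opens a hole immediately after the defs where the predicate
+  // register is then written.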
+ int oper;
+ for (oper = MI->getNumOperands() - 3; oper >= 0; --oper) {
+ MachineOperand MO = MI->getOperand(oper);
+    if (MO.isReg() && !MO.isUse() && !MO.isImplicit()) {
+ break;
+ }
+
+ if (MO.isReg()) {
+ MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
+ MO.isImplicit(), MO.isKill(),
+ MO.isDead(), MO.isUndef(),
+ MO.isDebug());
+ } else if (MO.isImm()) {
+ MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
+ } else {
+ assert(false && "Unexpected operand type");
+ }
+ }
+
+ int regPos = invertJump ? 1 : 0;
+ MachineOperand PredMO = Cond[regPos];
+ MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(),
+ PredMO.isImplicit(), PredMO.isKill(),
+ PredMO.isDead(), PredMO.isUndef(),
+ PredMO.isDebug());
+
+ return true;
+}
+
+
+bool
+HexagonInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB,
+                    unsigned NumCycles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const {
+ return true;
+}
+
+
+bool
+HexagonInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles,
+ unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles,
+ unsigned ExtraFCycles,
+ const BranchProbability &Probability) const {
+ return true;
+}
+
+
+bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::TFR_cPt:
+ case Hexagon::TFR_cNotPt:
+ case Hexagon::TFRI_cPt:
+ case Hexagon::TFRI_cNotPt:
+ case Hexagon::TFR_cdnPt:
+ case Hexagon::TFR_cdnNotPt:
+ case Hexagon::TFRI_cdnPt:
+ case Hexagon::TFRI_cdnNotPt:
+ return true;
+
+ case Hexagon::JMP_Pred:
+ case Hexagon::JMP_PredNot:
+ case Hexagon::BRCOND:
+ case Hexagon::JMP_PredPt:
+ case Hexagon::JMP_PredNotPt:
+ case Hexagon::JMP_PredPnt:
+ case Hexagon::JMP_PredNotPnt:
+ return true;
+
+ case Hexagon::LDrid_indexed_cPt_V4 :
+ case Hexagon::LDrid_indexed_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_cPt_V4 :
+ case Hexagon::LDrib_indexed_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_cPt_V4 :
+ case Hexagon::LDriub_indexed_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_cPt_V4 :
+ case Hexagon::LDrih_indexed_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_cPt_V4 :
+ case Hexagon::LDriw_indexed_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
+ return true;
+
+ case Hexagon::LDrid_cPt :
+ case Hexagon::LDrid_cNotPt :
+ case Hexagon::LDrid_indexed_cPt :
+ case Hexagon::LDrid_indexed_cNotPt :
+ case Hexagon::POST_LDrid_cPt :
+ case Hexagon::POST_LDrid_cNotPt :
+ case Hexagon::LDriw_cPt :
+ case Hexagon::LDriw_cNotPt :
+ case Hexagon::LDriw_indexed_cPt :
+ case Hexagon::LDriw_indexed_cNotPt :
+ case Hexagon::POST_LDriw_cPt :
+ case Hexagon::POST_LDriw_cNotPt :
+ case Hexagon::LDrih_cPt :
+ case Hexagon::LDrih_cNotPt :
+ case Hexagon::LDrih_indexed_cPt :
+ case Hexagon::LDrih_indexed_cNotPt :
+ case Hexagon::POST_LDrih_cPt :
+ case Hexagon::POST_LDrih_cNotPt :
+ case Hexagon::LDrib_cPt :
+ case Hexagon::LDrib_cNotPt :
+ case Hexagon::LDrib_indexed_cPt :
+ case Hexagon::LDrib_indexed_cNotPt :
+ case Hexagon::POST_LDrib_cPt :
+ case Hexagon::POST_LDrib_cNotPt :
+ case Hexagon::LDriuh_cPt :
+ case Hexagon::LDriuh_cNotPt :
+ case Hexagon::LDriuh_indexed_cPt :
+ case Hexagon::LDriuh_indexed_cNotPt :
+ case Hexagon::POST_LDriuh_cPt :
+ case Hexagon::POST_LDriuh_cNotPt :
+ case Hexagon::LDriub_cPt :
+ case Hexagon::LDriub_cNotPt :
+ case Hexagon::LDriub_indexed_cPt :
+ case Hexagon::LDriub_indexed_cNotPt :
+ case Hexagon::POST_LDriub_cPt :
+ case Hexagon::POST_LDriub_cNotPt :
+ return true;
+
+ case Hexagon::LDrid_cdnPt :
+ case Hexagon::LDrid_cdnNotPt :
+ case Hexagon::LDrid_indexed_cdnPt :
+ case Hexagon::LDrid_indexed_cdnNotPt :
+ case Hexagon::POST_LDrid_cdnPt_V4 :
+ case Hexagon::POST_LDrid_cdnNotPt_V4 :
+ case Hexagon::LDriw_cdnPt :
+ case Hexagon::LDriw_cdnNotPt :
+ case Hexagon::LDriw_indexed_cdnPt :
+ case Hexagon::LDriw_indexed_cdnNotPt :
+ case Hexagon::POST_LDriw_cdnPt_V4 :
+ case Hexagon::POST_LDriw_cdnNotPt_V4 :
+ case Hexagon::LDrih_cdnPt :
+ case Hexagon::LDrih_cdnNotPt :
+ case Hexagon::LDrih_indexed_cdnPt :
+ case Hexagon::LDrih_indexed_cdnNotPt :
+ case Hexagon::POST_LDrih_cdnPt_V4 :
+ case Hexagon::POST_LDrih_cdnNotPt_V4 :
+ case Hexagon::LDrib_cdnPt :
+ case Hexagon::LDrib_cdnNotPt :
+ case Hexagon::LDrib_indexed_cdnPt :
+ case Hexagon::LDrib_indexed_cdnNotPt :
+ case Hexagon::POST_LDrib_cdnPt_V4 :
+ case Hexagon::POST_LDrib_cdnNotPt_V4 :
+ case Hexagon::LDriuh_cdnPt :
+ case Hexagon::LDriuh_cdnNotPt :
+ case Hexagon::LDriuh_indexed_cdnPt :
+ case Hexagon::LDriuh_indexed_cdnNotPt :
+ case Hexagon::POST_LDriuh_cdnPt_V4 :
+ case Hexagon::POST_LDriuh_cdnNotPt_V4 :
+ case Hexagon::LDriub_cdnPt :
+ case Hexagon::LDriub_cdnNotPt :
+ case Hexagon::LDriub_indexed_cdnPt :
+ case Hexagon::LDriub_indexed_cdnNotPt :
+ case Hexagon::POST_LDriub_cdnPt_V4 :
+ case Hexagon::POST_LDriub_cdnNotPt_V4 :
+ return true;
+
+ case Hexagon::ADD_ri_cPt:
+ case Hexagon::ADD_ri_cNotPt:
+ case Hexagon::ADD_ri_cdnPt:
+ case Hexagon::ADD_ri_cdnNotPt:
+ case Hexagon::ADD_rr_cPt:
+ case Hexagon::ADD_rr_cNotPt:
+ case Hexagon::ADD_rr_cdnPt:
+ case Hexagon::ADD_rr_cdnNotPt:
+ case Hexagon::XOR_rr_cPt:
+ case Hexagon::XOR_rr_cNotPt:
+ case Hexagon::XOR_rr_cdnPt:
+ case Hexagon::XOR_rr_cdnNotPt:
+ case Hexagon::AND_rr_cPt:
+ case Hexagon::AND_rr_cNotPt:
+ case Hexagon::AND_rr_cdnPt:
+ case Hexagon::AND_rr_cdnNotPt:
+ case Hexagon::OR_rr_cPt:
+ case Hexagon::OR_rr_cNotPt:
+ case Hexagon::OR_rr_cdnPt:
+ case Hexagon::OR_rr_cdnNotPt:
+ case Hexagon::SUB_rr_cPt:
+ case Hexagon::SUB_rr_cNotPt:
+ case Hexagon::SUB_rr_cdnPt:
+ case Hexagon::SUB_rr_cdnNotPt:
+ case Hexagon::COMBINE_rr_cPt:
+ case Hexagon::COMBINE_rr_cNotPt:
+ case Hexagon::COMBINE_rr_cdnPt:
+ case Hexagon::COMBINE_rr_cdnNotPt:
+ return true;
+
+ case Hexagon::ASLH_cPt_V4:
+ case Hexagon::ASLH_cNotPt_V4:
+ case Hexagon::ASRH_cPt_V4:
+ case Hexagon::ASRH_cNotPt_V4:
+ case Hexagon::SXTB_cPt_V4:
+ case Hexagon::SXTB_cNotPt_V4:
+ case Hexagon::SXTH_cPt_V4:
+ case Hexagon::SXTH_cNotPt_V4:
+ case Hexagon::ZXTB_cPt_V4:
+ case Hexagon::ZXTB_cNotPt_V4:
+ case Hexagon::ZXTH_cPt_V4:
+ case Hexagon::ZXTH_cNotPt_V4:
+ return true;
+
+ case Hexagon::ASLH_cdnPt_V4:
+ case Hexagon::ASLH_cdnNotPt_V4:
+ case Hexagon::ASRH_cdnPt_V4:
+ case Hexagon::ASRH_cdnNotPt_V4:
+ case Hexagon::SXTB_cdnPt_V4:
+ case Hexagon::SXTB_cdnNotPt_V4:
+ case Hexagon::SXTH_cdnPt_V4:
+ case Hexagon::SXTH_cdnNotPt_V4:
+ case Hexagon::ZXTB_cdnPt_V4:
+ case Hexagon::ZXTB_cdnNotPt_V4:
+ case Hexagon::ZXTH_cdnPt_V4:
+ case Hexagon::ZXTH_cdnNotPt_V4:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+
+bool
+HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
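+  // Scan the instruction's defs for a predicate register; if one is found,
+  // record it in Pred and report success.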
+ for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) {
+ MachineOperand MO = MI->getOperand(oper);
+ if (MO.isReg() && MO.isDef()) {
+ const TargetRegisterClass* RC = RI.getMinimalPhysRegClass(MO.getReg());
+ if (RC == Hexagon::PredRegsRegisterClass) {
+ Pred.push_back(MO);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+bool
+HexagonInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ // TODO: Fix this
+ return false;
+}
+
+
+//
+// We indicate that we want to reverse the branch by
+// inserting a 0 at the beginning of the Cond vector.
+//
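+// For example, Cond = {p0} means "branch if p0"; reversing it yields
+// Cond = {0, p0}, which consumers such as PredicateInstruction read as
+// "branch if !p0".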
+bool HexagonInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
+ Cond.erase(Cond.begin());
+ } else {
+ Cond.insert(Cond.begin(), MachineOperand::CreateImm(0));
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::
+isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs,
+ const BranchProbability &Probability) const {
+ return (NumInstrs <= 4);
+}
+
+bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
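+  // DEALLOC_RET is Hexagon's combined frame-deallocate-and-return; match it
+  // together with its predicated V4 variants.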
+ switch (MI->getOpcode()) {
+ case Hexagon::DEALLOC_RET_V4 :
+ case Hexagon::DEALLOC_RET_cPt_V4 :
+ case Hexagon::DEALLOC_RET_cNotPt_V4 :
+ case Hexagon::DEALLOC_RET_cdnPnt_V4 :
+ case Hexagon::DEALLOC_RET_cNotdnPnt_V4 :
+ case Hexagon::DEALLOC_RET_cdnPt_V4 :
+ case Hexagon::DEALLOC_RET_cNotdnPt_V4 :
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::
+isValidOffset(const int Opcode, const int Offset) const {
+  // Check whether Offset is within the legal offset range for the given
+  // Opcode. If it is not, an ADD_ri is inserted to compute the final address
+  // instead. For that reason, this function assumes that Offset is already
+  // correctly aligned.
+
+ switch(Opcode) {
+
+ case Hexagon::LDriw:
+ case Hexagon::STriw:
+ case Hexagon::STriwt:
+ assert((Offset % 4 == 0) && "Offset has incorrect alignment");
+ return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMW_OFFSET_MAX);
+
+ case Hexagon::LDrid:
+ case Hexagon::STrid:
+ assert((Offset % 8 == 0) && "Offset has incorrect alignment");
+ return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMD_OFFSET_MAX);
+
+ case Hexagon::LDrih:
+ case Hexagon::LDriuh:
+ case Hexagon::STrih:
+ case Hexagon::LDrih_ae:
+ assert((Offset % 2 == 0) && "Offset has incorrect alignment");
+ return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMH_OFFSET_MAX);
+
+ case Hexagon::LDrib:
+ case Hexagon::STrib:
+ case Hexagon::LDriub:
+ case Hexagon::LDriubit:
+ case Hexagon::LDrib_ae:
+ case Hexagon::LDriub_ae:
+ return (Offset >= Hexagon_MEMB_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMB_OFFSET_MAX);
+
+ case Hexagon::ADD_ri:
+ case Hexagon::TFR_FI:
+ return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
+ (Offset <= Hexagon_ADDI_OFFSET_MAX);
+
+ case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDi_MEM_V4 :
+ case Hexagon::MEMw_SUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDr_MEM_V4 :
+ case Hexagon::MEMw_SUBr_MEM_V4 :
+ case Hexagon::MEMw_ANDr_MEM_V4 :
+ case Hexagon::MEMw_ORr_MEM_V4 :
+    assert((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly.");
+ return (0 <= Offset && Offset <= 255);
+
+ case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDi_MEM_V4 :
+ case Hexagon::MEMh_SUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDr_MEM_V4 :
+ case Hexagon::MEMh_SUBr_MEM_V4 :
+ case Hexagon::MEMh_ANDr_MEM_V4 :
+ case Hexagon::MEMh_ORr_MEM_V4 :
+    assert((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly.");
+ return (0 <= Offset && Offset <= 127);
+
+ case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDi_MEM_V4 :
+ case Hexagon::MEMb_SUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDr_MEM_V4 :
+ case Hexagon::MEMb_SUBr_MEM_V4 :
+ case Hexagon::MEMb_ANDr_MEM_V4 :
+ case Hexagon::MEMb_ORr_MEM_V4 :
+ return (0 <= Offset && Offset <= 63);
+
+  // STriw_pred and LDriw_pred are pseudo operations and must accept an
+  // offset of any size; a later pass knows how to handle them.
+ case Hexagon::STriw_pred:
+ case Hexagon::LDriw_pred:
+ return true;
+
+ // INLINEASM is very special.
+ case Hexagon::INLINEASM:
+ return true;
+ }
+
+  assert(0 && "No offset range is defined for this opcode. "
+         "Please define it in the above switch statement!");
+  return false;
+}
+
+
+//
+// Check if the Offset is a valid auto-inc imm by Load/Store Type.
+//
+bool HexagonInstrInfo::
+isValidAutoIncImm(const EVT VT, const int Offset) const {
+
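+  // Each access size must stay naturally aligned: the low bits of the
+  // increment are required to be zero (three bits for i64, two for i32,
+  // one for i16).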
+ if (VT == MVT::i64) {
+ return (Offset >= Hexagon_MEMD_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMD_AUTOINC_MAX &&
+ (Offset & 0x7) == 0);
+ }
+ if (VT == MVT::i32) {
+ return (Offset >= Hexagon_MEMW_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMW_AUTOINC_MAX &&
+ (Offset & 0x3) == 0);
+ }
+ if (VT == MVT::i16) {
+ return (Offset >= Hexagon_MEMH_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMH_AUTOINC_MAX &&
+ (Offset & 0x1) == 0);
+ }
+ if (VT == MVT::i8) {
+ return (Offset >= Hexagon_MEMB_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMB_AUTOINC_MAX);
+ }
+
+ assert(0 && "Not an auto-inc opc!");
+
+ return false;
+}
+
+
+bool HexagonInstrInfo::
+isMemOp(const MachineInstr *MI) const {
+ switch (MI->getOpcode())
+ {
+ case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDi_MEM_V4 :
+ case Hexagon::MEMw_SUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDr_MEM_V4 :
+ case Hexagon::MEMw_SUBr_MEM_V4 :
+ case Hexagon::MEMw_ANDr_MEM_V4 :
+ case Hexagon::MEMw_ORr_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDi_MEM_V4 :
+ case Hexagon::MEMh_SUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDr_MEM_V4 :
+ case Hexagon::MEMh_SUBr_MEM_V4 :
+ case Hexagon::MEMh_ANDr_MEM_V4 :
+ case Hexagon::MEMh_ORr_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDi_MEM_V4 :
+ case Hexagon::MEMb_SUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDr_MEM_V4 :
+ case Hexagon::MEMb_SUBr_MEM_V4 :
+ case Hexagon::MEMb_ANDr_MEM_V4 :
+ case Hexagon::MEMb_ORr_MEM_V4 :
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::
+isSpillPredRegOp(const MachineInstr *MI) const {
+ switch (MI->getOpcode())
+ {
+ case Hexagon::STriw_pred :
+ case Hexagon::LDriw_pred :
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::isConditionalALU32(const MachineInstr* MI) const {
+ const HexagonRegisterInfo& QRI = getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ case Hexagon::ADD_ri_cPt:
+ case Hexagon::ADD_ri_cNotPt:
+ case Hexagon::ADD_rr_cPt:
+ case Hexagon::ADD_rr_cNotPt:
+ case Hexagon::XOR_rr_cPt:
+ case Hexagon::XOR_rr_cNotPt:
+ case Hexagon::AND_rr_cPt:
+ case Hexagon::AND_rr_cNotPt:
+ case Hexagon::OR_rr_cPt:
+ case Hexagon::OR_rr_cNotPt:
+ case Hexagon::SUB_rr_cPt:
+ case Hexagon::SUB_rr_cNotPt:
+ case Hexagon::COMBINE_rr_cPt:
+ case Hexagon::COMBINE_rr_cNotPt:
+ return true;
+ case Hexagon::ASLH_cPt_V4:
+ case Hexagon::ASLH_cNotPt_V4:
+ case Hexagon::ASRH_cPt_V4:
+ case Hexagon::ASRH_cNotPt_V4:
+ case Hexagon::SXTB_cPt_V4:
+ case Hexagon::SXTB_cNotPt_V4:
+ case Hexagon::SXTH_cPt_V4:
+ case Hexagon::SXTH_cNotPt_V4:
+ case Hexagon::ZXTB_cPt_V4:
+ case Hexagon::ZXTB_cNotPt_V4:
+ case Hexagon::ZXTH_cPt_V4:
+ case Hexagon::ZXTH_cNotPt_V4:
+ return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+
+ default:
+ return false;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::
+isConditionalLoad(const MachineInstr* MI) const {
+ const HexagonRegisterInfo& QRI = getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ case Hexagon::LDrid_cPt :
+ case Hexagon::LDrid_cNotPt :
+ case Hexagon::LDrid_indexed_cPt :
+ case Hexagon::LDrid_indexed_cNotPt :
+ case Hexagon::LDriw_cPt :
+ case Hexagon::LDriw_cNotPt :
+ case Hexagon::LDriw_indexed_cPt :
+ case Hexagon::LDriw_indexed_cNotPt :
+ case Hexagon::LDrih_cPt :
+ case Hexagon::LDrih_cNotPt :
+ case Hexagon::LDrih_indexed_cPt :
+ case Hexagon::LDrih_indexed_cNotPt :
+ case Hexagon::LDrib_cPt :
+ case Hexagon::LDrib_cNotPt :
+ case Hexagon::LDrib_indexed_cPt :
+ case Hexagon::LDrib_indexed_cNotPt :
+ case Hexagon::LDriuh_cPt :
+ case Hexagon::LDriuh_cNotPt :
+ case Hexagon::LDriuh_indexed_cPt :
+ case Hexagon::LDriuh_indexed_cNotPt :
+ case Hexagon::LDriub_cPt :
+ case Hexagon::LDriub_cNotPt :
+ case Hexagon::LDriub_indexed_cPt :
+ case Hexagon::LDriub_indexed_cNotPt :
+ return true;
+ case Hexagon::POST_LDrid_cPt :
+ case Hexagon::POST_LDrid_cNotPt :
+ case Hexagon::POST_LDriw_cPt :
+ case Hexagon::POST_LDriw_cNotPt :
+ case Hexagon::POST_LDrih_cPt :
+ case Hexagon::POST_LDrih_cNotPt :
+ case Hexagon::POST_LDrib_cPt :
+ case Hexagon::POST_LDrib_cNotPt :
+ case Hexagon::POST_LDriuh_cPt :
+ case Hexagon::POST_LDriuh_cNotPt :
+ case Hexagon::POST_LDriub_cPt :
+ case Hexagon::POST_LDriub_cNotPt :
+ return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+ case Hexagon::LDrid_indexed_cPt_V4 :
+ case Hexagon::LDrid_indexed_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_cPt_V4 :
+ case Hexagon::LDrib_indexed_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_cPt_V4 :
+ case Hexagon::LDriub_indexed_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_cPt_V4 :
+ case Hexagon::LDrih_indexed_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cPt_V4 :
+ case Hexagon::LDriuh_indexed_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_cPt_V4 :
+ case Hexagon::LDriw_indexed_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+ default:
+ return false;
+ }
+ return false;
+}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
new file mode 100644
index 0000000..d549c46
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -0,0 +1,166 @@
+//=- HexagonInstrInfo.h - Hexagon Instruction Information ---------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonINSTRUCTIONINFO_H
+#define HexagonINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "HexagonRegisterInfo.h"
+
+
+#define GET_INSTRINFO_HEADER
+#include "HexagonGenInstrInfo.inc"
+
+namespace llvm {
+
+class HexagonInstrInfo : public HexagonGenInstrInfo {
+ const HexagonRegisterInfo RI;
+ const HexagonSubtarget& Subtarget;
+public:
+ explicit HexagonInstrInfo(HexagonSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const HexagonRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+ /// the source reg along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ unsigned createVR(MachineFunction* MF, MVT VT) const;
+
+ virtual bool isPredicable(MachineInstr *MI) const;
+ virtual bool
+ PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+
+  virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ const BranchProbability &Probability) const;
+
+ virtual bool isPredicated(const MachineInstr *MI) const;
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+ virtual bool
+ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool
+  isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ const BranchProbability &Probability) const;
+
+ bool isValidOffset(const int Opcode, const int Offset) const;
+ bool isValidAutoIncImm(const EVT VT, const int Offset) const;
+ bool isMemOp(const MachineInstr *MI) const;
+ bool isSpillPredRegOp(const MachineInstr *MI) const;
+ bool isU6_3Immediate(const int value) const;
+ bool isU6_2Immediate(const int value) const;
+ bool isU6_1Immediate(const int value) const;
+ bool isU6_0Immediate(const int value) const;
+ bool isS4_3Immediate(const int value) const;
+ bool isS4_2Immediate(const int value) const;
+ bool isS4_1Immediate(const int value) const;
+ bool isS4_0Immediate(const int value) const;
+ bool isS12_Immediate(const int value) const;
+ bool isU6_Immediate(const int value) const;
+ bool isS8_Immediate(const int value) const;
+ bool isS6_Immediate(const int value) const;
+
+  bool isConditionalALU32(const MachineInstr* MI) const;
+  bool isConditionalLoad(const MachineInstr* MI) const;
+ bool isDeallocRet(const MachineInstr *MI) const;
+
+private:
+ int getMatchingCondBranchOpcode(int Opc, bool sense) const;
+
+};
+
+}
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
new file mode 100644
index 0000000..cc508b7
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -0,0 +1,3014 @@
+//==- HexagonInstrInfo.td - Target Description for Hexagon -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormats.td"
+include "HexagonImmediates.td"
+
+//===----------------------------------------------------------------------===//
+// Hexagon Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasV2T : Predicate<"Subtarget.hasV2TOps()">;
+def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">;
+def NoV2T : Predicate<"!Subtarget.hasV2TOps()">;
+def HasV3T : Predicate<"Subtarget.hasV3TOps()">;
+def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">;
+def NoV3T : Predicate<"!Subtarget.hasV3TOps()">;
+def HasV4T : Predicate<"Subtarget.hasV4TOps()">;
+def NoV4T : Predicate<"!Subtarget.hasV4TOps()">;
+def UseMEMOP : Predicate<"Subtarget.useMemOps()">;
+
+// Addressing modes.
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
+def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
+def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
+def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
+def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
+def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
+def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
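+// The suffix encodes the immediate constraint of the addressing mode: for
+// example, S11_2 is a signed 11-bit value scaled by 4 (two implicit low
+// zero bits), and U6_1 an unsigned 6-bit value scaled by 2.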
+
+// Address operands.
+def MEMrr : Operand<i32> {
+ let PrintMethod = "printHexagonMEMrrOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri : Operand<i32> {
+ let PrintMethod = "printHexagonMEMriOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri_s11_2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
+ let PrintMethod = "printHexagonMEMriOperand";
+ let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+def FrameIndex : Operand<i32> {
+ let PrintMethod = "printHexagonFrameIndexOperand";
+ let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+let PrintMethod = "printGlobalOperand" in
+ def globaladdress : Operand<i32>;
+
+let PrintMethod = "printJumpTable" in
+ def jumptablebase : Operand<i32>;
+
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+def bblabel : Operand<i32>;
+def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">;
+
+def symbolHi32 : Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+def symbolLo32 : Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
+
+// Multi-class for logical operators.
+multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> {
+ def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : ALU32_ri<(outs IntRegs:$dst), (ins s10Imm:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "(#$b, $c)")),
+ [(set IntRegs:$dst, (OpNode s10Imm:$b, IntRegs:$c))]>;
+}
+
+// Multi-class for compare ops.
+let isCompare = 1 in {
+multiclass CMP64_rr<string OpcStr, PatFrag OpNode> {
+ def rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode DoubleRegs:$b, DoubleRegs:$c))]>;
+}
+multiclass CMP32_rr<string OpcStr, PatFrag OpNode> {
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+}
+
+multiclass CMP32_rr_ri_s10<string OpcStr, PatFrag OpNode> {
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, s10ImmPred:$c))]>;
+}
+
+multiclass CMP32_rr_ri_u9<string OpcStr, PatFrag OpNode> {
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>;
+}
+
+multiclass CMP32_ri_u9<string OpcStr, PatFrag OpNode> {
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>;
+}
+
+multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> {
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, s8ImmPred:$c))]>;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// http://qualnet.qualcomm.com/~erich/v1/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v2/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v3/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v4/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v5/htmldocs/index.html
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU32/ALU +
+//===----------------------------------------------------------------------===//
+// Add.
+let isPredicable = 1 in
+def ADD_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = add($src1, $src2)",
+ [(set IntRegs:$dst, (add IntRegs:$src1, IntRegs:$src2))]>;
+
+let isPredicable = 1 in
+def ADD_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s16Imm:$src2),
+ "$dst = add($src1, #$src2)",
+ [(set IntRegs:$dst, (add IntRegs:$src1, s16ImmPred:$src2))]>;
+
+// Logical operations.
+let isPredicable = 1 in
+def XOR_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set IntRegs:$dst, (xor IntRegs:$src1, IntRegs:$src2))]>;
+
+let isPredicable = 1 in
+def AND_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set IntRegs:$dst, (and IntRegs:$src1, IntRegs:$src2))]>;
+
+def OR_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s8Imm:$src2),
+ "$dst = or($src1, #$src2)",
+ [(set IntRegs:$dst, (or IntRegs:$src1, s8ImmPred:$src2))]>;
+
+def NOT_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1),
+ "$dst = not($src1)",
+ [(set IntRegs:$dst, (not IntRegs:$src1))]>;
+
+def AND_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s10Imm:$src2),
+ "$dst = and($src1, #$src2)",
+ [(set IntRegs:$dst, (and IntRegs:$src1, s10ImmPred:$src2))]>;
+
+let isPredicable = 1 in
+def OR_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set IntRegs:$dst, (or IntRegs:$src1, IntRegs:$src2))]>;
+
+// Negate.
+def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = neg($src1)",
+ [(set IntRegs:$dst, (ineg IntRegs:$src1))]>;
+// Nop.
+let neverHasSideEffects = 1 in
+def NOP : ALU32_rr<(outs), (ins),
+ "nop",
+ []>;
+
+// Subtract.
+let isPredicable = 1 in
+def SUB_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sub($src1, $src2)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1, IntRegs:$src2))]>;
+
+// Transfer immediate.
+let isReMaterializable = 1, isPredicable = 1 in
+def TFRI : ALU32_ri<(outs IntRegs:$dst), (ins s16Imm:$src1),
+ "$dst = #$src1",
+ [(set IntRegs:$dst, s16ImmPred:$src1)]>;
+
+// Transfer register.
+let neverHasSideEffects = 1, isPredicable = 1 in
+def TFR : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ []>;
+
+// Transfer control register.
+let neverHasSideEffects = 1 in
+def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ []>;
+//===----------------------------------------------------------------------===//
+// ALU32/ALU -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PERM +
+//===----------------------------------------------------------------------===//
+
+// Combine.
+let isPredicable = 1, neverHasSideEffects = 1 in
+def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = combine($src1, $src2)",
+ []>;
+
+// Mux.
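+// mux(p, a, b) selects a when predicate p is true and b otherwise, as the
+// select patterns below encode.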
+def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
+ DoubleRegs:$src2,
+ DoubleRegs:$src3),
+ "$dst = vmux($src1, $src2, $src3)",
+ []>;
+
+def MUX_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst = mux($src1, $src2, $src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+ IntRegs:$src3),
+ "$dst = mux($src1, #$src2, $src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1,
+ s8ImmPred:$src2, IntRegs:$src3))]>;
+
+def MUX_ri : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2,
+ s8Imm:$src3),
+ "$dst = mux($src1, $src2, #$src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+ s8ImmPred:$src3))]>;
+
+def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+ s8Imm:$src3),
+ "$dst = mux($src1, #$src2, #$src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1, s8ImmPred:$src2,
+ s8ImmPred:$src3))]>;
+
+// Shift halfword.
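+// aslh/asrh shift the source register left/right by 16 bits (one
+// halfword).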
+let isPredicable = 1 in
+def ASLH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = aslh($src1)",
+           [(set IntRegs:$dst, (shl IntRegs:$src1, (i32 16)))]>;
+
+let isPredicable = 1 in
+def ASRH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = asrh($src1)",
+           [(set IntRegs:$dst, (sra IntRegs:$src1, (i32 16)))]>;
+
+// Sign extend.
+let isPredicable = 1 in
+def SXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = sxtb($src1)",
+ [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i8))]>;
+
+let isPredicable = 1 in
+def SXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = sxth($src1)",
+ [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i16))]>;
+
+// Zero extend.
+let isPredicable = 1, neverHasSideEffects = 1 in
+def ZXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = zxtb($src1)",
+ []>;
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = zxth($src1)",
+ []>;
+//===----------------------------------------------------------------------===//
+// ALU32/PERM -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PRED +
+//===----------------------------------------------------------------------===//
+
+// Conditional add.
+let neverHasSideEffects = 1 in
+def ADD_ri_cPt : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ "if ($src1) $dst = add($src2, #$src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ADD_ri_cNotPt : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ "if (!$src1) $dst = add($src2, #$src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ADD_ri_cdnPt : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ "if ($src1.new) $dst = add($src2, #$src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ADD_ri_cdnNotPt : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ "if (!$src1.new) $dst = add($src2, #$src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ADD_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = add($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ADD_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = add($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ADD_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = add($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def ADD_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = add($src2, $src3)",
+ []>;
+
+
+// Conditional combine.
+
+let neverHasSideEffects = 1 in
+def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = combine($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = combine($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = combine($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1 in
+def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = combine($src2, $src3)",
+ []>;
+
+// Conditional logical operations.
+
+def XOR_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = xor($src2, $src3)",
+ []>;
+
+def XOR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = xor($src2, $src3)",
+ []>;
+
+def XOR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = xor($src2, $src3)",
+ []>;
+
+def XOR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = xor($src2, $src3)",
+ []>;
+
+def AND_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = and($src2, $src3)",
+ []>;
+
+def AND_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = and($src2, $src3)",
+ []>;
+
+def AND_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = and($src2, $src3)",
+ []>;
+
+def AND_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = and($src2, $src3)",
+ []>;
+
+def OR_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = or($src2, $src3)",
+ []>;
+
+def OR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = or($src2, $src3)",
+ []>;
+
+def OR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = or($src2, $src3)",
+ []>;
+
+def OR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = or($src2, $src3)",
+ []>;
+
+
+// Conditional subtract.
+
+def SUB_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = sub($src2, $src3)",
+ []>;
+
+def SUB_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = sub($src2, $src3)",
+ []>;
+
+def SUB_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = sub($src2, $src3)",
+ []>;
+
+def SUB_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = sub($src2, $src3)",
+ []>;
+
+
+// Conditional transfer.
+
+let neverHasSideEffects = 1 in
+def TFR_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def TFR_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2),
+ "if (!$src1) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def TFRI_cPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2),
+ "if ($src1) $dst = #$src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def TFRI_cNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ s12Imm:$src2),
+ "if (!$src1) $dst = #$src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def TFR_cdnPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2),
+ "if ($src1.new) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def TFR_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2),
+ "if (!$src1.new) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def TFRI_cdnPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ s12Imm:$src2),
+ "if ($src1.new) $dst = #$src2",
+ []>;
+
+let neverHasSideEffects = 1 in
+def TFRI_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ s12Imm:$src2),
+ "if (!$src1.new) $dst = #$src2",
+ []>;
+
+// Compare.
+defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", setugt>;
+defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", setgt>;
+defm CMPLT : CMP32_rr<"cmp.lt", setlt>;
+defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", seteq>;
+defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>;
+defm CMPGEU : CMP32_ri_u9<"cmp.geu", setuge>;
+//===----------------------------------------------------------------------===//
+// ALU32/PRED -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU32/VH +
+//===----------------------------------------------------------------------===//
+// Vector add halfwords
+
+// Vector average halfwords
+
+// Vector subtract halfwords
+//===----------------------------------------------------------------------===//
+// ALU32/VH -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU +
+//===----------------------------------------------------------------------===//
+// Add.
+def ADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = add($src1, $src2)",
+ [(set DoubleRegs:$dst, (add DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Add halfword.
+
+// Compare.
+defm CMPEHexagon4 : CMP64_rr<"cmp.eq", seteq>;
+defm CMPGT64 : CMP64_rr<"cmp.gt", setgt>;
+defm CMPGTU64 : CMP64_rr<"cmp.gtu", setugt>;
+
+// Logical operations.
+def AND_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set DoubleRegs:$dst, (and DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+def OR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set DoubleRegs:$dst, (or DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+def XOR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set DoubleRegs:$dst, (xor DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Maximum.
+def MAXw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = max($src2, $src1)",
+ [(set IntRegs:$dst, (select (i1 (setlt IntRegs:$src2,
+ IntRegs:$src1)),
+ IntRegs:$src1, IntRegs:$src2))]>;
+
+// Minimum.
+def MINw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = min($src2, $src1)",
+ [(set IntRegs:$dst, (select (i1 (setgt IntRegs:$src2,
+ IntRegs:$src1)),
+ IntRegs:$src1, IntRegs:$src2))]>;
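+
+// Note on the two patterns above: min and max are expressed as a select over
+// a compare; e.g. (select (setlt $src2, $src1), $src1, $src2) yields $src1
+// exactly when $src2 < $src1, i.e. max($src1, $src2).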
+
+// Subtract.
+def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = sub($src1, $src2)",
+ [(set DoubleRegs:$dst, (sub DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Subtract halfword.
+
+// Transfer register.
+let neverHasSideEffects = 1 in
+def TFR_64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = $src1",
+ []>;
+//===----------------------------------------------------------------------===//
+// ALU64/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/BIT +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/BIT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/PERM +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/VB +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/VB -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/VH +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/VW +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/VW -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// CR +
+//===----------------------------------------------------------------------===//
+// Logical reductions on predicates.
+
+// Looping instructions.
+
+// Pipelined looping instructions.
+
+// Logical operations on predicates.
+def AND_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set PredRegs:$dst, (and PredRegs:$src1, PredRegs:$src2))]>;
+
+let neverHasSideEffects = 1 in
+def AND_pnotp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1,
+ PredRegs:$src2),
+ "$dst = and($src1, !$src2)",
+ []>;
+
+def NOT_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = not($src1)",
+ [(set PredRegs:$dst, (not PredRegs:$src1))]>;
+
+def ANY_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = any8($src1)",
+ []>;
+
+def ALL_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = all8($src1)",
+ []>;
+
+def VITPACK_pp : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ PredRegs:$src2),
+ "$dst = vitpack($src1, $src2)",
+ []>;
+
+def VALIGN_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ PredRegs:$src3),
+ "$dst = valignb($src1, $src2, $src3)",
+ []>;
+
+def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ PredRegs:$src3),
+ "$dst = vspliceb($src1, $src2, $src3)",
+ []>;
+
+def MASK_p : SInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1),
+ "$dst = mask($src1)",
+ []>;
+
+def NOT_Ps : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = not($src1)",
+ [(set PredRegs:$dst, (not PredRegs:$src1))]>;
+
+def OR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set PredRegs:$dst, (or PredRegs:$src1, PredRegs:$src2))]>;
+
+def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set PredRegs:$dst, (xor PredRegs:$src1, PredRegs:$src2))]>;
+
+
+// User control register transfer.
+//===----------------------------------------------------------------------===//
+// CR -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// J +
+//===----------------------------------------------------------------------===//
+// Jump to address.
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isPredicable = 1 in {
+ def JMP : JInst< (outs),
+ (ins brtarget:$offset),
+ "jump $offset",
+ [(br bb:$offset)]>;
+}
+
+// if (p0) jump
+let isBranch = 1, isTerminator = 1, Defs = [PC] in {
+ def JMP_Pred : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if ($src) jump $offset",
+ [(brcond PredRegs:$src, bb:$offset)]>;
+}
+
+// if (!p0) jump
+let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1, Defs = [PC] in {
+ def JMP_PredNot : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if (!$src) jump $offset",
+ []>;
+}
+
+let isTerminator = 1, isBranch = 1, neverHasSideEffects = 1, Defs = [PC] in {
+ def BRCOND : JInst < (outs), (ins PredRegs:$pred, brtarget:$dst),
+ "if ($pred) jump $dst",
+ []>;
+}
+
+// Jump to address conditioned on new predicate.
+// if (p0) jump:t
+let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1, Defs = [PC] in {
+ def JMP_PredPt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if ($src.new) jump:t $offset",
+ []>;
+}
+
+// if (!p0) jump:t
+let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1, Defs = [PC] in {
+ def JMP_PredNotPt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if (!$src.new) jump:t $offset",
+ []>;
+}
+
+// Not taken.
+let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1, Defs = [PC] in {
+ def JMP_PredPnt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if ($src.new) jump:nt $offset",
+ []>;
+}
+
+// Not taken.
+let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1, Defs = [PC] in {
+ def JMP_PredNotPnt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if (!$src.new) jump:nt $offset",
+ []>;
+}
+//===----------------------------------------------------------------------===//
+// J -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// JR +
+//===----------------------------------------------------------------------===//
+def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+// Jump to address from register.
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR: JRInst<(outs), (ins),
+ "jumpr r31",
+ [(retflag)]>;
+}
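+
+// Note: r31 is the Hexagon link register, so "jumpr r31" is the standard
+// return idiom; matching retflag here (with Uses = [R31]) lowers function
+// returns to this instruction.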
+
+// Jump to address from register.
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cPt: JRInst<(outs), (ins PredRegs:$src1),
+ "if ($src1) jumpr r31",
+ []>;
+}
+
+// Jump to address from register.
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cNotPt: JRInst<(outs), (ins PredRegs:$src1),
+ "if (!$src1) jumpr r31",
+ []>;
+}
+
+//===----------------------------------------------------------------------===//
+// JR -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LD +
+//===----------------------------------------------------------------------===//
+///
+/// Make sure that in a post-increment load, the first operand is always the
+/// post-increment operand.
+///
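+// For illustration (see POST_LDrid below): the first input is the base
+// register $src1, which is post-incremented by #$offset and written back
+// through the tied second output ("$src1 = $dst2").
+//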
+// Load doubleword.
+let isPredicable = 1 in
+def LDrid : LDInst<(outs DoubleRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memd($addr)",
+ [(set DoubleRegs:$dst, (load ADDRriS11_3:$addr))]>;
+
+let isPredicable = 1, AddedComplexity = 20 in
+def LDrid_indexed : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, s11_3Imm:$offset),
+ "$dst=memd($src1+#$offset)",
+ [(set DoubleRegs:$dst, (load (add IntRegs:$src1,
+ s11_3ImmPred:$offset)))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_GP : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memd(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDd_GP : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memd(#$global)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid : LDInstPI<(outs DoubleRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memd($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load doubleword conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_cPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memd($addr)",
+ []>;
+
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_cNotPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memd($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_indexed_cPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+ "if ($src1) $dst=memd($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_indexed_cNotPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+ "if (!$src1) $dst=memd($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid_cPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if ($src1) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid_cNotPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if (!$src1) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_cdnPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memd($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_cdnNotPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memd($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_indexed_cdnPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+ "if ($src1.new) $dst=memd($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_indexed_cdnNotPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+ "if (!$src1.new) $dst=memd($src2+#$src3)",
+ []>;
+
+
+// Load byte.
+let isPredicable = 1 in
+def LDrib : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memb($addr)",
+ [(set IntRegs:$dst, (sextloadi8 ADDRriS11_0:$addr))]>;
+
+def LDrib_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memb($addr)",
+ [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>;
+
+// Indexed load byte.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDrib_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memb($src1+#$offset)",
+ [(set IntRegs:$dst, (sextloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+
+// Indexed load byte any-extend.
+let AddedComplexity = 20 in
+def LDrib_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memb($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memb(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDb_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memb(#$global)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDub_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memub(#$global)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memb($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load byte conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memb($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memb($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if ($src1) $dst = memb($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if (!$src1) $dst = memb($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memb($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memb($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if ($src1.new) $dst = memb($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if (!$src1.new) $dst = memb($src2+#$src3)",
+ []>;
+
+
+// Load halfword.
+let isPredicable = 1 in
+def LDrih : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memh($addr)",
+ [(set IntRegs:$dst, (sextloadi16 ADDRriS11_1:$addr))]>;
+
+let isPredicable = 1, AddedComplexity = 20 in
+def LDrih_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memh($src1+#$offset)",
+ [(set IntRegs:$dst, (sextloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))] >;
+
+def LDrih_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memh($addr)",
+ [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>;
+
+let AddedComplexity = 20 in
+def LDrih_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memh($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))] >;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memh(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDh_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memh(#$global)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDuh_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memuh(#$global)",
+ []>;
+
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memh($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load halfword conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1) $dst = memh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1) $dst = memh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1.new) $dst = memh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1.new) $dst = memh($src2+#$src3)",
+ []>;
+
+// Load unsigned byte.
+let isPredicable = 1 in
+def LDriub : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memub($addr)",
+ [(set IntRegs:$dst, (zextloadi8 ADDRriS11_0:$addr))]>;
+
+let isPredicable = 1 in
+def LDriubit : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memub($addr)",
+ [(set IntRegs:$dst, (zextloadi1 ADDRriS11_0:$addr))]>;
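+
+// Note: an i1 load reuses the unsigned-byte form; a boolean occupies a byte
+// in memory, so zextloadi1 is matched with the same memub instruction as
+// zextloadi8.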
+
+let isPredicable = 1, AddedComplexity = 20 in
+def LDriub_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memub($src1+#$offset)",
+ [(set IntRegs:$dst, (zextloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+let AddedComplexity = 20 in
+def LDriubit_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memub($src1+#$offset)",
+ [(set IntRegs:$dst, (zextloadi1 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+def LDriub_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memub($addr)",
+ [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>;
+
+
+let AddedComplexity = 20 in
+def LDriub_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memub($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memub(#$global+$offset)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memub($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load unsigned byte conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memub($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memub($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if ($src1) $dst = memub($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if (!$src1) $dst = memub($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memub($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memub($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if ($src1.new) $dst = memub($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if (!$src1.new) $dst = memub($src2+#$src3)",
+ []>;
+
+// Load unsigned halfword.
+let isPredicable = 1 in
+def LDriuh : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memuh($addr)",
+ [(set IntRegs:$dst, (zextloadi16 ADDRriS11_1:$addr))]>;
+
+// Indexed load unsigned halfword.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDriuh_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memuh($src1+#$offset)",
+ [(set IntRegs:$dst, (zextloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))]>;
+
+def LDriuh_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memuh($addr)",
+ [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>;
+
+
+// Indexed load unsigned halfword any-extend.
+let AddedComplexity = 20 in
+def LDriuh_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memuh($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))] >;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memuh(#$global+$offset)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memuh($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load unsigned halfword conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1) $dst = memuh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1) $dst = memuh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1.new) $dst = memuh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1.new) $dst = memuh($src2+#$src3)",
+ []>;
+
+
+// Load word.
+let isPredicable = 1 in
+def LDriw : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr), "$dst = memw($addr)",
+ [(set IntRegs:$dst, (load ADDRriS11_2:$addr))]>;
+
+// Load predicate.
+let mayLoad = 1, Defs = [R10,R11] in
+def LDriw_pred : LDInst<(outs PredRegs:$dst),
+ (ins MEMri:$addr),
+ "Error; should not emit",
+ []>;
+
+// Indexed load.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDriw_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_2Imm:$offset),
+ "$dst=memw($src1+#$offset)",
+ [(set IntRegs:$dst, (load (add IntRegs:$src1,
+ s11_2ImmPred:$offset)))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memw(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDw_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memw(#$global)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memw($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load word conditionally.
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if ($src1) $dst=memw($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if (!$src1) $dst=memw($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if ($src1) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if (!$src1) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if ($src1.new) $dst=memw($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if (!$src1.new) $dst=memw($src2+#$src3)",
+ []>;
+
+// Deallocate stack frame.
+let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
+ def DEALLOCFRAME : LDInst<(outs), (ins i32imm:$amt1),
+ "deallocframe",
+ []>;
+}
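+
+// Note: deallocframe restores the saved LR:FP pair (r31:30) and resets the
+// stack pointer, hence Defs = [R29, R30, R31] above. The $amt1 operand does
+// not appear in the assembly string; it seems to exist only for frame-size
+// bookkeeping.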
+
+// Load and unpack bytes to halfwords.
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/ALU +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/COMPLEX +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/COMPLEX -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYH +
+//===----------------------------------------------------------------------===//
+// Multiply and use lower result.
+// Rd=+mpyi(Rs,#u8)
+def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
+ "$dst =+ mpyi($src1, #$src2)",
+ [(set IntRegs:$dst, (mul IntRegs:$src1, u8ImmPred:$src2))]>;
+
+// Rd=-mpyi(Rs,#u8)
+def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, n8Imm:$src2),
+ "$dst =- mpyi($src1, #$src2)",
+ [(set IntRegs:$dst,
+ (mul IntRegs:$src1, n8ImmPred:$src2))]>;
+
+// Rd=mpyi(Rs,#m9)
+// s9 is NOT the same as m9, but it works so far.
+// The assembler maps this to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8),
+// depending on the value of m9. See the Arch Spec.
+def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2),
+ "$dst = mpyi($src1, #$src2)",
+ [(set IntRegs:$dst, (mul IntRegs:$src1, s9ImmPred:$src2))]>;
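+
+// For example (one plausible reading of the mapping above, not verified
+// against the assembler): "r0 = mpyi(r1, #-6)" would be emitted as
+// "r0 =- mpyi(r1, #6)", while a non-negative immediate maps to the =+ form.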
+
+// Rd=mpyi(Rs,Rt)
+def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyi($src1, $src2)",
+ [(set IntRegs:$dst, (mul IntRegs:$src1, IntRegs:$src2))]>;
+
+// Rx+=mpyi(Rs,#u8)
+def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+ "$dst += mpyi($src2, #$src3)",
+ [(set IntRegs:$dst,
+ (add (mul IntRegs:$src2, u8ImmPred:$src3), IntRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rx+=mpyi(Rs,Rt)
+def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyi($src2, $src3)",
+ [(set IntRegs:$dst,
+ (add (mul IntRegs:$src2, IntRegs:$src3), IntRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rx-=mpyi(Rs,#u8)
+def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+ "$dst -= mpyi($src2, #$src3)",
+ [(set IntRegs:$dst,
+ (sub IntRegs:$src1, (mul IntRegs:$src2, u8ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+// Multiply and use upper result.
+// Rd=mpy(Rs,Rt.H):<<1:rnd:sat
+// Rd=mpy(Rs,Rt.L):<<1:rnd:sat
+// Rd=mpy(Rs,Rt)
+def MPY : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set IntRegs:$dst, (mulhs IntRegs:$src1, IntRegs:$src2))]>;
+
+// Rd=mpy(Rs,Rt):rnd
+// Rd=mpyu(Rs,Rt)
+def MPYU : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyu($src1, $src2)",
+ [(set IntRegs:$dst, (mulhu IntRegs:$src1, IntRegs:$src2))]>;
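+
+// Note on MPY/MPYU above: these forms return the upper 32 bits of the 64-bit
+// product ("multiply and use upper result"), which is why they are matched
+// with mulhs/mulhu rather than plain mul.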
+
+// Multiply and use full result.
+// Rdd=mpyu(Rs,Rt)
+def MPYU64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyu($src1, $src2)",
+ [(set DoubleRegs:$dst, (mul (i64 (anyext IntRegs:$src1)),
+ (i64 (anyext IntRegs:$src2))))]>;
+
+// Rdd=mpy(Rs,Rt)
+def MPY64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set DoubleRegs:$dst, (mul (i64 (sext IntRegs:$src1)),
+ (i64 (sext IntRegs:$src2))))]>;
+
+
+// Multiply and accumulate, use full result.
+// Rxx[+-]=mpy(Rs,Rt)
+// Rxx+=mpy(Rs,Rt)
+def MPY64_acc : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpy($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (add (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3))),
+ DoubleRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rxx-=mpy(Rs,Rt)
+def MPY64_sub : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst -= mpy($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (sub DoubleRegs:$src1,
+ (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3)))))],
+ "$src1 = $dst">;
+
+// Rxx[+-]=mpyu(Rs,Rt)
+// Rxx+=mpyu(Rs,Rt)
+def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyu($src2, $src3)",
+ [(set DoubleRegs:$dst, (add (mul (i64 (anyext IntRegs:$src2)),
+ (i64 (anyext IntRegs:$src3))),
+ DoubleRegs:$src1))],"$src1 = $dst">;
+
+// Rxx-=mpyu(Rs,Rt)
+def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyu($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (sub DoubleRegs:$src1,
+ (mul (i64 (anyext IntRegs:$src2)),
+ (i64 (anyext IntRegs:$src3)))))],
+ "$src1 = $dst">;
+
+
+def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst += add($src2, $src3)",
+ [(set IntRegs:$dst, (add (add IntRegs:$src2, IntRegs:$src3),
+ IntRegs:$src1))],
+ "$src1 = $dst">;
+
+def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, s8Imm:$src3),
+ "$dst += add($src2, #$src3)",
+ [(set IntRegs:$dst, (add (add IntRegs:$src2, s8ImmPred:$src3),
+ IntRegs:$src1))],
+ "$src1 = $dst">;
+
+def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst -= add($src2, $src3)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1, (add IntRegs:$src2,
+ IntRegs:$src3)))],
+ "$src1 = $dst">;
+
+def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, s8Imm:$src3),
+ "$dst -= add($src2, #$src3)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1,
+ (add IntRegs:$src2, s8ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VB +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VB -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VH +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+///
+/// Assumptions: ****** DO NOT IGNORE ********
+/// 1. Make sure that in a post-increment store, the zeroth operand is always
+///    the post-increment operand.
+/// 2. Make sure that the store value operand (Rt/Rtt) in a store is always
+///    the last operand.
+///
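+// For illustration: STrid below satisfies assumption 2 with the store value
+// last, (ins MEMri:$addr, DoubleRegs:$src1); POST_STdri satisfies assumption
+// 1 with the updated base as the zeroth (and only) output, tied to the base
+// register by "$src2 = $dst".
+//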
+// Store doubleword.
+let isPredicable = 1 in
+def STrid : STInst<(outs),
+ (ins MEMri:$addr, DoubleRegs:$src1),
+ "memd($addr) = $src1",
+ [(store DoubleRegs:$src1, ADDRriS11_3:$addr)]>;
+
+// Indexed store double word.
+let AddedComplexity = 10, isPredicable = 1 in
+def STrid_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3),
+ "memd($src1+#$src2) = $src3",
+ [(store DoubleRegs:$src3,
+ (add IntRegs:$src1, s11_3ImmPred:$src2))]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrid_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
+ "memd(#$global+$offset) = $src",
+ []>;
+
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_STdri : STInstPI<(outs IntRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, s4Imm:$offset),
+ "memd($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_store DoubleRegs:$src1, IntRegs:$src2, s4_3ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store doubleword conditionally.
+// if ([!]Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if ($src1) memd($addr) = $src2",
+ []>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if (!$src1) memd($addr) = $src2",
+ []>;
+
+// if (Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if ($src1) memd($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if (!$src1) memd($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memd(Rx++#s4:3)=Rtt
+// if (Pv) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def POST_STdri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if ($src1) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">;
+
+// if (!Pv) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def POST_STdri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if (!$src1) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">;
+
+
+// Store byte.
+// memb(Rs+#s11:0)=Rt
+let isPredicable = 1 in
+def STrib : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memb($addr) = $src1",
+ [(truncstorei8 IntRegs:$src1, ADDRriS11_0:$addr)]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def STrib_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
+ "memb($src1+#$src2) = $src3",
+ [(truncstorei8 IntRegs:$src3, (add IntRegs:$src1,
+ s11_0ImmPred:$src2))]>;
+
+// memb(gp+#u16:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memb(#$global+$offset) = $src",
+ []>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP : STInst<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memb(#$global) = $src",
+ []>;
+
+// memb(Rx++#s4:0)=Rt
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_STbri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1,
+ IntRegs:$src2,
+ s4Imm:$offset),
+ "memb($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_truncsti8 IntRegs:$src1, IntRegs:$src2,
+ s4_0ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store byte conditionally.
+// if ([!]Pv) memb(Rs+#u6:0)=Rt
+// if (Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memb($addr) = $src2",
+ []>;
+
+// if (!Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memb($addr) = $src2",
+ []>;
+
+// if (Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1) memb($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memb($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memb(Rx++#s4:0)=Rt
+// if (Pv) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+// if (!Pv) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+
+// Store halfword.
+// memh(Rs+#s11:1)=Rt
+let isPredicable = 1 in
+def STrih : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memh($addr) = $src1",
+ [(truncstorei16 IntRegs:$src1, ADDRriS11_1:$addr)]>;
+
+
+let AddedComplexity = 10, isPredicable = 1 in
+def STrih_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3),
+ "memh($src1+#$src2) = $src3",
+ [(truncstorei16 IntRegs:$src3, (add IntRegs:$src1,
+ s11_1ImmPred:$src2))]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memh(#$global+$offset) = $src",
+ []>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP : STInst<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memh(#$global) = $src",
+ []>;
+
+// memh(Rx++#s4:1)=Rt.H
+// memh(Rx++#s4:1)=Rt
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_SThri : STInstPI<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
+ "memh($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_truncsti16 IntRegs:$src1, IntRegs:$src2,
+ s4_1ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store halfword conditionally.
+// if ([!]Pv) memh(Rs+#u6:1)=Rt
+// if (Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memh($addr) = $src2",
+ []>;
+
+// if (!Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memh($addr) = $src2",
+ []>;
+
+// if (Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1) memh($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memh($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memh(Rx++#s4:1)=Rt
+// if (Pv) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+// if (!Pv) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+
+// Store word.
+// Store predicate.
+let Defs = [R10,R11] in
+def STriw_pred : STInst<(outs),
+ (ins MEMri:$addr, PredRegs:$src1),
+ "Error; should not emit",
+ []>;
+
+// memw(Rs+#s11:2)=Rt
+let isPredicable = 1 in
+def STriw : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memw($addr) = $src1",
+ [(store IntRegs:$src1, ADDRriS11_2:$addr)]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def STriw_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
+ "memw($src1+#$src2) = $src3",
+ [(store IntRegs:$src3, (add IntRegs:$src1, s11_2ImmPred:$src2))]>;
+
+def STriwt : STInst<(outs),
+ (ins MEMri:$addr, DoubleRegs:$src1),
+ "memw($addr) = $src1",
+ [(truncstorei32 DoubleRegs:$src1, ADDRriS11_2:$addr)]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memw(#$global+$offset) = $src",
+ []>;
+
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_STwri : STInstPI<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
+ "memw($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_store IntRegs:$src1, IntRegs:$src2, s4_2ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store word conditionally.
+// if ([!]Pv) memw(Rs+#u6:2)=Rt
+// if (Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memw($addr) = $src2",
+ []>;
+
+// if (!Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memw($addr) = $src2",
+ []>;
+
+// if (Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1) memw($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memw($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memw(Rx++#s4:2)=Rt
+// if (Pv) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+// if (!Pv) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+
+
+// Allocate stack frame.
+let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
+ def ALLOCFRAME : STInst<(outs),
+ (ins i32imm:$amt),
+ "allocframe(#$amt)",
+ []>;
+}
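+
+// Note (paraphrasing the Hexagon spec; an assumption worth verifying):
+// allocframe(#$amt) saves the LR:FP pair, points FP at the saved pair, and
+// grows the stack by #$amt; hence Defs = [R29, R30] (SP, FP) and
+// Uses = [R31, R30] (LR, FP) above.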
+//===----------------------------------------------------------------------===//
+// ST -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/ALU +
+//===----------------------------------------------------------------------===//
+// Logical NOT.
+def NOT_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = not($src1)",
+ [(set DoubleRegs:$dst, (not DoubleRegs:$src1))]>;
+
+
+// Sign extend word to doubleword.
+def SXTW : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ "$dst = sxtw($src1)",
+ [(set DoubleRegs:$dst, (sext IntRegs:$src1))]>;
+//===----------------------------------------------------------------------===//
+// STYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/BIT +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/BIT -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// STYPE/COMPLEX +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/COMPLEX -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/PERM +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/PRED +
+//===----------------------------------------------------------------------===//
+// Predicate transfer.
+let neverHasSideEffects = 1 in
+def TFR_RsPd : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1),
+ "$dst = $src1 // Should almost never emit this",
+ []>;
+
+def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1 // Should almost never emit!",
+ [(set PredRegs:$dst, (trunc IntRegs:$src1))]>;
+//===----------------------------------------------------------------------===//
+// STYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/SHIFT +
+//===----------------------------------------------------------------------===//
+// Shift by immediate.
+def ASR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = asr($src1, #$src2)",
+ [(set IntRegs:$dst, (sra IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def ASRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = asr($src1, #$src2)",
+ [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, u6ImmPred:$src2))]>;
+
+def ASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = asl($src1, #$src2)",
+ [(set IntRegs:$dst, (shl IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def LSR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = lsr($src1, #$src2)",
+ [(set IntRegs:$dst, (srl IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def LSRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = lsr($src1, #$src2)",
+ [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, u6ImmPred:$src2))]>;
+
+def LSRd_ri_acc : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ u6Imm:$src3),
+ "$dst += lsr($src2, #$src3)",
+ [(set DoubleRegs:$dst, (add DoubleRegs:$src1,
+ (srl DoubleRegs:$src2,
+ u6ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+// Shift by register and accumulate.
+def ASR_rr_acc : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1,
+ IntRegs:$src2,
+ IntRegs:$src3),
+ "$dst += asr($src2, $src3)",
+ [], "$src1 = $dst">;
+
+// Shift by immediate and add.
+def ADDASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ u3Imm:$src3),
+ "$dst = addasl($src1, $src2, #$src3)",
+ [(set IntRegs:$dst, (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u3ImmPred:$src3)))]>;
+
+// Shift by register.
+def ASL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = asl($src1, $src2)",
+ [(set IntRegs:$dst, (shl IntRegs:$src1, IntRegs:$src2))]>;
+
+def ASR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = asr($src1, $src2)",
+ [(set IntRegs:$dst, (sra IntRegs:$src1, IntRegs:$src2))]>;
+
+
+def LSR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = lsr($src1, $src2)",
+ [(set IntRegs:$dst, (srl IntRegs:$src1, IntRegs:$src2))]>;
+
+def LSLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ "$dst = lsl($src1, $src2)",
+ [(set DoubleRegs:$dst, (shl DoubleRegs:$src1, IntRegs:$src2))]>;
+
+def ASRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2),
+ "$dst = asr($src1, $src2)",
+ [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, IntRegs:$src2))]>;
+
+def LSRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2),
+ "$dst = lsr($src1, $src2)",
+ [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, IntRegs:$src2))]>;
+
+//===----------------------------------------------------------------------===//
+// STYPE/SHIFT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/VH +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/VW +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/VW -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/SUPER +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// SYSTEM/SUPER -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/USER +
+//===----------------------------------------------------------------------===//
+def SDHexagonBARRIER: SDTypeProfile<0, 0, []>;
+def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER,
+ [SDNPHasChain]>;
+
+let hasSideEffects = 1 in
+def BARRIER : STInst<(outs), (ins),
+ "barrier",
+ [(HexagonBARRIER)]>;
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/USER -
+//===----------------------------------------------------------------------===//
+
+// TFRI64 - assembly mapped.
+let isReMaterializable = 1 in
+def TFRI64 : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
+ "$dst = #$src1",
+ [(set DoubleRegs:$dst, s8Imm64Pred:$src1)]>;
+
+// Pseudo instruction to encode a set of conditional transfers.
+// This instruction is used instead of a mux and trades off code size
+// for performance. We conduct this transformation optimistically in
+// the hope that these instructions get promoted to dot-new transfers
+// (see the sketch after the definitions below).
+let AddedComplexity = 100 in
+def TFR_condset_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2,
+ IntRegs:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+let AddedComplexity = 100 in
+def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst, (select PredRegs:$src1,
+ s12ImmPred:$src2,
+ s12ImmPred:$src3))]>;
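+
+// For example (an illustrative sketch, not part of the original source):
+// instead of r0 = mux(p0, r1, r2), the pseudo is later split into
+// conditional transfers such as
+//   if (p0) r0 = r1
+//   if (!p0) r0 = r2
+// which may then be promoted to dot-new transfers.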
+
+// Generate frameindex addresses.
+let isReMaterializable = 1 in
+def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1),
+ "$dst = add($src1)",
+ [(set IntRegs:$dst, ADDRri:$src1)]>;
+
+//
+// CR - Type.
+//
+let neverHasSideEffects = 1, Defs = [SA0, LC0] in {
+def LOOP0_i : CRInst<(outs), (ins brtarget:$offset, u10Imm:$src2),
+ "loop0($offset, #$src2)",
+ []>;
+}
+
+let neverHasSideEffects = 1, Defs = [SA0, LC0] in {
+def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2),
+ "loop0($offset, $src2)",
+ []>;
+}
+
+let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1,
+ Defs = [PC, LC0], Uses = [SA0, LC0] in {
+def ENDLOOP0 : CRInst<(outs), (ins brtarget:$offset),
+ ":endloop0",
+ []>;
+}
+
+// Support for generating global addresses.
+// Taken from X86InstrInfo.td.
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
+def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
+def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
+
+// This pattern is incorrect. When we add small data, we should change
+// this pattern to use memw(#foo).
+let isMoveImm = 1 in
+def CONST32 : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst,
+ (load (HexagonCONST32 tglobaltlsaddr:$global)))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst,
+ (HexagonCONST32 tglobaladdr:$global))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_set_jt : LDInst<(outs IntRegs:$dst), (ins jumptablebase:$jt),
+ "$dst = CONST32(#$jt)",
+ [(set IntRegs:$dst,
+ (HexagonCONST32 tjumptable:$jt))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32GP_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst,
+ (HexagonCONST32_GP tglobaladdr:$global))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_Int_Real : LDInst<(outs IntRegs:$dst), (ins i32imm:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst, imm:$global) ]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_Label : LDInst<(outs IntRegs:$dst), (ins bblabel:$label),
+ "$dst = CONST32($label)",
+ [(set IntRegs:$dst, (HexagonCONST32 bbl:$label))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST64_Int_Real : LDInst<(outs DoubleRegs:$dst), (ins i64imm:$global),
+ "$dst = CONST64(#$global)",
+ [(set DoubleRegs:$dst, imm:$global) ]>;
+
+def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins),
+ "$dst = xor($dst, $dst)",
+ [(set PredRegs:$dst, 0)]>;
+
+def MPY_trsext : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set IntRegs:$dst,
+ (trunc (i64 (srl (i64 (mul (i64 (sext IntRegs:$src1)),
+ (i64 (sext IntRegs:$src2)))),
+ (i32 32)))))]>;
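+// (i.e., Rd gets the upper 32 bits of the 64-bit product of the sign-extended
+// sources: a signed 32x32 high-word multiply.)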
+
+// Pseudo instructions.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def call : SDNode<"HexagonISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+// For tail calls, a HexagonTCRet SDNode has three SDNode properties: a chain,
+// an optional glue, and variadic arguments.
+// Its single operand has pointer type.
+def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+let Defs = [R29, R30], Uses = [R31, R30, R29] in {
+ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "Should never be emitted",
+ [(callseq_start timm:$amt)]>;
+}
+
+let Defs = [R29, R30, R31], Uses = [R29] in {
+ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "Should never be emitted",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+// Call subroutine.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALL : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst", []>;
+}
+
+// Call subroutine from register.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLR : JRInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "callr $dst",
+ []>;
+ }
+
+// Tail Calls.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNtg : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "jump $dst // TAILCALL", []>;
+}
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNtext : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "jump $dst // TAILCALL", []>;
+}
+
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNR : JInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "jumpr $dst // TAILCALL", []>;
+}
+// Map call instruction.
+def : Pat<(call IntRegs:$dst),
+ (CALLR IntRegs:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call texternalsym:$dst),
+ (CALL texternalsym:$dst)>, Requires<[HasV2TOnly]>;
+// Tail calls.
+def : Pat<(HexagonTCRet tglobaladdr:$dst),
+ (TCRETURNtg tglobaladdr:$dst)>;
+def : Pat<(HexagonTCRet texternalsym:$dst),
+ (TCRETURNtext texternalsym:$dst)>;
+def : Pat<(HexagonTCRet IntRegs:$dst),
+ (TCRETURNR IntRegs:$dst)>;
+
+// Map from r0 = and(r1, 65535) to r0 = zxth(r1).
+def : Pat <(and IntRegs:$src1, 65535),
+ (ZXTH IntRegs:$src1)>;
+
+// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
+def : Pat <(and IntRegs:$src1, 255),
+ (ZXTB IntRegs:$src1)>;
+
+// Map Add(p1, true) to p1 = not(p1).
+// Add(p1, false) should never be produced; if it is, it has to be
+// mapped to a NOP.
+def : Pat <(add PredRegs:$src1, -1),
+ (NOT_pp PredRegs:$src1)>;
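+// (In i1 arithmetic -1 is true and addition is xor, so p1 + true == not(p1).)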
+
+// Map from p0 = setlt(r0, r1); r2 = mux(p0, r3, r4) =>
+// p0 = cmp.lt(r0, r1); r2 = mux(p0, r4, r3).
+def : Pat <(select (i1 (setlt IntRegs:$src1, IntRegs:$src2)), IntRegs:$src3,
+ IntRegs:$src4),
+ (TFR_condset_rr (CMPLTrr IntRegs:$src1, IntRegs:$src2), IntRegs:$src4,
+ IntRegs:$src3)>, Requires<[HasV2TOnly]>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
+def : Pat <(select (not PredRegs:$src1), s8ImmPred:$src2, s8ImmPred:$src3),
+ (TFR_condset_ii PredRegs:$src1, s8ImmPred:$src3, s8ImmPred:$src2)>;
+
+// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
+def : Pat <(brcond (not PredRegs:$src1), bb:$offset),
+ (JMP_PredNot PredRegs:$src1, bb:$offset)>;
+
+// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2).
+def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)),
+ (AND_pnotp PredRegs:$src1, PredRegs:$src2)>;
+
+// Map from store(globaladdress + x) -> memd(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(store DoubleRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrid_GP tglobaladdr:$global, u16ImmPred:$offset, DoubleRegs:$src1)>;
+
+// Map from store(globaladdress) -> memd(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(store DoubleRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
+ (STrid_GP tglobaladdr:$global, 0, DoubleRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memw(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(store IntRegs:$src1, (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memw(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(store IntRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
+ (STriw_GP tglobaladdr:$global, 0, IntRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei16 IntRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei16 IntRegs:$src1,
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STh_GP tglobaladdr:$global, IntRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memb(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei8 IntRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei8 IntRegs:$src1,
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP tglobaladdr:$global, IntRegs:$src1)>;
+
+// Map from load(globaladdress + x) -> memw(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memw(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(load (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDw_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress + x) -> memd(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memd(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (LDd_GP tglobaladdr:$global)>;
+
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress + 0), Pd = Rd.
+let AddedComplexity = 100 in
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (TFR_PdRs (LDrib_GP tglobaladdr:$global, 0))>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDrih_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memuh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriuh_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memuh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriuh_GP tglobaladdr:$global, 0)>;
+// Map from load(globaladdress + x) -> memub(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memub(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriub_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(extloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memub(#foo).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDub_GP tglobaladdr:$global)>;
+
+// When the Interprocedural Global Variable optimizer realizes that a
+// certain global variable takes only two constant values, it shrinks the
+// global to a boolean. Catch those loads here in the following 3 patterns.
+let AddedComplexity = 100 in
+def : Pat <(extloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+let AddedComplexity = 100 in
+def : Pat <(sextloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+let AddedComplexity = 100 in
+def : Pat <(zextloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDub_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDh_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDh_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memuh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDuh_GP tglobaladdr:$global)>;
+
+// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
+def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
+ (AND_rr (LDrib ADDRriS11_0:$addr), (TFRI 0x1))>;
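+// (The AND with #1 keeps only bit 0 of the loaded byte, since the remaining
+// bits of the stored i1 are not assumed to be zero.)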
+
+// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = SXTW(Rss.lo).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i32)),
+ (i64 (SXTW (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg)))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = SXTW(SXTH(Rss.lo)).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i16)),
+ (i64 (SXTW (SXTH (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = SXTW(SXTB(Rss.lo)).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i8)),
+ (i64 (SXTW (SXTB (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>;
+
+// We want to prevent emitting pnots as much as possible.
+// Map brcond with an unsupported setcc to a JMP_PredNot.
+def : Pat <(brcond (i1 (setne IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+ (JMP_PredNot (CMPEQrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne IntRegs:$src1, s10ImmPred:$src2)), bb:$offset),
+ (JMP_PredNot (CMPEQri IntRegs:$src1, s10ImmPred:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 -1))), bb:$offset),
+ (JMP_PredNot PredRegs:$src1, bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 0))), bb:$offset),
+ (JMP_Pred PredRegs:$src1, bb:$offset)>;
+
+def : Pat <(brcond (i1 (setlt IntRegs:$src1, s8ImmPred:$src2)), bb:$offset),
+ (JMP_PredNot (CMPGEri IntRegs:$src1, s8ImmPred:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setlt IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+ (JMP_Pred (CMPLTrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)),
+ bb:$offset),
+ (JMP_PredNot (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1),
+ bb:$offset)>;
+
+def : Pat <(brcond (i1 (setule IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+ (JMP_PredNot (CMPGTUrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)),
+ bb:$offset),
+ (JMP_PredNot (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2),
+ bb:$offset)>;
+
+// Map from a 64-bit select to an emulated 64-bit mux.
+// Hexagon does not support 64-bit MUXes, so we emulate them with combines.
+def : Pat <(select PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (COMBINE_rr
+ (MUX_rr PredRegs:$src1,
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src3, subreg_hireg)),
+ (MUX_rr PredRegs:$src1,
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src3, subreg_loreg)))>;
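+// E.g. (illustrative): r1:0 = mux(p0, r3:2, r5:4) becomes
+//   r1 = mux(p0, r3, r5); r0 = mux(p0, r2, r4); r1:0 = combine(r1, r0).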
+
+// Map from a 1-bit select to logical ops.
+// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
+def : Pat <(select PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (OR_pp (AND_pp PredRegs:$src1, PredRegs:$src2),
+ (AND_pp (NOT_pp PredRegs:$src1), PredRegs:$src3))>;
+
+// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs.
+def : Pat<(i1 (load ADDRriS11_2:$addr)),
+ (i1 (TFR_PdRs (i32 (LDrib ADDRriS11_2:$addr))))>;
+
+// Map for truncating from i64 to i32.
+def : Pat<(i32 (trunc DoubleRegs:$src)),
+ (i32 (EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))>;
+
+// Map for truncating from i64 to i1.
+def : Pat<(i1 (trunc DoubleRegs:$src)),
+ (i1 (TFR_PdRs (i32(EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))))>;
+
+// Map memb(Rs) = Rdd -> memb(Rs) = Rt.
+def : Pat<(truncstorei8 DoubleRegs:$src, ADDRriS11_0:$addr),
+ (STrib ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+ subreg_loreg)))>;
+
+// Map memh(Rs) = Rdd -> memh(Rs) = Rt.
+def : Pat<(truncstorei16 DoubleRegs:$src, ADDRriS11_0:$addr),
+ (STrih ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+ subreg_loreg)))>;
+
+// Map from i1 = constant<-1>; memb(addr) = i1 -> r0 = 1; memb(addr) = r0.
+def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
+ (STrib ADDRriS11_2:$addr, (TFRI 1))>;
+
+let AddedComplexity = 100 in
+// Map from i1 = constant<-1>; memb(CONST32(#foo)) = i1 -> r0 = 1;
+// memb(#foo) = r0
+def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP tglobaladdr:$global, (TFRI 1))>;
+
+// Map from memb(Rs) = Pd -> Rt = mux(Pd, #1, #0); store Rt.
+def : Pat<(store PredRegs:$src1, ADDRriS11_2:$addr),
+ (STrib ADDRriS11_2:$addr, (i32 (MUX_ii PredRegs:$src1, 1, 0)) )>;
+
+// Map Rdd = anyext(Rs) -> Rdd = sxtw(Rs).
+// Hexagon_TODO: We can probably use combine but that will cost 2 instructions.
+// Better way to do this?
+def : Pat<(i64 (anyext IntRegs:$src1)),
+ (i64 (SXTW IntRegs:$src1))>;
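+// (A combine would first need the #0 materialized in a register, hence the
+// two instructions mentioned above; sxtw does it in one.)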
+
+// Map cmple -> cmpgt.
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle IntRegs:$src1, s10ImmPred:$src2)),
+ (i1 (NOT_Ps (CMPGTri IntRegs:$src1, s10ImmPred:$src2)))>;
+
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_Ps (CMPGTrr IntRegs:$src1, IntRegs:$src2)))>;
+
+// Rss <= Rtt -> !(Rss > Rtt).
+def : Pat<(i1 (setle DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_Ps (CMPGT64rr DoubleRegs:$src1, DoubleRegs:$src2)))>;
+
+// Map cmpne -> cmpeq.
+// Hexagon_TODO: We should improve on this.
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne IntRegs:$src1, s10ImmPred:$src2)),
+ (i1 (NOT_Ps(i1 (CMPEQri IntRegs:$src1, s10ImmPred:$src2))))>;
+
+// Map cmpne(Rs) -> !cmpeq(Rs).
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_Ps(i1 (CMPEQrr IntRegs:$src1, IntRegs:$src2))))>;
+
+// Convert setne back to xor for Hexagon, since we compute with predicate
+// registers.
+def : Pat <(i1 (setne PredRegs:$src1, PredRegs:$src2)),
+ (i1 (XOR_pp PredRegs:$src1, PredRegs:$src2))>;
+
+// Map cmpne(Rss) -> !cmpeq(Rss).
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_Ps(i1 (CMPEHexagon4rr DoubleRegs:$src1, DoubleRegs:$src2))))>;
+
+// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setge IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_Ps(i1 (CMPGTrr IntRegs:$src2, IntRegs:$src1))))>;
+
+def : Pat <(i1 (setge IntRegs:$src1, s8ImmPred:$src2)),
+ (i1 (CMPGEri IntRegs:$src1, s8ImmPred:$src2))>;
+
+// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
+// rss >= rtt -> !(rtt > rss).
+def : Pat <(i1 (setge DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_Ps(i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))))>;
+
+// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
+// rs < rt -> !(rs >= rt).
+def : Pat <(i1 (setlt IntRegs:$src1, s8ImmPred:$src2)),
+ (i1 (NOT_Ps (CMPGEri IntRegs:$src1, s8ImmPred:$src2)))>;
+
+// Map cmplt(Rs, Rt) -> cmplt(Rs, Rt).
+// rs < rt -> rs < rt. Let assembler map it.
+def : Pat <(i1 (setlt IntRegs:$src1, IntRegs:$src2)),
+ (i1 (CMPLTrr IntRegs:$src2, IntRegs:$src1))>;
+
+// Map cmplt(Rss, Rtt) -> cmpgt(Rtt, Rss).
+// rss < rtt -> (rtt > rss).
+def : Pat <(i1 (setlt DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from cmpltu(Rs, Rt) -> cmpgtu(Rt, Rs).
+// rs < rt -> rt > rs.
+def : Pat <(i1 (setult IntRegs:$src1, IntRegs:$src2)),
+ (i1 (CMPGTUrr IntRegs:$src2, IntRegs:$src1))>;
+
+// Map from cmpltu(Rss, Rtt) -> cmpgtu(Rtt, Rss).
+// rs < rt -> rt > rs.
+def : Pat <(i1 (setult DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from Rs >= Rt -> !(Rt > Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setuge IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_Ps (CMPGTUrr IntRegs:$src2, IntRegs:$src1)))>;
+
+// Map from Rs >= Rt -> !(Rt > Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_Ps (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1)))>;
+
+// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def : Pat <(i1 (setule IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_Ps (CMPGTUrr IntRegs:$src1, IntRegs:$src2)))>;
+
+// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def : Pat <(i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_Ps (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2)))>;
+
+// Sign extends.
+// i1 -> i32
+def : Pat <(i32 (sext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, -1, 0))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i8 -> i64
+def: Pat <(i64 (sextloadi8 ADDRriS11_0:$src1)),
+ (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+
+// Convert any-extended load back to load and sign extend.
+// i8 -> i64
+def: Pat <(i64 (extloadi8 ADDRriS11_0:$src1)),
+ (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i16 -> i64
+def: Pat <(i64 (sextloadi16 ADDRriS11_1:$src1)),
+ (i64 (SXTW (LDrih ADDRriS11_1:$src1)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i32 -> i64
+def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)),
+ (i64 (SXTW (LDriw ADDRriS11_2:$src1)))>;
+
+
+// Zero extends.
+// i1 -> i32
+def : Pat <(i32 (zext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, 1, 0))>;
+
+// i1 -> i64
+def : Pat <(i64 (zext PredRegs:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (MUX_ii PredRegs:$src1, 1, 0)))>;
+
+// i32 -> i64
+def : Pat <(i64 (zext IntRegs:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), IntRegs:$src1))>;
+
+// i8 -> i64
+def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>;
+
+// i16 -> i64
+def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>;
+
+// i32 -> i64
+def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+
+def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
+ (i32 (LDriw ADDRriS11_0:$src1))>;
+
+// Map from Rd = Pd to Rd = mux(Pd, #1, #0).
+def : Pat <(i32 (anyext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, 1, 0))>;
+
+// Map from Rdd = Pd to Rdd = sxtw(mux(Pd, #1, #0)).
+def : Pat <(i64 (anyext PredRegs:$src1)),
+ (i64 (SXTW (i32 (MUX_ii PredRegs:$src1, 1, 0))))>;
+
+
+// Any extended 64-bit load.
+// anyext i32 -> i64
+def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+
+// anyext i16 -> i64.
+def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>;
+
+// Multiply 64-bit unsigned and use upper result.
+def : Pat <(mulhu DoubleRegs:$src1, DoubleRegs:$src2),
+ (MPYU64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG
+ (LSRd_ri(MPYU64_acc(MPYU64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG (LSRd_ri(MPYU64
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ 32) ,subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
+ 32),subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)
+ )>;
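+
+// A sketch of the decomposition above (illustrative, not part of the original
+// source; xh/xl and yh/yl denote the high/low 32-bit halves of the sources):
+// since
+//   x*y = 2^64*xh*yh + 2^32*(xh*yl + xl*yh) + xl*yl,
+// the upper 64 bits are
+//   xh*yh + ((xh*yl + xl*yh + (xl*yl >> 32)) >> 32),
+// which the nested MPYU64 / MPYU64_acc / LSRd_ri expression computes, keeping
+// each partial sum in a 64-bit accumulator.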
+
+// Multiply 64-bit signed and use upper result.
+def : Pat <(mulhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (MPY64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG
+ (LSRd_ri(MPY64_acc(MPY64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG (LSRd_ri(MPYU64
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ 32) ,subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
+ 32),subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)
+ )>;
+
+// Hexagon specific ISD nodes.
+def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>;
+def Hexagon_ADJDYNALLOC : SDNode<"HexagonISD::ADJDYNALLOC",
+ SDTHexagonADJDYNALLOC>;
+// Needed to tag these instructions for stack layout.
+let usesCustomInserter = 1 in
+def ADJDYNALLOC : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1,
+ s16Imm:$src2),
+ "$dst = add($src1, #$src2)",
+ [(set IntRegs:$dst, (Hexagon_ADJDYNALLOC IntRegs:$src1,
+ s16ImmPred:$src2))]>;
+
+def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, []>;
+def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>;
+def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ [(set IntRegs:$dst, (Hexagon_ARGEXTEND IntRegs:$src1))]>;
+
+let AddedComplexity = 100 in
+def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND IntRegs:$src1), i16)),
+ (TFR IntRegs:$src1)>;
+
+
+def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT : JRInst<(outs), (ins IntRegs:$src),
+ "jumpr $src",
+ [(HexagonBR_JT IntRegs:$src)]>;
+def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>;
+
+def : Pat<(HexagonWrapperJT tjumptable:$dst),
+ (CONST32_set_jt tjumptable:$dst)>;
+
+
+//===----------------------------------------------------------------------===//
+// V3 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV3.td"
+
+//===----------------------------------------------------------------------===//
+// V3 Instructions -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V4 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV4.td"
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td
new file mode 100644
index 0000000..a73897e
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfoV3.td
@@ -0,0 +1,134 @@
+//=- HexagonInstrInfoV3.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V3 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// J +
+//===----------------------------------------------------------------------===//
+// Call subroutine.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
+ P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLv3 : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst", []>, Requires<[HasV3T]>;
+}
+
+//===----------------------------------------------------------------------===//
+// J -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// JR +
+//===----------------------------------------------------------------------===//
+// Call subroutine from register.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
+ P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "callr $dst",
+ []>, Requires<[HasV3TOnly]>;
+ }
+
+
+// if (p?.new) jumpr:t r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cPnewt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) jumpr:t $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// if (!p?.new) jumpr:t r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cNotPnewt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) jumpr:t $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// Not taken.
+// if (p?.new) jumpr:nt r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cPnewNt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) jumpr:nt $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// if (!p?.new) jumpr:nt r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cNotPnewNt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) jumpr:nt $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+//===----------------------------------------------------------------------===//
+// JR -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU +
+//===----------------------------------------------------------------------===//
+
+let AddedComplexity = 200 in
+def MAXw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = max($src2, $src1)",
+ [(set DoubleRegs:$dst, (select (i1 (setlt DoubleRegs:$src2,
+ DoubleRegs:$src1)),
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))]>,
+Requires<[HasV3T]>;
+
+let AddedComplexity = 200 in
+def MINw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = min($src2, $src1)",
+ [(set DoubleRegs:$dst, (select (i1 (setgt DoubleRegs:$src2,
+ DoubleRegs:$src1)),
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))]>,
+Requires<[HasV3T]>;
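+
+// (These match max(a, b) = (b < a) ? a : b and min(a, b) = (b > a) ? a : b;
+// the high AddedComplexity ensures the select form is folded into min/max.)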
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU -
+//===----------------------------------------------------------------------===//
+
+
+
+
+//def : Pat <(brcond (i1 (seteq IntRegs:$src1, 0)), bb:$offset),
+// (JMP_RegEzt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setne IntRegs:$src1, 0)), bb:$offset),
+// (JMP_RegNzt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setle IntRegs:$src1, 0)), bb:$offset),
+// (JMP_RegLezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setge IntRegs:$src1, 0)), bb:$offset),
+// (JMP_RegGezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setgt IntRegs:$src1, -1)), bb:$offset),
+// (JMP_RegGezt IntRegs:$src1, bb:$offset)>, Requires<[HasV3T]>;
+
+
+// Map call instruction.
+def : Pat<(call IntRegs:$dst),
+ (CALLRv3 IntRegs:$dst)>, Requires<[HasV3T]>;
+def : Pat<(call tglobaladdr:$dst),
+ (CALLv3 tglobaladdr:$dst)>, Requires<[HasV3T]>;
+def : Pat<(call texternalsym:$dst),
+ (CALLv3 texternalsym:$dst)>, Requires<[HasV3T]>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
new file mode 100644
index 0000000..24218d0
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -0,0 +1,3392 @@
+//=- HexagonInstrInfoV4.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V4 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+// The Hexagon V4 architecture spec defines the following instruction classes:
+// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM (SYSTEM is not implemented in
+// the compiler).
+
+// LD Instructions:
+// ========================================
+// Loads (8/16/32/64 bit)
+// Deallocframe
+
+// ST Instructions:
+// ========================================
+// Stores (8/16/32/64 bit)
+// Allocframe
+
+// ALU32 Instructions:
+// ========================================
+// Arithmetic / Logical (32 bit)
+// Vector Halfword
+
+// XTYPE Instructions (32/64 bit):
+// ========================================
+// Arithmetic, Logical, Bit Manipulation
+// Multiply (Integer, Fractional, Complex)
+// Permute / Vector Permute Operations
+// Predicate Operations
+// Shift / Shift with Add/Sub/Logical
+// Vector Byte ALU
+// Vector Halfword (ALU, Shift, Multiply)
+// Vector Word (ALU, Shift)
+
+// J Instructions:
+// ========================================
+// Jump/Call PC-relative
+
+// JR Instructions:
+// ========================================
+// Jump/Call Register
+
+// MEMOP Instructions:
+// ========================================
+// Operation on memory (8/16/32 bit)
+
+// NV Instructions:
+// ========================================
+// New-value Jumps
+// New-value Stores
+
+// CR Instructions:
+// ========================================
+// Control-Register Transfers
+// Hardware Loop Setup
+// Predicate Logicals & Reductions
+
+// SYSTEM Instructions (not implemented in the compiler):
+// ========================================
+// Prefetch
+// Cache Maintenance
+// Bus Operations
+
+
+//===----------------------------------------------------------------------===//
+// ALU32 +
+//===----------------------------------------------------------------------===//
+
+// Shift halfword.
+
+def ASLH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASLH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASLH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASLH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASRH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASRH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASRH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Sign extend.
+
+def SXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+def SXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Zero extend.
+
+let neverHasSideEffects = 1 in
+def ZXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1 in
+def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// ALU32 -
+//===----------------------------------------------------------------------===//
+
+
+
+//===----------------------------------------------------------------------===//
+// LD +
+//===----------------------------------------------------------------------===//
+///
+/// Make sure that in a post-increment load, the first operand is always the
+/// post-increment operand.
+///
+//// Load doubleword.
+// Rdd=memd(Re=#U6)
+
+// Rdd=memd(Rs+Rt<<#u2)
+// Special-case pattern for an indexed load without an offset, which is easier
+// to match. The AddedComplexity of this pattern should be lower than that of
+// the base+offset load, and lower still than the more generic version with
+// offset/shift below. A similar approach is taken for all other base+index
+// loads.
+let AddedComplexity = 10, isPredicable = 1 in
+def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memd($src1+$src2<<#0)",
+ [(set DoubleRegs:$dst, (load (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDrid_indexed_shl_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memd($src1+$src2<<#$offset)",
+ [(set DoubleRegs:$dst, (load (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
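+
+// (With these AddedComplexity values, a load from (add Rs, (shl Rt, #u2))
+// selects the shifted form whenever a shift is present, and the plain
+// indexed form otherwise.)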
+
+//// Load doubleword conditionally.
+// if ([!]Pv[.new]) Rd=memd(Rs+Rt<<#u2)
+// if (Pv) Rd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrid_indexed_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrid_indexed_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrid_indexed_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrid_indexed_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrid_indexed_shl_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrid_indexed_shl_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrid_indexed_shl_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrid_indexed_shl_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rdd=memd(Rt<<#u2+#U6)
+
+//// Load byte.
+// Rd=memb(Re=#U6)
+
+// Rd=memb(Rs+Rt<<#u2)
+let AddedComplexity = 10, isPredicable = 1 in
+def LDrib_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memb($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (sextloadi8 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriub_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memub($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (zextloadi8 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriub_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memub($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDrib_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memb($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst,
+ (sextloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriub_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memub($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst,
+ (zextloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriub_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memub($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+//// Load byte conditionally.
+// if ([!]Pv[.new]) Rd=memb(Rs+Rt<<#u2)
+// if (Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrib_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrib_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrib_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrib_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrib_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrib_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrib_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrib_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+//// Load unsigned byte conditionally.
+// if ([!]Pv[.new]) Rd=memub(Rs+Rt<<#u2)
+// if (Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriub_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriub_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriub_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriub_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriub_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriub_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriub_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriub_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memb(Rt<<#u2+#U6)
+
+//// Load halfword.
+// Rd=memh(Re=#U6)
+
+// Rd=memh(Rs+Rt<<#u2)
+let AddedComplexity = 10, isPredicable = 1 in
+def LDrih_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memh($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (sextloadi16 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriuh_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memuh($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (zextloadi16 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriuh_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memuh($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Rs+Rt<<#u2)
+let AddedComplexity = 40, isPredicable = 1 in
+def LDrih_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memh($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst,
+ (sextloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriuh_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memuh($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst,
+ (zextloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriuh_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memuh($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst,
+ (extloadi16 (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+//// Load halfword conditionally.
+// if ([!]Pv[.new]) Rd=memh(Rs+Rt<<#u2)
+// if (Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrih_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrih_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrih_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDrih_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrih_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrih_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrih_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDrih_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+//// Load unsigned halfword conditionally.
+// if ([!]Pv[.new]) Rd=memuh(Rs+Rt<<#u2)
+// if (Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriuh_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriuh_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriuh_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriuh_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriuh_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriuh_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriuh_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Rt<<#u2+#U6)
+
+//// Load word.
+// Rd=memw(Re=#U6)
+
+// Rd=memw(Rs+Rt<<#u2)
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriw_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memw($src1+$src2<<#0)",
+ [(set IntRegs:$dst, (load (add IntRegs:$src1,
+ IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Rs+Rt<<#u2)
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriw_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memw($src1+$src2<<#$offset)",
+ [(set IntRegs:$dst, (load (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$offset))))]>,
+ Requires<[HasV4T]>;
+
+//// Load word conditionally.
+// if ([!]Pv[.new]) Rd=memw(Rs+Rt<<#u2)
+// if (Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriw_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriw_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriw_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15 in
+def LDriw_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriw_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriw_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriw_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45 in
+def LDriw_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Rt<<#u2+#U6)
+
+
+// Post-inc Load, Predicated, Dot new
+
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid_cdnPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if ($src1.new) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid_cdnNotPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if (!$src1.new) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1.new) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1.new) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1.new) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1.new) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1.new) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1.new) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1.new) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1.new) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if ($src1.new) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if (!$src1.new) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+///
+/// Assumptions: ****** DO NOT IGNORE ********
+/// 1. Make sure that in a post-increment store, the zeroth operand is always
+/// the post-increment operand.
+/// 2. Make sure that the store value operand (Rt/Rtt) in a store is always
+/// the last operand.
+///
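+/// For example, STrid_indexed_shl_V4 below keeps the stored value
+/// (DoubleRegs:$src4) as the last input operand, and POST_STdri_cdnPt_V4
+/// defines the post-increment result (IntRegs:$dst) as operand zero.
+///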
+
+// Store doubleword.
+// memd(Re=#U6)=Rtt
+// TODO: needs to be implemented
+
+// memd(Rs+#s11:3)=Rtt
+// memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, isPredicable = 1 in
+def STrid_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, DoubleRegs:$src4),
+ "memd($src1+$src2<<#$src3) = $src4",
+ [(store DoubleRegs:$src4, (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memd(Ru<<#u2+#U6)=Rtt
+let AddedComplexity = 10 in
+def STrid_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, DoubleRegs:$src4),
+ "memd($src1<<#$src2+#$src3) = $src4",
+ [(store DoubleRegs:$src4, (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memd(Rx++#s4:3)=Rtt
+// memd(Rx++#s4:3:circ(Mu))=Rtt
+// memd(Rx++I:circ(Mu))=Rtt
+// memd(Rx++Mu)=Rtt
+// memd(Rx++Mu:brev)=Rtt
+// memd(gp+#u16:3)=Rtt
+
+// Store doubleword conditionally.
+// if ([!]Pv[.new]) memd(#u6)=Rtt
+// TODO: needs to be implemented.
+
+// if ([!]Pv[.new]) memd(Rs+#u6:3)=Rtt
+// if (Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if ($src1.new) memd($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+// if (!Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if (!$src1.new) memd($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if ($src1.new) memd($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+// if (!Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if (!$src1.new) memd($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memd(Rs+Ru<<#u2)=Rtt
+// if (Pv) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if ($src1) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if ($src1) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if (!$src1) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if (!$src1.new) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memd(Rx++#s4:3)=Rtt
+// if (Pv) memd(Rx++#s4:3)=Rtt
+// if (Pv.new) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def POST_STdri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if ($src1.new) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rx++#s4:3)=Rtt
+// if (!Pv.new) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def POST_STdri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if (!$src1.new) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store byte.
+// memb(Re=#U6)=Rt
+// TODO: needs to be implemented.
+// memb(Rs+#s11:0)=Rt
+// memb(Rs+#u6:0)=#S8
+let AddedComplexity = 10, isPredicable = 1 in
+def STrib_imm_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_0Imm:$src2, s8Imm:$src3),
+ "memb($src1+#$src2) = #$src3",
+ [(truncstorei8 s8ImmPred:$src3, (add IntRegs:$src1,
+ u6_0ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// memb(Rs+Ru<<#u2)=Rt
+let AddedComplexity = 10, isPredicable = 1 in
+def STrib_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memb($src1+$src2<<#$src3) = $src4",
+ [(truncstorei8 IntRegs:$src4, (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memb(Ru<<#u2+#U6)=Rt
+let AddedComplexity = 10 in
+def STrib_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memb($src1<<#$src2+#$src3) = $src4",
+ [(truncstorei8 IntRegs:$src4, (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memb(Rx++#s4:0:circ(Mu))=Rt
+// memb(Rx++I:circ(Mu))=Rt
+// memb(Rx++Mu)=Rt
+// memb(Rx++Mu:brev)=Rt
+// memb(gp+#u16:0)=Rt
+
+
+// Store byte conditionally.
+// if ([!]Pv[.new]) memb(#u6)=Rt
+// if ([!]Pv[.new]) memb(Rs+#u6:0)=#S6
+// if (Pv) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_imm_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if ($src1) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_imm_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if ($src1.new) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_imm_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if (!$src1) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_imm_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if (!$src1.new) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rs+#u6:0)=Rt
+// if (Pv) memb(Rs+#u6:0)=Rt
+// if (Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memb($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=Rt
+// if (!Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memb($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memb($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memb($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Rt
+// if (Pv) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rx++#s4:0)=Rt
+// if (Pv) memb(Rx++#s4:0)=Rt
+// if (Pv.new) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1.new) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rx++#s4:0)=Rt
+// if (!Pv.new) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1.new) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store halfword.
+// memh(Re=#U6)=Rt.H
+// TODO: needs to be implemented
+
+// memh(Re=#U6)=Rt
+// TODO: needs to be implemented
+
+// memh(Rs+#s11:1)=Rt.H
+// memh(Rs+#s11:1)=Rt
+// memh(Rs+#u6:1)=#S8
+let AddedComplexity = 10, isPredicable = 1 in
+def STrih_imm_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_1Imm:$src2, s8Imm:$src3),
+ "memh($src1+#$src2) = #$src3",
+ [(truncstorei16 s8ImmPred:$src3, (add IntRegs:$src1,
+ u6_1ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// memh(Rs+Ru<<#u2)=Rt.H
+// TODO: needs to be implemented.
+
+// memh(Rs+Ru<<#u2)=Rt
+let AddedComplexity = 10, isPredicable = 1 in
+def STrih_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memh($src1+$src2<<#$src3) = $src4",
+ [(truncstorei16 IntRegs:$src4, (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memh(Ru<<#u2+#U6)=Rt.H
+// memh(Ru<<#u2+#U6)=Rt
+let AddedComplexity = 10 in
+def STrih_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memh($src1<<#$src2+#$src3) = $src4",
+ [(truncstorei16 IntRegs:$src4, (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memh(Rx++#s4:1:circ(Mu))=Rt.H
+// memh(Rx++#s4:1:circ(Mu))=Rt
+// memh(Rx++I:circ(Mu))=Rt.H
+// memh(Rx++I:circ(Mu))=Rt
+// memh(Rx++Mu)=Rt.H
+// memh(Rx++Mu)=Rt
+// memh(Rx++Mu:brev)=Rt.H
+// memh(Rx++Mu:brev)=Rt
+// memh(gp+#u16:1)=Rt.H
+// memh(gp+#u16:1)=Rt
+
+
+// Store halfword conditionally.
+// if ([!]Pv[.new]) memh(#u6)=Rt.H
+// if ([!]Pv[.new]) memh(#u6)=Rt
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=#S6
+// if (Pv) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_imm_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if ($src1) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_imm_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if ($src1.new) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_imm_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if (!$src1) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_imm_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if (!$src1.new) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H
+// TODO: needs to be implemented.
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt
+// if (Pv) memh(Rs+#u6:1)=Rt
+// if (Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memh($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=Rt
+// if (!Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memh($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memh($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memh($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt.H
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt
+// if (Pv) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H
+// TODO: Needs to be implemented.
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt
+// if (Pv) memh(Rx++#s4:1)=Rt
+// if (Pv.new) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1.new) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rx++#s4:1)=Rt
+// if (!Pv.new) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1.new) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store word.
+// memw(Re=#U6)=Rt
+// TODO: Needs to be implemented.
+
+// memw(Rs+#s11:2)=Rt
+// memw(Rs+#u6:2)=#S8
+let AddedComplexity = 10, isPredicable = 1 in
+def STriw_imm_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_2Imm:$src2, s8Imm:$src3),
+ "memw($src1+#$src2) = #$src3",
+ [(store s8ImmPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// memw(Rs+Ru<<#u2)=Rt
+let AddedComplexity = 10, isPredicable = 1 in
+def STriw_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memw($src1+$src2<<#$src3) = $src4",
+ [(store IntRegs:$src4, (add IntRegs:$src1,
+ (shl IntRegs:$src2, u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memw(Ru<<#u2+#U6)=Rt
+let AddedComplexity = 10 in
+def STriw_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memw($src1<<#$src2+#$src3) = $src4",
+ [(store IntRegs:$src4, (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memw(Rx++#s4:2)=Rt
+// memw(Rx++#s4:2:circ(Mu))=Rt
+// memw(Rx++I:circ(Mu))=Rt
+// memw(Rx++Mu)=Rt
+// memw(Rx++Mu:brev)=Rt
+// memw(gp+#u16:2)=Rt
+
+
+// Store word conditionally.
+// if ([!]Pv[.new]) memw(#u6)=Rt
+// TODO: Needs to be implemented.
+
+// if ([!]Pv[.new]) memw(Rs+#u6:2)=#S6
+// if (Pv) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_imm_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if ($src1) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_imm_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if ($src1.new) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_imm_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if (!$src1) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_imm_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if (!$src1.new) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rs+#u6:2)=Rt
+// if (Pv) memw(Rs+#u6:2)=Rt
+// if (Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memw($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=Rt
+// if (!Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memw($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memw($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memw($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Rt
+// if (Pv) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rx++#s4:2)=Rt
+// if (Pv) memw(Rx++#s4:2)=Rt
+// if (Pv.new) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1.new) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rx++#s4:2)=Rt
+// if (!Pv.new) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1.new) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// ST -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// NV/ST +
+//===----------------------------------------------------------------------===//
+
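+// A new-value store ("... = Nt.new") stores a register produced by another
+// instruction in the same packet. None of the forms below carry selection
+// patterns; presumably they are created late (for instance, while forming
+// packets) rather than during instruction selection.
+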
+// Store new-value byte.
+
+// memb(Re=#U6)=Nt.new
+// memb(Rs+#s11:0)=Nt.new
+let mayStore = 1, isPredicable = 1 in
+def STrib_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
+ "memb($addr) = $src1.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, isPredicable = 1 in
+def STrib_indexed_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
+ "memb($src1+#$src2) = $src3.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
+def STrib_indexed_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memb($src1+$src2<<#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Ru<<#u2+#U6)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memb($src1<<#$src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
+def POST_STbri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4_0Imm:$offset),
+ "memb($src2++#$offset) = $src1.new",
+ [],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// memb(Rx++#s4:0:circ(Mu))=Nt.new
+// memb(Rx++I:circ(Mu))=Nt.new
+// memb(Rx++Mu)=Nt.new
+// memb(Rx++Mu:brev)=Nt.new
+
+// memb(gp+#u16:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memb(#$global+$offset) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// Store new-value byte conditionally.
+// if ([!]Pv[.new]) memb(#u6)=Nt.new
+// if (Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Nt.new
+// if (Pv) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new
+// if (Pv) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1.new) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STbri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1.new) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store new-value halfword.
+// memh(Re=#U6)=Nt.new
+// memh(Rs+#s11:1)=Nt.new
+let mayStore = 1, isPredicable = 1 in
+def STrih_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
+ "memh($addr) = $src1.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, isPredicable = 1 in
+def STrih_indexed_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3),
+ "memh($src1+#$src2) = $src3.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
+def STrih_indexed_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memh($src1+$src2<<#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Ru<<#u2+#U6)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memh($src1<<#$src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
+def POST_SThri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4_1Imm:$offset),
+ "memh($src2++#$offset) = $src1.new",
+ [],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// memh(Rx++#s4:1:circ(Mu))=Nt.new
+// memh(Rx++I:circ(Mu))=Nt.new
+// memh(Rx++Mu)=Nt.new
+// memh(Rx++Mu:brev)=Nt.new
+
+// memh(gp+#u16:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memh(#$global+$offset) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// Store new-value halfword conditionally.
+
+// if ([!]Pv[.new]) memh(#u6)=Nt.new
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Nt.new
+// if (Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memh($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memh($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memh($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memh($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1) memh($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memh($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memh($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memh($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Nt.new
+// if (Pv) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memh($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memh($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memh($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Nt.new
+// if (Pv) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1) memh($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1.new) memh($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1) memh($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_SThri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1.new) memh($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store new-value word.
+
+// memw(Re=#U6)=Nt.new
+// memw(Rs+#s11:2)=Nt.new
+let mayStore = 1, isPredicable = 1 in
+def STriw_nv_V4 : NVInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memw($addr) = $src1.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, isPredicable = 1 in
+def STriw_indexed_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
+ "memw($src1+#$src2) = $src3.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
+def STriw_indexed_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memw($src1+$src2<<#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Ru<<#u2+#U6)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memw($src1<<#$src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
+def POST_STwri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4_2Imm:$offset),
+ "memw($src2++#$offset) = $src1.new",
+ [],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// memw(Rx++#s4:2:circ(Mu))=Nt.new
+// memw(Rx++I:circ(Mu))=Nt.new
+// memw(Rx++Mu)=Nt.new
+// memw(Rx++Mu:brev)=Nt.new
+// memw(gp+#u16:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memw(#$global+$offset) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// Store new-value word conditionally.
+
+// if ([!]Pv[.new]) memw(#u6)=Nt.new
+
+// if ([!]Pv[.new]) memw(Rs+#u6:2)=Nt.new
+// if (Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memw($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memw($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memw($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memw($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1) memw($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memw($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memw($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memw($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Nt.new
+// if (Pv) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memw($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memw($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memw($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new
+// if (Pv) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1) memw($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1.new) memw($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1) memw($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1 in
+def POST_STwri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1.new) memw($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// NV/ST -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU +
+//===----------------------------------------------------------------------===//
+
+// Add and accumulate.
+// Rd=add(Rs,add(Ru,#s6))
+def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
+ "$dst = add($src1, add($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (add IntRegs:$src1, (add IntRegs:$src2, s6ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(Rs,sub(#s6,Ru))
+def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ "$dst = add($src1, sub(#$src2, $src3))",
+ [(set IntRegs:$dst,
+ (add IntRegs:$src1, (sub s6ImmPred:$src2, IntRegs:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// Generates the same instruction as ADDr_SUBri_V4, but matches a different
+// pattern.
+// Rd=add(Rs,sub(#s6,Ru))
+def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ "$dst = add($src1, sub(#$src2, $src3))",
+ [(set IntRegs:$dst,
+ (sub (add IntRegs:$src1, s6ImmPred:$src2), IntRegs:$src3))]>,
+ Requires<[HasV4T]>;
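+
+// Worked equivalence for the two patterns above: for any Rs, Ru and
+// immediate #s6,
+// Rs + (#s6 - Ru) == (Rs + #s6) - Ru,
+// so a DAG of either shape can be emitted as the same add/sub instruction.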
+
+
+// Add or subtract doublewords with carry.
+//TODO:
+// Rdd=add(Rss,Rtt,Px):carry
+//TODO:
+// Rdd=sub(Rss,Rtt,Px):carry
+
+
+// Logical doublewords.
+// Rdd=and(Rtt,~Rss)
+def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = and($src1, ~$src2)",
+ [(set DoubleRegs:$dst, (and DoubleRegs:$src1,
+ (not DoubleRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// Rdd=or(Rtt,~Rss)
+def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = or($src1, ~$src2)",
+ [(set DoubleRegs:$dst,
+ (or DoubleRegs:$src1, (not DoubleRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+
+// Logical-logical doublewords.
+// Rxx^=xor(Rss,Rtt)
+def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ "$dst ^= xor($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (xor DoubleRegs:$src1, (xor DoubleRegs:$src2, DoubleRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
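+
+// In these accumulating (MInst_acc) defs, the "$src1 = $dst" constraint ties
+// the accumulator input to the destination register, modeling the
+// read-modify-write Rxx/Rx forms as two-address instructions.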
+
+
+// Logical-logical words.
+// Rx=or(Ru,and(Rx,#s10))
+def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ "$dst = or($src1, and($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (or IntRegs:$src1, (and IntRegs:$src2, s10ImmPred:$src3)))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=and(Rs,Rt)
+// Rx&=and(Rs,Rt)
+def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= and($src2, $src3)",
+ [(set IntRegs:$dst,
+ (and IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=and(Rs,Rt)
+def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= and($src2, $src3)",
+ [(set IntRegs:$dst,
+ (or IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=and(Rs,Rt)
+def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= and($src2, $src3)",
+ [(set IntRegs:$dst,
+ (xor IntRegs:$src1, (and IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=and(Rs,~Rt)
+// Rx&=and(Rs,~Rt)
+def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= and($src2, ~$src3)",
+ [(set IntRegs:$dst,
+ (and IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=and(Rs,~Rt)
+def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= and($src2, ~$src3)",
+ [(set IntRegs:$dst,
+ (or IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=and(Rs,~Rt)
+def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= and($src2, ~$src3)",
+ [(set IntRegs:$dst,
+ (xor IntRegs:$src1, (and IntRegs:$src2, (not IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=or(Rs,Rt)
+// Rx&=or(Rs,Rt)
+def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= or($src2, $src3)",
+ [(set IntRegs:$dst,
+ (and IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=or(Rs,Rt)
+def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= or($src2, $src3)",
+ [(set IntRegs:$dst,
+ (or IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=or(Rs,Rt)
+def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= or($src2, $src3)",
+ [(set IntRegs:$dst,
+ (xor IntRegs:$src1, (or IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=xor(Rs,Rt)
+// Rx&=xor(Rs,Rt)
+def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= xor($src2, $src3)",
+ [(set IntRegs:$dst,
+ (and IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=xor(Rs,Rt)
+def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= xor($src2, $src3)",
+ [(set IntRegs:$dst,
+ (or IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=xor(Rs,Rt)
+def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= xor($src2, $src3)",
+ [(set IntRegs:$dst,
+ (xor IntRegs:$src1, (xor IntRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=and(Rs,#s10)
+def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ "$dst |= and($src2, #$src3)",
+ [(set IntRegs:$dst,
+ (or IntRegs:$src1, (and IntRegs:$src2, s10ImmPred:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=or(Rs,#s10)
+def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3),
+ "$dst |= or($src2, #$src3)",
+ [(set IntRegs:$dst,
+ (or IntRegs:$src1, (or IntRegs:$src2, s10ImmPred:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Modulo wrap
+// Rd=modwrap(Rs,Rt)
+// Round
+// Rd=cround(Rs,#u5)
+// Rd=cround(Rs,Rt)
+// Rd=round(Rs,#u5)[:sat]
+// Rd=round(Rs,Rt)[:sat]
+// Vector reduce add unsigned halfwords
+// Rd=vraddh(Rss,Rtt)
+// Vector add bytes
+// Rdd=vaddb(Rss,Rtt)
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+// Rxx+=vrcnegh(Rss,Rt)
+// Vector maximum bytes
+// Rdd=vmaxb(Rtt,Rss)
+// Vector reduce maximum halfwords
+// Rxx=vrmaxh(Rss,Ru)
+// Rxx=vrmaxuh(Rss,Ru)
+// Vector reduce maximum words
+// Rxx=vrmaxuw(Rss,Ru)
+// Rxx=vrmaxw(Rss,Ru)
+// Vector minimum bytes
+// Rdd=vminb(Rtt,Rss)
+// Vector reduce minimum halfwords
+// Rxx=vrminh(Rss,Ru)
+// Rxx=vrminuh(Rss,Ru)
+// Vector reduce minimum words
+// Rxx=vrminuw(Rss,Ru)
+// Rxx=vrminw(Rss,Ru)
+// Vector subtract bytes
+// Rdd=vsubb(Rss,Rtt)
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY +
+//===----------------------------------------------------------------------===//
+
+// Multiply and use lower result.
+// Rd=add(#u6,mpyi(Rs,#U6))
+def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst),
+ (ins u6Imm:$src1, IntRegs:$src2, u6Imm:$src3),
+ "$dst = add(#$src1, mpyi($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (add (mul IntRegs:$src2, u6ImmPred:$src3), u6ImmPred:$src1))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(#u6,mpyi(Rs,Rt))
+
+def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst),
+ (ins u6Imm:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst = add(#$src1, mpyi($src2, $src3))",
+ [(set IntRegs:$dst,
+ (add (mul IntRegs:$src2, IntRegs:$src3), u6ImmPred:$src1))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(Ru,mpyi(#u6:2,Rs))
+def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ "$dst = add($src1, mpyi(#$src2, $src3))",
+ [(set IntRegs:$dst,
+ (add IntRegs:$src1, (mul IntRegs:$src3, u6_2ImmPred:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(Ru,mpyi(Rs,#u6))
+def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u6Imm:$src3),
+ "$dst = add($src1, mpyi($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (add IntRegs:$src1, (mul IntRegs:$src2, u6ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// Rx=add(Ru,mpyi(Rx,Rs))
+def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst = add($src1, mpyi($src2, $src3))",
+ [(set IntRegs:$dst,
+ (add IntRegs:$src1, (mul IntRegs:$src2, IntRegs:$src3)))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+// Rxx^=pmpyw(Rs,Rt)
+
+// Vector reduce multiply word by signed half (32x16)
+// Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+// Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+// Rxx+=vrmpyweh(Rss,Rtt)[:<<1]
+// Rxx+=vrmpywoh(Rss,Rtt)[:<<1]
+
+// Multiply and use upper result
+// Rd=mpy(Rs,Rt.H):<<1:sat
+// Rd=mpy(Rs,Rt.L):<<1:sat
+// Rd=mpy(Rs,Rt):<<1
+// Rd=mpy(Rs,Rt):<<1:sat
+// Rd=mpysu(Rs,Rt)
+// Rx+=mpy(Rs,Rt):<<1:sat
+// Rx-=mpy(Rs,Rt):<<1:sat
+
+// Vector multiply bytes
+// Rdd=vmpybsu(Rs,Rt)
+// Rdd=vmpybu(Rs,Rt)
+// Rxx+=vmpybsu(Rs,Rt)
+// Rxx+=vmpybu(Rs,Rt)
+
+// Vector polynomial multiply halfwords
+// Rdd=vpmpyh(Rs,Rt)
+// Rxx^=vpmpyh(Rs,Rt)
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/SHIFT +
+//===----------------------------------------------------------------------===//
+
+// Shift by immediate and accumulate.
+// Rx=add(#u8,asl(Rx,#U5))
+def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = add(#$src1, asl($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (add (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=add(#u8,lsr(Rx,#U5))
+def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = add(#$src1, lsr($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (add (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=sub(#u8,asl(Rx,#U5))
+def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = sub(#$src1, asl($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (sub (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=sub(#u8,lsr(Rx,#U5))
+def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = sub(#$src1, lsr($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (sub (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Shift by immediate and logical.
+// Rx=and(#u8,asl(Rx,#U5))
+def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = and(#$src1, asl($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (and (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=and(#u8,lsr(Rx,#U5))
+def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = and(#$src1, lsr($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (and (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=or(#u8,asl(Rx,#U5))
+def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = or(#$src1, asl($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (or (shl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=or(#u8,lsr(Rx,#U5))
+def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = or(#$src1, lsr($src2, #$src3))",
+ [(set IntRegs:$dst,
+ (or (srl IntRegs:$src2, u5ImmPred:$src3), u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Shift by register.
+// Rd=lsl(#s6,Rt)
+def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2),
+ "$dst = lsl(#$src1, $src2)",
+ [(set IntRegs:$dst, (shl s6ImmPred:$src1, IntRegs:$src2))]>,
+ Requires<[HasV4T]>;
+
+
+// Shift by register and logical.
+// Rxx^=asl(Rss,Rt)
+def ASLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= asl($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (xor DoubleRegs:$src1, (shl DoubleRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rxx^=asr(Rss,Rt)
+def ASRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= asr($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (xor DoubleRegs:$src1, (sra DoubleRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rxx^=lsl(Rss,Rt)
+def LSLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= lsl($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (xor DoubleRegs:$src1, (shl DoubleRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rxx^=lsr(Rss,Rt)
+def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= lsr($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (xor DoubleRegs:$src1, (srl DoubleRegs:$src2, IntRegs:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/SHIFT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Word, Half, Byte
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Word
+//
+// Implemented:
+// MEMw_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5
+// MEMw_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5
+// MEMw_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt
+// MEMw_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt
+// MEMw_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt
+// MEMw_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt
+// MEMw_ADDi_V4 : memw(Rs+#u6:2)+=#U5
+// MEMw_SUBi_V4 : memw(Rs+#u6:2)-=#U5
+// MEMw_ADDr_V4 : memw(Rs+#u6:2)+=Rt
+// MEMw_SUBr_V4 : memw(Rs+#u6:2)-=Rt
+// MEMw_CLRr_V4 : memw(Rs+#u6:2)&=Rt
+// MEMw_SETr_V4 : memw(Rs+#u6:2)|=Rt
+//
+// Not implemented:
+// MEMw_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5)
+// MEMw_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5)
+// MEMw_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5)
+// MEMw_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMw_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMw_ADDi_indexed_V4 and
+// MEMw_SUBi_indexed_V4; a later pass will change it
+// to the corresponding pattern.
+let AddedComplexity = 30 in
+def MEMw_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, m6Imm:$addend),
+ "Error; should not emit",
+ [(store (add (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+ m6ImmPred:$addend),
+ (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
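+// Editor's note (an assumption about the fixup, not stated in this patch):
+// since the m6Imm addend is signed, the single pseudo covers both memops;
+// the later pass would rewrite, for example,
+// memw(r0+#8) += #-3 (ADDSUB pseudo, addend -3)
+// into
+// memw(r0+#8) -= #3 (MEMw_SUBi_indexed_MEM_V4)
+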
+// memw(Rs+#u6:2) += #U5
+let AddedComplexity = 30 in
+def MEMw_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$addend),
+ "memw($base+#$offset) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= #U5
+let AddedComplexity = 30 in
+def MEMw_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$subend),
+ "memw($base+#$offset) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += Rt
+let AddedComplexity = 30 in
+def MEMw_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$addend),
+ "memw($base+#$offset) += $addend",
+ [(store (add (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+ IntRegs:$addend),
+ (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= Rt
+let AddedComplexity = 30 in
+def MEMw_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$subend),
+ "memw($base+#$offset) -= $subend",
+ [(store (sub (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+ IntRegs:$subend),
+ (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) &= Rt
+let AddedComplexity = 30 in
+def MEMw_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$andend),
+ "memw($base+#$offset) += $andend",
+ [(store (and (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+IntRegs:$andend),
+ (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) |= Rt
+let AddedComplexity = 30 in
+def MEMw_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$orend),
+ "memw($base+#$offset) |= $orend",
+ [(store (or (load (add IntRegs:$base, u6_2ImmPred:$offset)),
+ IntRegs:$orend),
+ (add IntRegs:$base, u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// MEMw_ADDSUBi_V4:
+// Pseudo operation for MEMw_ADDi_V4 and MEMw_SUBi_V4;
+// a later pass will change it to the right pattern.
+let AddedComplexity = 30 in
+def MEMw_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, m6Imm:$addend),
+ "Error; should not emit",
+ [(store (add (load ADDRriU6_2:$addr), m6ImmPred:$addend),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += #U5
+let AddedComplexity = 30 in
+def MEMw_ADDi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$addend),
+ "memw($addr) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= #U5
+let AddedComplexity = 30 in
+def MEMw_SUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$subend),
+ "memw($addr) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += Rt
+let AddedComplexity = 30 in
+def MEMw_ADDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$addend),
+ "memw($addr) += $addend",
+ [(store (add (load ADDRriU6_2:$addr), IntRegs:$addend),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= Rt
+let AddedComplexity = 30 in
+def MEMw_SUBr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$subend),
+ "memw($addr) -= $subend",
+ [(store (sub (load ADDRriU6_2:$addr), IntRegs:$subend),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) &= Rt
+let AddedComplexity = 30 in
+def MEMw_ANDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$andend),
+ "memw($addr) &= $andend",
+ [(store (and (load ADDRriU6_2:$addr), IntRegs:$andend),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) |= Rt
+let AddedComplexity = 30 in
+def MEMw_ORr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$orend),
+ "memw($addr) |= $orend",
+ [(store (or (load ADDRriU6_2:$addr), IntRegs:$orend),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
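+
+// Editor's illustration (an assumption, not part of this patch): the
+// register-form patterns above match a load-modify-store of one address,
+// e.g. in LLVM IR:
+// %v = load i32* %p
+// %a = add i32 %v, %t
+// store i32 %a, i32* %p
+// which MEMw_ADDr_MEM_V4 folds into the single memop "memw($addr) += Rt".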
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Halfword
+//
+// Implemented:
+// MEMh_ADDi_indexed_V4 : memh(Rs+#u6:1)+=#U5
+// MEMh_SUBi_indexed_V4 : memh(Rs+#u6:1)-=#U5
+// MEMh_ADDr_indexed_V4 : memh(Rs+#u6:1)+=Rt
+// MEMh_SUBr_indexed_V4 : memh(Rs+#u6:1)-=Rt
+// MEMh_CLRr_indexed_V4 : memh(Rs+#u6:1)&=Rt
+// MEMh_SETr_indexed_V4 : memh(Rs+#u6:1)|=Rt
+// MEMh_ADDi_V4 : memh(Rs+#u6:1)+=#U5
+// MEMh_SUBi_V4 : memh(Rs+#u6:1)-=#U5
+// MEMh_ADDr_V4 : memh(Rs+#u6:1)+=Rt
+// MEMh_SUBr_V4 : memh(Rs+#u6:1)-=Rt
+// MEMh_CLRr_V4 : memh(Rs+#u6:1)&=Rt
+// MEMh_SETr_V4 : memh(Rs+#u6:1)|=Rt
+//
+// Not implemented:
+// MEMh_CLRi_indexed_V4 : memh(Rs+#u6:1)=clrbit(#U5)
+// MEMh_SETi_indexed_V4 : memh(Rs+#u6:1)=setbit(#U5)
+// MEMh_CLRi_V4 : memh(Rs+#u6:1)=clrbit(#U5)
+// MEMh_SETi_V4 : memh(Rs+#u6:1)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMh_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMh_ADDi_indexed_V4 and
+// MEMh_SUBi_indexed_V4; a later pass will change it
+// to the corresponding pattern.
+let AddedComplexity = 30 in
+def MEMh_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei16 (add (sextloadi16 (add IntRegs:$base,
+ u6_1ImmPred:$offset)),
+ m6ImmPred:$addend),
+ (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += #U5
+let AddedComplexity = 30 in
+def MEMh_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$addend),
+ "memh($base+#$offset) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= #U5
+let AddedComplexity = 30 in
+def MEMh_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$subend),
+ "memh($base+#$offset) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += Rt
+let AddedComplexity = 30 in
+def MEMh_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$addend),
+ "memh($base+#$offset) += $addend",
+ [(truncstorei16 (add (sextloadi16 (add IntRegs:$base,
+ u6_1ImmPred:$offset)),
+ IntRegs:$addend),
+ (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= Rt
+let AddedComplexity = 30 in
+def MEMh_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$subend),
+ "memh($base+#$offset) -= $subend",
+ [(truncstorei16 (sub (sextloadi16 (add IntRegs:$base,
+ u6_1ImmPred:$offset)),
+ IntRegs:$subend),
+ (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) &= Rt
+let AddedComplexity = 30 in
+def MEMh_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$andend),
+ "memh($base+#$offset) += $andend",
+ [(truncstorei16 (and (sextloadi16 (add IntRegs:$base,
+ u6_1ImmPred:$offset)),
+ IntRegs:$andend),
+ (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) |= Rt
+let AddedComplexity = 30 in
+def MEMh_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$orend),
+ "memh($base+#$offset) |= $orend",
+ [(truncstorei16 (or (sextloadi16 (add IntRegs:$base,
+ u6_1ImmPred:$offset)),
+ IntRegs:$orend),
+ (add IntRegs:$base, u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// MEMh_ADDSUBi_V4:
+// Pseudo operation for MEMh_ADDi_V4 and MEMh_SUBi_V4;
+// a later pass will change it to the right pattern.
+let AddedComplexity = 30 in
+def MEMh_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
+ m6ImmPred:$addend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += #U5
+let AddedComplexity = 30 in
+def MEMh_ADDi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$addend),
+ "memh($addr) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= #U5
+let AddedComplexity = 30 in
+def MEMh_SUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$subend),
+ "memh($addr) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += Rt
+let AddedComplexity = 30 in
+def MEMh_ADDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$addend),
+ "memh($addr) += $addend",
+ [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
+ IntRegs:$addend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= Rt
+let AddedComplexity = 30 in
+def MEMh_SUBr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$subend),
+ "memh($addr) -= $subend",
+ [(truncstorei16 (sub (sextloadi16 ADDRriU6_1:$addr),
+ IntRegs:$subend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) &= Rt
+let AddedComplexity = 30 in
+def MEMh_ANDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$andend),
+ "memh($addr) &= $andend",
+ [(truncstorei16 (and (sextloadi16 ADDRriU6_1:$addr),
+ IntRegs:$andend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) |= Rt
+let AddedComplexity = 30 in
+def MEMh_ORr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$orend),
+ "memh($addr) |= $orend",
+ [(truncstorei16 (or (sextloadi16 ADDRriU6_1:$addr),
+ IntRegs:$orend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Byte
+//
+// Implemented:
+// MEMb_ADDi_indexed_V4 : memb(Rs+#u6:0)+=#U5
+// MEMb_SUBi_indexed_V4 : memb(Rs+#u6:0)-=#U5
+// MEMb_ADDr_indexed_V4 : memb(Rs+#u6:0)+=Rt
+// MEMb_SUBr_indexed_V4 : memb(Rs+#u6:0)-=Rt
+// MEMb_CLRr_indexed_V4 : memb(Rs+#u6:0)&=Rt
+// MEMb_SETr_indexed_V4 : memb(Rs+#u6:0)|=Rt
+// MEMb_ADDi_V4 : memb(Rs+#u6:0)+=#U5
+// MEMb_SUBi_V4 : memb(Rs+#u6:0)-=#U5
+// MEMb_ADDr_V4 : memb(Rs+#u6:0)+=Rt
+// MEMb_SUBr_V4 : memb(Rs+#u6:0)-=Rt
+// MEMb_CLRr_V4 : memb(Rs+#u6:0)&=Rt
+// MEMb_SETr_V4 : memb(Rs+#u6:0)|=Rt
+//
+// Not implemented:
+// MEMb_CLRi_indexed_V4 : memb(Rs+#u6:0)=clrbit(#U5)
+// MEMb_SETi_indexed_V4 : memb(Rs+#u6:0)=setbit(#U5)
+// MEMb_CLRi_V4 : memb(Rs+#u6:0)=clrbit(#U5)
+// MEMb_SETi_V4 : memb(Rs+#u6:0)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMb_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMb_ADDi_indexed_V4 and
+// MEMb_SUBi_indexed_V4; a later pass will change it
+// to the corresponding pattern.
+let AddedComplexity = 30 in
+def MEMb_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei8 (add (sextloadi8 (add IntRegs:$base,
+ u6_0ImmPred:$offset)),
+ m6ImmPred:$addend),
+ (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += #U5
+let AddedComplexity = 30 in
+def MEMb_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$addend),
+ "memb($base+#$offset) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= #U5
+let AddedComplexity = 30 in
+def MEMb_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$subend),
+ "memb($base+#$offset) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += Rt
+let AddedComplexity = 30 in
+def MEMb_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$addend),
+ "memb($base+#$offset) += $addend",
+ [(truncstorei8 (add (sextloadi8 (add IntRegs:$base,
+ u6_0ImmPred:$offset)),
+ IntRegs:$addend),
+ (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= Rt
+let AddedComplexity = 30 in
+def MEMb_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$subend),
+ "memb($base+#$offset) -= $subend",
+ [(truncstorei8 (sub (sextloadi8 (add IntRegs:$base,
+ u6_0ImmPred:$offset)),
+ IntRegs:$subend),
+ (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) &= Rt
+let AddedComplexity = 30 in
+def MEMb_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$andend),
+ "memb($base+#$offset) += $andend",
+ [(truncstorei8 (and (sextloadi8 (add IntRegs:$base,
+ u6_0ImmPred:$offset)),
+ IntRegs:$andend),
+ (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) |= Rt
+let AddedComplexity = 30 in
+def MEMb_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$orend),
+ "memb($base+#$offset) |= $orend",
+ [(truncstorei8 (or (sextloadi8 (add IntRegs:$base,
+ u6_0ImmPred:$offset)),
+ IntRegs:$orend),
+ (add IntRegs:$base, u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// MEMb_ADDSUBi_V4:
+// Pseudo operation for MEMb_ADDi_V4 and MEMb_SUBi_V4;
+// a later pass will change it to the right pattern.
+let AddedComplexity = 30 in
+def MEMb_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
+ m6ImmPred:$addend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += #U5
+let AddedComplexity = 30 in
+def MEMb_ADDi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$addend),
+ "memb($addr) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= #U5
+let AddedComplexity = 30 in
+def MEMb_SUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$subend),
+ "memb($addr) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += Rt
+let AddedComplexity = 30 in
+def MEMb_ADDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$addend),
+ "memb($addr) += $addend",
+ [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
+ IntRegs:$addend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= Rt
+let AddedComplexity = 30 in
+def MEMb_SUBr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$subend),
+ "memb($addr) -= $subend",
+ [(truncstorei8 (sub (sextloadi8 ADDRriU6_0:$addr),
+ IntRegs:$subend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) &= Rt
+let AddedComplexity = 30 in
+def MEMb_ANDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$andend),
+ "memb($addr) &= $andend",
+ [(truncstorei8 (and (sextloadi8 ADDRriU6_0:$addr),
+ IntRegs:$andend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) |= Rt
+let AddedComplexity = 30 in
+def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$orend),
+ "memb($addr) |= $orend",
+ [(truncstorei8 (or (sextloadi8 ADDRriU6_0:$addr),
+ IntRegs:$orend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED +
+//===----------------------------------------------------------------------===//
+
+// Hexagon V4 only supports these flavors of byte/half compare instructions:
+// EQ/GT/GTU. Other flavors (GE/GEU/LT/LTU/LE/LEU) are not supported by the
+// hardware, but the compiler can still synthesize them by combining the
+// implemented patterns appropriately.
+// Implemented patterns: EQ/GT/GTU.
+// Missing patterns: GE/GEU/LT/LTU/LE/LEU.
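+//
+// Editor's sketch (an assumption, not part of this patch): a missing
+// flavor can reuse an implemented compare with the operands swapped,
+// since (a < b) == (b > a). For the sign-extended byte compare:
+// def : Pat<(setlt (shl IntRegs:$src1, (i32 24)),
+// (shl IntRegs:$src2, (i32 24))),
+// (CMPbGTrr_V4 IntRegs:$src2, IntRegs:$src1)>;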
+
+// Pd=cmpb.eq(Rs,#u8)
+let isCompare = 1 in
+def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u8Imm:$src2),
+ "$dst = cmpb.eq($src1, #$src2)",
+ [(set PredRegs:$dst, (seteq (and IntRegs:$src1, 255),
+ u8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.eq(Rs,Rt)
+let isCompare = 1 in
+def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.eq($src1, $src2)",
+ [(set PredRegs:$dst, (seteq (and (xor IntRegs:$src1,
+ IntRegs:$src2),
+ 255),
+ 0))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.eq(Rs,Rt)
+let isCompare = 1 in
+def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.eq($src1, $src2)",
+ [(set PredRegs:$dst, (seteq (shl IntRegs:$src1, (i32 24)),
+ (shl IntRegs:$src2, (i32 24))))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gt(Rs,#s8)
+let isCompare = 1 in
+def CMPbGTri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s32Imm:$src2),
+ "$dst = cmpb.gt($src1, #$src2)",
+ [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 24)),
+ s32_24ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gt(Rs,Rt)
+let isCompare = 1 in
+def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.gt($src1, $src2)",
+ [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 24)),
+ (shl IntRegs:$src2, (i32 24))))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gtu(Rs,#u7)
+let isCompare = 1 in
+def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u7Imm:$src2),
+ "$dst = cmpb.gtu($src1, #$src2)",
+ [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 255),
+ u7ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gtu(Rs,Rt)
+let isCompare = 1 in
+def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.gtu($src1, $src2)",
+ [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 255),
+ (and IntRegs:$src2, 255)))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.eq) ri.
+// Pd=cmph.eq(Rs,#s8)
+let isCompare = 1 in
+def CMPhEQri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u16Imm:$src2),
+ "$dst = cmph.eq($src1, #$src2)",
+ [(set PredRegs:$dst, (seteq (and IntRegs:$src1, 65535),
+ u16_s8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.eq) rr.
+// Case 1: xor + and, then compare:
+// r0=xor(r0,r1)
+// r0=and(r0,#0xffff)
+// p0=cmp.eq(r0,#0)
+// Pd=cmph.eq(Rs,Rt)
+let isCompare = 1 in
+def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.eq($src1, $src2)",
+ [(set PredRegs:$dst, (seteq (and (xor IntRegs:$src1,
+ IntRegs:$src2),
+ 65535),
+ 0))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.eq) rr.
+// Case 2: shift left 16 bits then compare:
+// r0=asl(r0,16)
+// r1=asl(r1,16)
+// p0=cmp.eq(r0,r1)
+// Pd=cmph.eq(Rs,Rt)
+let isCompare = 1 in
+def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.eq($src1, $src2)",
+ [(set PredRegs:$dst, (seteq (shl IntRegs:$src1, (i32 16)),
+ (shl IntRegs:$src2, (i32 16))))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.gt) ri.
+// Pd=cmph.gt(Rs,#s8)
+let isCompare = 1 in
+def CMPhGTri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s32Imm:$src2),
+ "$dst = cmph.gt($src1, #$src2)",
+ [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 16)),
+ s32_16s8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.gt) rr.
+// Pd=cmph.gt(Rs,Rt)
+let isCompare = 1 in
+def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.gt($src1, $src2)",
+ [(set PredRegs:$dst, (setgt (shl IntRegs:$src1, (i32 16)),
+ (shl IntRegs:$src2, (i32 16))))]>,
+ Requires<[HasV4T]>;
+
+// Unsigned half compare rr (.gtu).
+// Pd=cmph.gtu(Rs,Rt)
+let isCompare = 1 in
+def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.gtu($src1, $src2)",
+ [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 65535),
+ (and IntRegs:$src2, 65535)))]>,
+ Requires<[HasV4T]>;
+
+// Unsigned half compare ri (.gtu).
+// Pd=cmph.gtu(Rs,#u7)
+let isCompare = 1 in
+def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u7Imm:$src2),
+ "$dst = cmph.gtu($src1, #$src2)",
+ [(set PredRegs:$dst, (setugt (and IntRegs:$src1, 65535),
+ u7ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+// Deallocate frame and return.
+// dealloc_return
+let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicable = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_V4 : NVInst_V4<(outs), (ins i32imm:$amt1),
+ "dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps) dealloc_return
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, i32imm:$amt1),
+ "if ($src1) dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps) dealloc_return
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cNotPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1) dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps.new) dealloc_return:nt
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if ($src1.new) dealloc_return:nt",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps.new) dealloc_return:nt
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cNotdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1.new) dealloc_return:nt",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps.new) dealloc_return:t
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if ($src1.new) dealloc_return:t",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps.new) dealloc_return:t
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_cNotdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1.new) dealloc_return:t",
+ []>,
+ Requires<[HasV4T]>;
+}
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
new file mode 100644
index 0000000..1328eba
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -0,0 +1,3462 @@
+//===- HexagonIntrinsics.td - Instruction intrinsics -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is populated based on the following specs:
+// Hexagon V2 Architecture
+// Application-Level Specification
+// 80-V9418-8 Rev. B
+// March 4, 2008
+//===----------------------------------------------------------------------===//
+
+//
+// ALU 32 types.
+//
+
+class qi_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_ALU32_siu8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_ALU32_siu9<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_qisisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_ALU32_qis8si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2,
+ IntRegs:$src3))]>;
+
+class si_ALU32_qisis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ s8Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_ALU32_qis8s8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, s8Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>;
+
+class si_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
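+// Editor's note: classes such as si_ALU32_sisi are instantiated once per
+// intrinsic elsewhere in this file; for example (illustration only,
+// assuming the int_hexagon_A2_add intrinsic):
+// def Hexagon_A2_add : si_ALU32_sisi<"add", int_hexagon_A2_add>;
+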
+class si_ALU32_sisi_sat<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU32_sisi_rnd<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU32_sis16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s16Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_s10si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins s10Imm:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")),
+ [(set IntRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>;
+
+class si_lo_ALU32_siu16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2),
+ !strconcat("$dst.l = ", !strconcat(opc , "#$src2")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_hi_ALU32_siu16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2),
+ !strconcat("$dst.h = ", !strconcat(opc , "#$src2")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_s16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins s16Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1")),
+ [(set IntRegs:$dst, (IntID imm:$src1))]>;
+
+class di_ALU32_s8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1))]>;
+
+class di_ALU64_di<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class si_ALU32_si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_ALU32_si_tfr<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+//
+// ALU 64 types.
+//
+
+class si_ALU64_si_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_ALU64_sidi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_qididi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$src3))]>;
+
+class di_ALU64_sisi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_ALU64_didi_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_rnd<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_crnd<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_rnd_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_crnd_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class qi_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_ALU64_sisi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+//
+// SInst classes.
+//
+
+class qi_SInst_qi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+class qi_SInst_qi_pxfer<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+class qi_SInst_qiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_qiqi_neg<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, !$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_SInst_di<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class di_SInst_di_sat<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class si_SInst_di<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class si_SInst_di_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class di_SInst_disi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class di_SInst_didi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class si_SInst_sisiu3<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u3Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_SInst_diu5<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+class si_SInst_disi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class si_SInst_sidi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_SInst_disisi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class di_SInst_sisi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_siu5<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_SInst_siu6<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u6Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_SInst_sisi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
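+// Editor's illustration (assuming the int_hexagon_S2_ct0 intrinsic): a
+// one-operand SInst class would be instantiated as, e.g.:
+// def Hexagon_S2_ct0 : si_SInst_si<"ct0", int_hexagon_S2_ct0>;
+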
+class si_SInst_si_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class di_SInst_qi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_qi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_qiqi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_sisi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_SInst_diu6<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+class si_SInst_siu5<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_SInst_siu5_rnd<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_SInst_siu5u5<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2, u5Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>;
+
+class si_SInst_sisisi_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
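+// Editor's note: the "$dst2 = $dst" constraint ties the accumulator input
+// to the output, giving the read-modify-write "$dst += op(...)" form; for
+// example (illustration, assuming int_hexagon_S2_asr_r_r_acc):
+// def Hexagon_S2_asr_r_r_acc :
+// si_SInst_sisisi_acc<"asr", int_hexagon_S2_asr_r_r_acc>;
+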
+class si_SInst_sisisi_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didisi_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didisi_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1, IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5u5<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2, u5Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2, imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisidi<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6u6<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2, u6Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, #$src2, #$src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2, imm:$src3))],
+ "$dst2 = $dst">;
+
+class di_SInst_dididi<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_diu6u6<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2,
+ u6Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2,
+ imm:$src3))]>;
+
+class di_SInst_didisi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3))]>;
+
+class di_SInst_didiqi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3))]>;
+
+class di_SInst_didiu3<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ u3Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
+ imm:$src3))]>;
+
+class di_SInst_didisi_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didisi_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_xor<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisisi_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisisi_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_xor<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1, imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+
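+// Naming convention (inferred from the definitions in this file, not an
+// official scheme): the leading si_/di_/qi_ token names the result type
+// (32-bit IntRegs, 64-bit DoubleRegs, predicate); the next token names
+// the instruction class (SInst, MInst, ALU32, ALU64); the following
+// letters name the source operand types (si = IntRegs, di = DoubleRegs,
+// s8/u5/u6/... = signed/unsigned immediates); and trailing suffixes
+// mirror Hexagon assembler modifiers: _sat (:sat, saturate), _rnd (:rnd,
+// round), _s1 (:<<1, shift left by one), _conj ($src2*, conjugate),
+// _acc/_nac (+= / -= accumulate), and _hh/_hl/_lh/_ll (.H/.L halfword
+// selections).
+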
+//
+// MInst classes.
+//
+
+class di_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_hh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_hl<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_lh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_ll<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_disisi_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2*):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_s1_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2*):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_s1_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2*):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_s8s8<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins s8Imm:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1, imm:$src2))]>;
+
+class si_MInst_sisi<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_lh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hl<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_ll<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_up<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
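+// The _up suffix presumably marks the variant returning the upper word of
+// the result; its printed form is identical to si_MInst_sisi, and the
+// distinction lives only in the intrinsic it selects.
+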
+class di_MInst_didi<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_didi_conj<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_sisi_s1_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2*):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_didi_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_didi_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class si_SInst_sisi_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_l_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2.L):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_h_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2.H):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2*):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_s1_rnd_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2*):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisisi_xacc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst += ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst -= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisis8_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ s8Imm:$src3),
+ !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisis8_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ s8Imm:$src3),
+ !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisiu4u5<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u4Imm:$src2, u5Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2, imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisiu8_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ u8Imm:$src3),
+ !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisiu8_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ u8Imm:$src3),
+ !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*):sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_s1_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didi_s1_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class si_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_MInst_didi_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_dididi_acc_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
+ DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_rnd_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disi_s1_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didisi_acc_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_disi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_didi<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+
+/********************************************************************
+* ALU32/ALU *
+*********************************************************************/
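+
+// The defs below instantiate the classes above, one def per Hexagon
+// intrinsic (int_hexagon_*), grouped by instruction class and operation.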
+
+// ALU32 / ALU / Add.
+def Hexagon_A2_add:
+ si_ALU32_sisi <"add", int_hexagon_A2_add>;
+def Hexagon_A2_addi:
+ si_ALU32_sis16 <"add", int_hexagon_A2_addi>;
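+// For illustration (expansion inferred from the class patterns above):
+// Hexagon_A2_add selects a call to int_hexagon_A2_add into
+// "$dst = add($src1, $src2)", e.g. "r0 = add(r1, r2)" for an
+// illustrative register assignment; Hexagon_A2_addi takes a signed
+// 16-bit immediate instead of the second register.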
+
+// ALU32 / ALU / Logical operations.
+def Hexagon_A2_and:
+ si_ALU32_sisi <"and", int_hexagon_A2_and>;
+def Hexagon_A2_andir:
+ si_ALU32_sis10 <"and", int_hexagon_A2_andir>;
+def Hexagon_A2_not:
+ si_ALU32_si <"not", int_hexagon_A2_not>;
+def Hexagon_A2_or:
+ si_ALU32_sisi <"or", int_hexagon_A2_or>;
+def Hexagon_A2_orir:
+ si_ALU32_sis10 <"or", int_hexagon_A2_orir>;
+def Hexagon_A2_xor:
+ si_ALU32_sisi <"xor", int_hexagon_A2_xor>;
+
+// ALU32 / ALU / Negate.
+def Hexagon_A2_neg:
+ si_ALU32_si <"neg", int_hexagon_A2_neg>;
+
+// ALU32 / ALU / Subtract.
+def Hexagon_A2_sub:
+ si_ALU32_sisi <"sub", int_hexagon_A2_sub>;
+def Hexagon_A2_subri:
+ si_ALU32_s10si <"sub", int_hexagon_A2_subri>;
+
+// ALU32 / ALU / Transfer Immediate.
+def Hexagon_A2_tfril:
+ si_lo_ALU32_siu16 <"", int_hexagon_A2_tfril>;
+def Hexagon_A2_tfrih:
+ si_hi_ALU32_siu16 <"", int_hexagon_A2_tfrih>;
+def Hexagon_A2_tfrsi:
+ si_ALU32_s16 <"", int_hexagon_A2_tfrsi>;
+def Hexagon_A2_tfrpi:
+ di_ALU32_s8 <"", int_hexagon_A2_tfrpi>;
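+// These transfer defs pass an empty opc string; presumably the full asm
+// syntax (writing the low/high halfword of $dst, or a plain
+// register/immediate transfer) is supplied by the si_lo_/si_hi_/si_/di_
+// classes themselves rather than by a mnemonic.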
+
+// ALU32 / ALU / Transfer Register.
+def Hexagon_A2_tfr:
+ si_ALU32_si_tfr <"", int_hexagon_A2_tfr>;
+
+/********************************************************************
+* ALU32/PERM *
+*********************************************************************/
+
+// ALU32 / PERM / Combine.
+def Hexagon_A2_combinew:
+ di_ALU32_sisi <"combine", int_hexagon_A2_combinew>;
+def Hexagon_A2_combine_hh:
+ si_MInst_sisi_hh <"combine", int_hexagon_A2_combine_hh>;
+def Hexagon_A2_combine_lh:
+ si_MInst_sisi_lh <"combine", int_hexagon_A2_combine_lh>;
+def Hexagon_A2_combine_hl:
+ si_MInst_sisi_hl <"combine", int_hexagon_A2_combine_hl>;
+def Hexagon_A2_combine_ll:
+ si_MInst_sisi_ll <"combine", int_hexagon_A2_combine_ll>;
+def Hexagon_A2_combineii:
+ di_MInst_s8s8 <"combine", int_hexagon_A2_combineii>;
+
+// ALU32 / PERM / Mux.
+def Hexagon_C2_mux:
+ si_ALU32_qisisi <"mux", int_hexagon_C2_mux>;
+def Hexagon_C2_muxri:
+ si_ALU32_qis8si <"mux", int_hexagon_C2_muxri>;
+def Hexagon_C2_muxir:
+ si_ALU32_qisis8 <"mux", int_hexagon_C2_muxir>;
+def Hexagon_C2_muxii:
+ si_ALU32_qis8s8 <"mux", int_hexagon_C2_muxii>;
+
+// ALU32 / PERM / Shift halfword.
+def Hexagon_A2_aslh:
+ si_ALU32_si <"aslh", int_hexagon_A2_aslh>;
+def Hexagon_A2_asrh:
+ si_ALU32_si <"asrh", int_hexagon_A2_asrh>;
+def SI_to_SXTHI_asrh:
+ si_ALU32_si <"asrh", int_hexagon_SI_to_SXTHI_asrh>;
+
+// ALU32 / PERM / Sign/zero extend.
+def Hexagon_A2_sxth:
+ si_ALU32_si <"sxth", int_hexagon_A2_sxth>;
+def Hexagon_A2_sxtb:
+ si_ALU32_si <"sxtb", int_hexagon_A2_sxtb>;
+def Hexagon_A2_zxth:
+ si_ALU32_si <"zxth", int_hexagon_A2_zxth>;
+def Hexagon_A2_zxtb:
+ si_ALU32_si <"zxtb", int_hexagon_A2_zxtb>;
+
+/********************************************************************
+* ALU32/PRED *
+*********************************************************************/
+
+// ALU32 / PRED / Compare.
+def Hexagon_C2_cmpeq:
+ qi_ALU32_sisi <"cmp.eq", int_hexagon_C2_cmpeq>;
+def Hexagon_C2_cmpeqi:
+ qi_ALU32_sis10 <"cmp.eq", int_hexagon_C2_cmpeqi>;
+def Hexagon_C2_cmpgei:
+ qi_ALU32_sis8 <"cmp.ge", int_hexagon_C2_cmpgei>;
+def Hexagon_C2_cmpgeui:
+ qi_ALU32_siu8 <"cmp.geu", int_hexagon_C2_cmpgeui>;
+def Hexagon_C2_cmpgt:
+ qi_ALU32_sisi <"cmp.gt", int_hexagon_C2_cmpgt>;
+def Hexagon_C2_cmpgti:
+ qi_ALU32_sis10 <"cmp.gt", int_hexagon_C2_cmpgti>;
+def Hexagon_C2_cmpgtu:
+ qi_ALU32_sisi <"cmp.gtu", int_hexagon_C2_cmpgtu>;
+def Hexagon_C2_cmpgtui:
+ qi_ALU32_siu9 <"cmp.gtu", int_hexagon_C2_cmpgtui>;
+def Hexagon_C2_cmplt:
+ qi_ALU32_sisi <"cmp.lt", int_hexagon_C2_cmplt>;
+def Hexagon_C2_cmpltu:
+ qi_ALU32_sisi <"cmp.ltu", int_hexagon_C2_cmpltu>;
+
+/********************************************************************
+* ALU32/VH *
+*********************************************************************/
+
+// ALU32 / VH / Vector add halfwords.
+// Rd32=vadd[u]h(Rs32,Rt32)[:sat]
+def Hexagon_A2_svaddh:
+ si_ALU32_sisi <"vaddh", int_hexagon_A2_svaddh>;
+def Hexagon_A2_svaddhs:
+ si_ALU32_sisi_sat <"vaddh", int_hexagon_A2_svaddhs>;
+def Hexagon_A2_svadduhs:
+ si_ALU32_sisi_sat <"vadduh", int_hexagon_A2_svadduhs>;
+
+// ALU32 / VH / Vector average halfwords.
+def Hexagon_A2_svavgh:
+ si_ALU32_sisi <"vavgh", int_hexagon_A2_svavgh>;
+def Hexagon_A2_svavghs:
+ si_ALU32_sisi_rnd <"vavgh", int_hexagon_A2_svavghs>;
+def Hexagon_A2_svnavgh:
+ si_ALU32_sisi <"vnavgh", int_hexagon_A2_svnavgh>;
+
+// ALU32 / VH / Vector subtract halfwords.
+def Hexagon_A2_svsubh:
+ si_ALU32_sisi <"vsubh", int_hexagon_A2_svsubh>;
+def Hexagon_A2_svsubhs:
+ si_ALU32_sisi_sat <"vsubh", int_hexagon_A2_svsubhs>;
+def Hexagon_A2_svsubuhs:
+ si_ALU32_sisi_sat <"vsubuh", int_hexagon_A2_svsubuhs>;
+
+/********************************************************************
+* ALU64/ALU *
+*********************************************************************/
+
+// ALU64 / ALU / Add.
+def Hexagon_A2_addp:
+ di_ALU64_didi <"add", int_hexagon_A2_addp>;
+def Hexagon_A2_addsat:
+ si_ALU64_sisi_sat <"add", int_hexagon_A2_addsat>;
+
+// ALU64 / ALU / Add halfword.
+// Note: although these intrinsics are named "hl", the underlying operation
+// is "lh", so they intentionally inherit the si_ALU64_sisi_l16_lh class.
+def Hexagon_A2_addh_l16_hl:
+ si_ALU64_sisi_l16_lh <"add", int_hexagon_A2_addh_l16_hl>;
+def Hexagon_A2_addh_l16_ll:
+ si_ALU64_sisi_l16_ll <"add", int_hexagon_A2_addh_l16_ll>;
+
+def Hexagon_A2_addh_l16_sat_hl:
+ si_ALU64_sisi_l16_sat_lh <"add", int_hexagon_A2_addh_l16_sat_hl>;
+def Hexagon_A2_addh_l16_sat_ll:
+ si_ALU64_sisi_l16_sat_ll <"add", int_hexagon_A2_addh_l16_sat_ll>;
+
+def Hexagon_A2_addh_h16_hh:
+ si_ALU64_sisi_h16_hh <"add", int_hexagon_A2_addh_h16_hh>;
+def Hexagon_A2_addh_h16_hl:
+ si_ALU64_sisi_h16_hl <"add", int_hexagon_A2_addh_h16_hl>;
+def Hexagon_A2_addh_h16_lh:
+ si_ALU64_sisi_h16_lh <"add", int_hexagon_A2_addh_h16_lh>;
+def Hexagon_A2_addh_h16_ll:
+ si_ALU64_sisi_h16_ll <"add", int_hexagon_A2_addh_h16_ll>;
+
+def Hexagon_A2_addh_h16_sat_hh:
+ si_ALU64_sisi_h16_sat_hh <"add", int_hexagon_A2_addh_h16_sat_hh>;
+def Hexagon_A2_addh_h16_sat_hl:
+ si_ALU64_sisi_h16_sat_hl <"add", int_hexagon_A2_addh_h16_sat_hl>;
+def Hexagon_A2_addh_h16_sat_lh:
+ si_ALU64_sisi_h16_sat_lh <"add", int_hexagon_A2_addh_h16_sat_lh>;
+def Hexagon_A2_addh_h16_sat_ll:
+ si_ALU64_sisi_h16_sat_ll <"add", int_hexagon_A2_addh_h16_sat_ll>;
+
+// ALU64 / ALU / Compare.
+def Hexagon_C2_cmpeqp:
+ qi_ALU64_didi <"cmp.eq", int_hexagon_C2_cmpeqp>;
+def Hexagon_C2_cmpgtp:
+ qi_ALU64_didi <"cmp.gt", int_hexagon_C2_cmpgtp>;
+def Hexagon_C2_cmpgtup:
+ qi_ALU64_didi <"cmp.gtu", int_hexagon_C2_cmpgtup>;
+
+// ALU64 / ALU / Logical operations.
+def Hexagon_A2_andp:
+ di_ALU64_didi <"and", int_hexagon_A2_andp>;
+def Hexagon_A2_orp:
+ di_ALU64_didi <"or", int_hexagon_A2_orp>;
+def Hexagon_A2_xorp:
+ di_ALU64_didi <"xor", int_hexagon_A2_xorp>;
+
+// ALU64 / ALU / Maximum.
+def Hexagon_A2_max:
+ si_ALU64_sisi <"max", int_hexagon_A2_max>;
+def Hexagon_A2_maxu:
+ si_ALU64_sisi <"maxu", int_hexagon_A2_maxu>;
+
+// ALU64 / ALU / Minimum.
+def Hexagon_A2_min:
+ si_ALU64_sisi <"min", int_hexagon_A2_min>;
+def Hexagon_A2_minu:
+ si_ALU64_sisi <"minu", int_hexagon_A2_minu>;
+
+// ALU64 / ALU / Subtract.
+def Hexagon_A2_subp:
+ di_ALU64_didi <"sub", int_hexagon_A2_subp>;
+def Hexagon_A2_subsat:
+ si_ALU64_sisi_sat <"sub", int_hexagon_A2_subsat>;
+
+// ALU64 / ALU / Subtract halfword.
+// Note: although these intrinsics are named "hl", the underlying operation
+// is "lh", so they intentionally inherit the si_ALU64_sisi_l16_lh class.
+def Hexagon_A2_subh_l16_hl:
+ si_ALU64_sisi_l16_lh <"sub", int_hexagon_A2_subh_l16_hl>;
+def Hexagon_A2_subh_l16_ll:
+ si_ALU64_sisi_l16_ll <"sub", int_hexagon_A2_subh_l16_ll>;
+
+def Hexagon_A2_subh_l16_sat_hl:
+ si_ALU64_sisi_l16_sat_lh <"sub", int_hexagon_A2_subh_l16_sat_hl>;
+def Hexagon_A2_subh_l16_sat_ll:
+ si_ALU64_sisi_l16_sat_ll <"sub", int_hexagon_A2_subh_l16_sat_ll>;
+
+def Hexagon_A2_subh_h16_hh:
+ si_ALU64_sisi_h16_hh <"sub", int_hexagon_A2_subh_h16_hh>;
+def Hexagon_A2_subh_h16_hl:
+ si_ALU64_sisi_h16_hl <"sub", int_hexagon_A2_subh_h16_hl>;
+def Hexagon_A2_subh_h16_lh:
+ si_ALU64_sisi_h16_lh <"sub", int_hexagon_A2_subh_h16_lh>;
+def Hexagon_A2_subh_h16_ll:
+ si_ALU64_sisi_h16_ll <"sub", int_hexagon_A2_subh_h16_ll>;
+
+def Hexagon_A2_subh_h16_sat_hh:
+ si_ALU64_sisi_h16_sat_hh <"sub", int_hexagon_A2_subh_h16_sat_hh>;
+def Hexagon_A2_subh_h16_sat_hl:
+ si_ALU64_sisi_h16_sat_hl <"sub", int_hexagon_A2_subh_h16_sat_hl>;
+def Hexagon_A2_subh_h16_sat_lh:
+ si_ALU64_sisi_h16_sat_lh <"sub", int_hexagon_A2_subh_h16_sat_lh>;
+def Hexagon_A2_subh_h16_sat_ll:
+ si_ALU64_sisi_h16_sat_ll <"sub", int_hexagon_A2_subh_h16_sat_ll>;
+
+// ALU64 / ALU / Transfer register.
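+// Rdd=Rss: a plain register-pair copy, hence the empty mnemonic string.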
+def Hexagon_A2_tfrp:
+ di_ALU64_di <"", int_hexagon_A2_tfrp>;
+
+/********************************************************************
+* ALU64/BIT *
+*********************************************************************/
+
+// ALU64 / BIT / Masked parity.
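+// Rd32=parity(Rss32,Rtt32): the even/odd parity (XOR reduction) of Rss & Rtt.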
+def Hexagon_S2_parityp:
+ si_ALU64_didi <"parity", int_hexagon_S2_parityp>;
+
+/********************************************************************
+* ALU64/PERM *
+*********************************************************************/
+
+// ALU64 / PERM / Vector pack high and low halfwords.
+def Hexagon_S2_packhl:
+ di_ALU64_sisi <"packhl", int_hexagon_S2_packhl>;
+
+/********************************************************************
+* ALU64/VB *
+*********************************************************************/
+
+// ALU64 / VB / Vector add unsigned bytes.
+def Hexagon_A2_vaddub:
+ di_ALU64_didi <"vaddub", int_hexagon_A2_vaddub>;
+def Hexagon_A2_vaddubs:
+ di_ALU64_didi_sat <"vaddub", int_hexagon_A2_vaddubs>;
+
+// ALU64 / VB / Vector average unsigned bytes.
+def Hexagon_A2_vavgub:
+ di_ALU64_didi <"vavgub", int_hexagon_A2_vavgub>;
+def Hexagon_A2_vavgubr:
+ di_ALU64_didi_rnd <"vavgub", int_hexagon_A2_vavgubr>;
+
+// ALU64 / VB / Vector compare unsigned bytes.
+def Hexagon_A2_vcmpbeq:
+ qi_ALU64_didi <"vcmpb.eq", int_hexagon_A2_vcmpbeq>;
+def Hexagon_A2_vcmpbgtu:
+ qi_ALU64_didi <"vcmpb.gtu",int_hexagon_A2_vcmpbgtu>;
+
+// ALU64 / VB / Vector maximum/minimum unsigned bytes.
+def Hexagon_A2_vmaxub:
+ di_ALU64_didi <"vmaxub", int_hexagon_A2_vmaxub>;
+def Hexagon_A2_vminub:
+ di_ALU64_didi <"vminub", int_hexagon_A2_vminub>;
+
+// ALU64 / VB / Vector subtract unsigned bytes.
+def Hexagon_A2_vsubub:
+ di_ALU64_didi <"vsubub", int_hexagon_A2_vsubub>;
+def Hexagon_A2_vsububs:
+ di_ALU64_didi_sat <"vsubub", int_hexagon_A2_vsububs>;
+
+// ALU64 / VB / Vector mux.
+def Hexagon_C2_vmux:
+ di_ALU64_qididi <"vmux", int_hexagon_C2_vmux>;
+
+
+/********************************************************************
+* ALU64/VH *
+*********************************************************************/
+
+// ALU64 / VH / Vector add halfwords.
+// Rdd64=vadd[u]h(Rss64,Rtt64)[:sat]
+def Hexagon_A2_vaddh:
+ di_ALU64_didi <"vaddh", int_hexagon_A2_vaddh>;
+def Hexagon_A2_vaddhs:
+ di_ALU64_didi_sat <"vaddh", int_hexagon_A2_vaddhs>;
+def Hexagon_A2_vadduhs:
+ di_ALU64_didi_sat <"vadduh", int_hexagon_A2_vadduhs>;
+
+// ALU64 / VH / Vector average halfwords.
+// Rdd64=v[n]avg[u]h(Rss64,Rtt64)[:rnd|:crnd][:sat]
+def Hexagon_A2_vavgh:
+ di_ALU64_didi <"vavgh", int_hexagon_A2_vavgh>;
+def Hexagon_A2_vavghcr:
+ di_ALU64_didi_crnd <"vavgh", int_hexagon_A2_vavghcr>;
+def Hexagon_A2_vavghr:
+ di_ALU64_didi_rnd <"vavgh", int_hexagon_A2_vavghr>;
+def Hexagon_A2_vavguh:
+ di_ALU64_didi <"vavguh", int_hexagon_A2_vavguh>;
+def Hexagon_A2_vavguhr:
+ di_ALU64_didi_rnd <"vavguh", int_hexagon_A2_vavguhr>;
+def Hexagon_A2_vnavgh:
+ di_ALU64_didi <"vnavgh", int_hexagon_A2_vnavgh>;
+def Hexagon_A2_vnavghcr:
+ di_ALU64_didi_crnd_sat <"vnavgh", int_hexagon_A2_vnavghcr>;
+def Hexagon_A2_vnavghr:
+ di_ALU64_didi_rnd_sat <"vnavgh", int_hexagon_A2_vnavghr>;
+
+// ALU64 / VH / Vector compare halfwords.
+def Hexagon_A2_vcmpheq:
+ qi_ALU64_didi <"vcmph.eq", int_hexagon_A2_vcmpheq>;
+def Hexagon_A2_vcmphgt:
+ qi_ALU64_didi <"vcmph.gt", int_hexagon_A2_vcmphgt>;
+def Hexagon_A2_vcmphgtu:
+ qi_ALU64_didi <"vcmph.gtu",int_hexagon_A2_vcmphgtu>;
+
+// ALU64 / VH / Vector maximum halfwords.
+def Hexagon_A2_vmaxh:
+ di_ALU64_didi <"vmaxh", int_hexagon_A2_vmaxh>;
+def Hexagon_A2_vmaxuh:
+ di_ALU64_didi <"vmaxuh", int_hexagon_A2_vmaxuh>;
+
+// ALU64 / VH / Vector minimum halfwords.
+def Hexagon_A2_vminh:
+ di_ALU64_didi <"vminh", int_hexagon_A2_vminh>;
+def Hexagon_A2_vminuh:
+ di_ALU64_didi <"vminuh", int_hexagon_A2_vminuh>;
+
+// ALU64 / VH / Vector subtract halfwords.
+def Hexagon_A2_vsubh:
+ di_ALU64_didi <"vsubh", int_hexagon_A2_vsubh>;
+def Hexagon_A2_vsubhs:
+ di_ALU64_didi_sat <"vsubh", int_hexagon_A2_vsubhs>;
+def Hexagon_A2_vsubuhs:
+ di_ALU64_didi_sat <"vsubuh", int_hexagon_A2_vsubuhs>;
+
+
+/********************************************************************
+* ALU64/VW *
+*********************************************************************/
+
+// ALU64 / VW / Vector add words.
+// Rdd32=vaddw(Rss32,Rtt32)[:sat]
+def Hexagon_A2_vaddw:
+ di_ALU64_didi <"vaddw", int_hexagon_A2_vaddw>;
+def Hexagon_A2_vaddws:
+ di_ALU64_didi_sat <"vaddw", int_hexagon_A2_vaddws>;
+
+// ALU64 / VW / Vector average words.
+def Hexagon_A2_vavguw:
+ di_ALU64_didi <"vavguw", int_hexagon_A2_vavguw>;
+def Hexagon_A2_vavguwr:
+ di_ALU64_didi_rnd <"vavguw", int_hexagon_A2_vavguwr>;
+def Hexagon_A2_vavgw:
+ di_ALU64_didi <"vavgw", int_hexagon_A2_vavgw>;
+def Hexagon_A2_vavgwcr:
+ di_ALU64_didi_crnd <"vavgw", int_hexagon_A2_vavgwcr>;
+def Hexagon_A2_vavgwr:
+ di_ALU64_didi_rnd <"vavgw", int_hexagon_A2_vavgwr>;
+def Hexagon_A2_vnavgw:
+ di_ALU64_didi <"vnavgw", int_hexagon_A2_vnavgw>;
+def Hexagon_A2_vnavgwcr:
+ di_ALU64_didi_crnd_sat <"vnavgw", int_hexagon_A2_vnavgwcr>;
+def Hexagon_A2_vnavgwr:
+ di_ALU64_didi_rnd_sat <"vnavgw", int_hexagon_A2_vnavgwr>;
+
+// ALU64 / VW / Vector compare words.
+def Hexagon_A2_vcmpweq:
+ qi_ALU64_didi <"vcmpw.eq", int_hexagon_A2_vcmpweq>;
+def Hexagon_A2_vcmpwgt:
+ qi_ALU64_didi <"vcmpw.gt", int_hexagon_A2_vcmpwgt>;
+def Hexagon_A2_vcmpwgtu:
+ qi_ALU64_didi <"vcmpw.gtu",int_hexagon_A2_vcmpwgtu>;
+
+// ALU64 / VW / Vector maximum words.
+def Hexagon_A2_vmaxw:
+ di_ALU64_didi <"vmaxw", int_hexagon_A2_vmaxw>;
+def Hexagon_A2_vmaxuw:
+ di_ALU64_didi <"vmaxuw", int_hexagon_A2_vmaxuw>;
+
+// ALU64 / VW / Vector minimum words.
+def Hexagon_A2_vminw:
+ di_ALU64_didi <"vminw", int_hexagon_A2_vminw>;
+def Hexagon_A2_vminuw:
+ di_ALU64_didi <"vminuw", int_hexagon_A2_vminuw>;
+
+// ALU64 / VW / Vector subtract words.
+def Hexagon_A2_vsubw:
+ di_ALU64_didi <"vsubw", int_hexagon_A2_vsubw>;
+def Hexagon_A2_vsubws:
+ di_ALU64_didi_sat <"vsubw", int_hexagon_A2_vsubws>;
+
+
+/********************************************************************
+* CR *
+*********************************************************************/
+
+// CR / Logical reductions on predicates.
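+// Pd4=all8(Ps4) is true iff all eight bits of Ps are set;
+// Pd4=any8(Ps4) is true iff at least one bit is set.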
+def Hexagon_C2_all8:
+ qi_SInst_qi <"all8", int_hexagon_C2_all8>;
+def Hexagon_C2_any8:
+ qi_SInst_qi <"any8", int_hexagon_C2_any8>;
+
+// CR / Logical operations on predicates.
+def Hexagon_C2_pxfer_map:
+ qi_SInst_qi_pxfer <"", int_hexagon_C2_pxfer_map>;
+def Hexagon_C2_and:
+ qi_SInst_qiqi <"and", int_hexagon_C2_and>;
+def Hexagon_C2_andn:
+ qi_SInst_qiqi_neg <"and", int_hexagon_C2_andn>;
+def Hexagon_C2_not:
+ qi_SInst_qi <"not", int_hexagon_C2_not>;
+def Hexagon_C2_or:
+ qi_SInst_qiqi <"or", int_hexagon_C2_or>;
+def Hexagon_C2_orn:
+ qi_SInst_qiqi_neg <"or", int_hexagon_C2_orn>;
+def Hexagon_C2_xor:
+ qi_SInst_qiqi <"xor", int_hexagon_C2_xor>;
+
+
+/********************************************************************
+* MTYPE/ALU *
+*********************************************************************/
+
+// MTYPE / ALU / Add and accumulate.
+def Hexagon_M2_acci:
+ si_MInst_sisisi_acc <"add", int_hexagon_M2_acci>;
+def Hexagon_M2_accii:
+ si_MInst_sisis8_acc <"add", int_hexagon_M2_accii>;
+def Hexagon_M2_nacci:
+ si_MInst_sisisi_nac <"add", int_hexagon_M2_nacci>;
+def Hexagon_M2_naccii:
+ si_MInst_sisis8_nac <"add", int_hexagon_M2_naccii>;
+
+// MTYPE / ALU / Subtract and accumulate.
+def Hexagon_M2_subacc:
+ si_MInst_sisisi_acc <"sub", int_hexagon_M2_subacc>;
+
+// MTYPE / ALU / Vector absolute difference.
+def Hexagon_M2_vabsdiffh:
+ di_MInst_didi <"vabsdiffh",int_hexagon_M2_vabsdiffh>;
+def Hexagon_M2_vabsdiffw:
+ di_MInst_didi <"vabsdiffw",int_hexagon_M2_vabsdiffw>;
+
+// MTYPE / ALU / XOR and xor with destination.
+def Hexagon_M2_xor_xacc:
+ si_MInst_sisisi_xacc <"xor", int_hexagon_M2_xor_xacc>;
+
+
+/********************************************************************
+* MTYPE/COMPLEX *
+*********************************************************************/
+
+// MTYPE / COMPLEX / Complex multiply.
+// Rdd[+|-]=cmpy(Rs,Rt)[:<<1]:sat
+def Hexagon_M2_cmpys_s1:
+ di_MInst_sisi_s1_sat <"cmpy", int_hexagon_M2_cmpys_s1>;
+def Hexagon_M2_cmpys_s0:
+ di_MInst_sisi_sat <"cmpy", int_hexagon_M2_cmpys_s0>;
+def Hexagon_M2_cmpysc_s1:
+ di_MInst_sisi_s1_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s1>;
+def Hexagon_M2_cmpysc_s0:
+ di_MInst_sisi_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s0>;
+
+def Hexagon_M2_cmacs_s1:
+ di_MInst_disisi_acc_s1_sat <"cmpy", int_hexagon_M2_cmacs_s1>;
+def Hexagon_M2_cmacs_s0:
+ di_MInst_disisi_acc_sat <"cmpy", int_hexagon_M2_cmacs_s0>;
+def Hexagon_M2_cmacsc_s1:
+ di_MInst_disisi_acc_s1_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s1>;
+def Hexagon_M2_cmacsc_s0:
+ di_MInst_disisi_acc_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s0>;
+
+def Hexagon_M2_cnacs_s1:
+ di_MInst_disisi_nac_s1_sat <"cmpy", int_hexagon_M2_cnacs_s1>;
+def Hexagon_M2_cnacs_s0:
+ di_MInst_disisi_nac_sat <"cmpy", int_hexagon_M2_cnacs_s0>;
+def Hexagon_M2_cnacsc_s1:
+ di_MInst_disisi_nac_s1_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s1>;
+def Hexagon_M2_cnacsc_s0:
+ di_MInst_disisi_nac_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s0>;
+
+// MTYPE / COMPLEX / Complex multiply real or imaginary.
+def Hexagon_M2_cmpyr_s0:
+ di_MInst_sisi <"cmpyr", int_hexagon_M2_cmpyr_s0>;
+def Hexagon_M2_cmacr_s0:
+ di_MInst_disisi_acc <"cmpyr", int_hexagon_M2_cmacr_s0>;
+
+def Hexagon_M2_cmpyi_s0:
+ di_MInst_sisi <"cmpyi", int_hexagon_M2_cmpyi_s0>;
+def Hexagon_M2_cmaci_s0:
+ di_MInst_disisi_acc <"cmpyi", int_hexagon_M2_cmaci_s0>;
+
+// MTYPE / COMPLEX / Complex multiply with round and pack.
+// Rd32=cmpy(Rs32,[*]Rt32)[:<<1]:rnd:sat
+def Hexagon_M2_cmpyrs_s0:
+ si_MInst_sisi_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s0>;
+def Hexagon_M2_cmpyrs_s1:
+ si_MInst_sisi_s1_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s1>;
+
+def Hexagon_M2_cmpyrsc_s0:
+ si_MInst_sisi_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s0>;
+def Hexagon_M2_cmpyrsc_s1:
+ si_MInst_sisi_s1_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s1>;
+
+// MTYPE / COMPLEX / Vector complex multiply real or imaginary.
+def Hexagon_M2_vcmpy_s0_sat_i:
+ di_MInst_didi_sat <"vcmpyi", int_hexagon_M2_vcmpy_s0_sat_i>;
+def Hexagon_M2_vcmpy_s1_sat_i:
+ di_MInst_didi_s1_sat <"vcmpyi", int_hexagon_M2_vcmpy_s1_sat_i>;
+
+def Hexagon_M2_vcmpy_s0_sat_r:
+ di_MInst_didi_sat <"vcmpyr", int_hexagon_M2_vcmpy_s0_sat_r>;
+def Hexagon_M2_vcmpy_s1_sat_r:
+ di_MInst_didi_s1_sat <"vcmpyr", int_hexagon_M2_vcmpy_s1_sat_r>;
+
+def Hexagon_M2_vcmac_s0_sat_i:
+ di_MInst_dididi_acc_sat <"vcmpyi", int_hexagon_M2_vcmac_s0_sat_i>;
+def Hexagon_M2_vcmac_s0_sat_r:
+ di_MInst_dididi_acc_sat <"vcmpyr", int_hexagon_M2_vcmac_s0_sat_r>;
+
+// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
+def Hexagon_M2_vrcmpyi_s0:
+ di_MInst_didi <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0>;
+def Hexagon_M2_vrcmpyr_s0:
+ di_MInst_didi <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0>;
+
+def Hexagon_M2_vrcmpyi_s0c:
+ di_MInst_didi_conj <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0c>;
+def Hexagon_M2_vrcmpyr_s0c:
+ di_MInst_didi_conj <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0c>;
+
+def Hexagon_M2_vrcmaci_s0:
+ di_MInst_dididi_acc <"vrcmpyi", int_hexagon_M2_vrcmaci_s0>;
+def Hexagon_M2_vrcmacr_s0:
+ di_MInst_dididi_acc <"vrcmpyr", int_hexagon_M2_vrcmacr_s0>;
+
+def Hexagon_M2_vrcmaci_s0c:
+ di_MInst_dididi_acc_conj <"vrcmpyi", int_hexagon_M2_vrcmaci_s0c>;
+def Hexagon_M2_vrcmacr_s0c:
+ di_MInst_dididi_acc_conj <"vrcmpyr", int_hexagon_M2_vrcmacr_s0c>;
+
+
+/********************************************************************
+* MTYPE/MPYH *
+*********************************************************************/
+
+// MTYPE / MPYH / Multiply and use lower result.
+//def Hexagon_M2_mpysmi:
+// si_MInst_sim9 <"mpyi", int_hexagon_M2_mpysmi>;
+def Hexagon_M2_mpyi:
+ si_MInst_sisi <"mpyi", int_hexagon_M2_mpyi>;
+def Hexagon_M2_mpyui:
+ si_MInst_sisi <"mpyui", int_hexagon_M2_mpyui>;
+def Hexagon_M2_macsip:
+ si_MInst_sisiu8_acc <"mpyi", int_hexagon_M2_macsip>;
+def Hexagon_M2_maci:
+ si_MInst_sisisi_acc <"mpyi", int_hexagon_M2_maci>;
+def Hexagon_M2_macsin:
+ si_MInst_sisiu8_nac <"mpyi", int_hexagon_M2_macsin>;
+
+// MTYPE / MPYH / Multiply word by half (32x16).
+//Rdd[+]=vmpywoh(Rss,Rtt)[:<<1][:rnd][:sat]
+//Rdd[+]=vmpyweh(Rss,Rtt)[:<<1][:rnd][:sat]
+def Hexagon_M2_mmpyl_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs1>;
+def Hexagon_M2_mmpyl_s1:
+ di_MInst_didi_s1_sat <"vmpyweh", int_hexagon_M2_mmpyl_s1>;
+def Hexagon_M2_mmpyl_rs0:
+ di_MInst_didi_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs0>;
+def Hexagon_M2_mmpyl_s0:
+ di_MInst_didi_sat <"vmpyweh", int_hexagon_M2_mmpyl_s0>;
+def Hexagon_M2_mmpyh_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs1>;
+def Hexagon_M2_mmpyh_s1:
+ di_MInst_didi_s1_sat <"vmpywoh", int_hexagon_M2_mmpyh_s1>;
+def Hexagon_M2_mmpyh_rs0:
+ di_MInst_didi_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs0>;
+def Hexagon_M2_mmpyh_s0:
+ di_MInst_didi_sat <"vmpywoh", int_hexagon_M2_mmpyh_s0>;
+def Hexagon_M2_mmacls_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs1>;
+def Hexagon_M2_mmacls_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyweh", int_hexagon_M2_mmacls_s1>;
+def Hexagon_M2_mmacls_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs0>;
+def Hexagon_M2_mmacls_s0:
+ di_MInst_dididi_acc_sat <"vmpyweh", int_hexagon_M2_mmacls_s0>;
+def Hexagon_M2_mmachs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs1>;
+def Hexagon_M2_mmachs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpywoh", int_hexagon_M2_mmachs_s1>;
+def Hexagon_M2_mmachs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs0>;
+def Hexagon_M2_mmachs_s0:
+ di_MInst_dididi_acc_sat <"vmpywoh", int_hexagon_M2_mmachs_s0>;
+
+// MTYPE / MPYH / Multiply word by unsigned half (32x16).
+//Rdd[+]=vmpywouh(Rss,Rtt)[:<<1][:rnd][:sat]
+//Rdd[+]=vmpyweuh(Rss,Rtt)[:<<1][:rnd][:sat]
+def Hexagon_M2_mmpyul_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs1>;
+def Hexagon_M2_mmpyul_s1:
+ di_MInst_didi_s1_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s1>;
+def Hexagon_M2_mmpyul_rs0:
+ di_MInst_didi_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs0>;
+def Hexagon_M2_mmpyul_s0:
+ di_MInst_didi_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s0>;
+def Hexagon_M2_mmpyuh_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs1>;
+def Hexagon_M2_mmpyuh_s1:
+ di_MInst_didi_s1_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s1>;
+def Hexagon_M2_mmpyuh_rs0:
+ di_MInst_didi_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs0>;
+def Hexagon_M2_mmpyuh_s0:
+ di_MInst_didi_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s0>;
+def Hexagon_M2_mmaculs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs1>;
+def Hexagon_M2_mmaculs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s1>;
+def Hexagon_M2_mmaculs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs0>;
+def Hexagon_M2_mmaculs_s0:
+ di_MInst_dididi_acc_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s0>;
+def Hexagon_M2_mmacuhs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs1>;
+def Hexagon_M2_mmacuhs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s1>;
+def Hexagon_M2_mmacuhs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs0>;
+def Hexagon_M2_mmacuhs_s0:
+ di_MInst_dididi_acc_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s0>;
+
+// MTYPE / MPYH / Multiply and use upper result.
+def Hexagon_M2_hmmpyh_rs1:
+ si_MInst_sisi_h_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyh_rs1>;
+def Hexagon_M2_hmmpyl_rs1:
+ si_MInst_sisi_l_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyl_rs1>;
+def Hexagon_M2_mpy_up:
+ si_MInst_sisi <"mpy", int_hexagon_M2_mpy_up>;
+def Hexagon_M2_dpmpyss_rnd_s0:
+ si_MInst_sisi_rnd <"mpy", int_hexagon_M2_dpmpyss_rnd_s0>;
+def Hexagon_M2_mpyu_up:
+ si_MInst_sisi <"mpyu", int_hexagon_M2_mpyu_up>;
+
+// MTYPE / MPYH / Multiply and use full result.
+def Hexagon_M2_dpmpyuu_s0:
+ di_MInst_sisi <"mpyu", int_hexagon_M2_dpmpyuu_s0>;
+def Hexagon_M2_dpmpyuu_acc_s0:
+ di_MInst_disisi_acc <"mpyu", int_hexagon_M2_dpmpyuu_acc_s0>;
+def Hexagon_M2_dpmpyuu_nac_s0:
+ di_MInst_disisi_nac <"mpyu", int_hexagon_M2_dpmpyuu_nac_s0>;
+def Hexagon_M2_dpmpyss_s0:
+ di_MInst_sisi <"mpy", int_hexagon_M2_dpmpyss_s0>;
+def Hexagon_M2_dpmpyss_acc_s0:
+ di_MInst_disisi_acc <"mpy", int_hexagon_M2_dpmpyss_acc_s0>;
+def Hexagon_M2_dpmpyss_nac_s0:
+ di_MInst_disisi_nac <"mpy", int_hexagon_M2_dpmpyss_nac_s0>;
+
+
+/********************************************************************
+* MTYPE/MPYS *
+*********************************************************************/
+
+// MTYPE / MPYS / Scalar 16x16 multiply signed.
+//Rd=mpy(Rs.[H|L],Rt.[H|L])
+// [:<<0|:<<1][:rnd|:sat|:rnd:sat]
+def Hexagon_M2_mpy_hh_s0:
+ si_MInst_sisi_hh <"mpy", int_hexagon_M2_mpy_hh_s0>;
+def Hexagon_M2_mpy_hh_s1:
+ si_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpy_hh_s1>;
+def Hexagon_M2_mpy_rnd_hh_s1:
+ si_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_rnd_hh_s1>;
+def Hexagon_M2_mpy_sat_rnd_hh_s1:
+ si_MInst_sisi_sat_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s1>;
+def Hexagon_M2_mpy_sat_hh_s1:
+ si_MInst_sisi_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_hh_s1>;
+def Hexagon_M2_mpy_rnd_hh_s0:
+ si_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpy_rnd_hh_s0>;
+def Hexagon_M2_mpy_sat_rnd_hh_s0:
+ si_MInst_sisi_sat_rnd_hh <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s0>;
+def Hexagon_M2_mpy_sat_hh_s0:
+ si_MInst_sisi_sat_hh <"mpy", int_hexagon_M2_mpy_sat_hh_s0>;
+
+def Hexagon_M2_mpy_hl_s0:
+ si_MInst_sisi_hl <"mpy", int_hexagon_M2_mpy_hl_s0>;
+def Hexagon_M2_mpy_hl_s1:
+ si_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpy_hl_s1>;
+def Hexagon_M2_mpy_rnd_hl_s1:
+ si_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_rnd_hl_s1>;
+def Hexagon_M2_mpy_sat_rnd_hl_s1:
+ si_MInst_sisi_sat_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s1>;
+def Hexagon_M2_mpy_sat_hl_s1:
+ si_MInst_sisi_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_hl_s1>;
+def Hexagon_M2_mpy_rnd_hl_s0:
+ si_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpy_rnd_hl_s0>;
+def Hexagon_M2_mpy_sat_rnd_hl_s0:
+ si_MInst_sisi_sat_rnd_hl <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s0>;
+def Hexagon_M2_mpy_sat_hl_s0:
+ si_MInst_sisi_sat_hl <"mpy", int_hexagon_M2_mpy_sat_hl_s0>;
+
+def Hexagon_M2_mpy_lh_s0:
+ si_MInst_sisi_lh <"mpy", int_hexagon_M2_mpy_lh_s0>;
+def Hexagon_M2_mpy_lh_s1:
+ si_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpy_lh_s1>;
+def Hexagon_M2_mpy_rnd_lh_s1:
+ si_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_rnd_lh_s1>;
+def Hexagon_M2_mpy_sat_rnd_lh_s1:
+ si_MInst_sisi_sat_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s1>;
+def Hexagon_M2_mpy_sat_lh_s1:
+ si_MInst_sisi_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_lh_s1>;
+def Hexagon_M2_mpy_rnd_lh_s0:
+ si_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpy_rnd_lh_s0>;
+def Hexagon_M2_mpy_sat_rnd_lh_s0:
+ si_MInst_sisi_sat_rnd_lh <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s0>;
+def Hexagon_M2_mpy_sat_lh_s0:
+ si_MInst_sisi_sat_lh <"mpy", int_hexagon_M2_mpy_sat_lh_s0>;
+
+def Hexagon_M2_mpy_ll_s0:
+ si_MInst_sisi_ll <"mpy", int_hexagon_M2_mpy_ll_s0>;
+def Hexagon_M2_mpy_ll_s1:
+ si_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpy_ll_s1>;
+def Hexagon_M2_mpy_rnd_ll_s1:
+ si_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_rnd_ll_s1>;
+def Hexagon_M2_mpy_sat_rnd_ll_s1:
+ si_MInst_sisi_sat_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s1>;
+def Hexagon_M2_mpy_sat_ll_s1:
+ si_MInst_sisi_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_ll_s1>;
+def Hexagon_M2_mpy_rnd_ll_s0:
+ si_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpy_rnd_ll_s0>;
+def Hexagon_M2_mpy_sat_rnd_ll_s0:
+ si_MInst_sisi_sat_rnd_ll <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s0>;
+def Hexagon_M2_mpy_sat_ll_s0:
+ si_MInst_sisi_sat_ll <"mpy", int_hexagon_M2_mpy_sat_ll_s0>;
+
+//Rdd=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:rnd]
+def Hexagon_M2_mpyd_hh_s0:
+ di_MInst_sisi_hh <"mpy", int_hexagon_M2_mpyd_hh_s0>;
+def Hexagon_M2_mpyd_hh_s1:
+ di_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpyd_hh_s1>;
+def Hexagon_M2_mpyd_rnd_hh_s1:
+ di_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hh_s1>;
+def Hexagon_M2_mpyd_rnd_hh_s0:
+ di_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpyd_rnd_hh_s0>;
+
+def Hexagon_M2_mpyd_hl_s0:
+ di_MInst_sisi_hl <"mpy", int_hexagon_M2_mpyd_hl_s0>;
+def Hexagon_M2_mpyd_hl_s1:
+ di_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpyd_hl_s1>;
+def Hexagon_M2_mpyd_rnd_hl_s1:
+ di_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hl_s1>;
+def Hexagon_M2_mpyd_rnd_hl_s0:
+ di_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpyd_rnd_hl_s0>;
+
+def Hexagon_M2_mpyd_lh_s0:
+ di_MInst_sisi_lh <"mpy", int_hexagon_M2_mpyd_lh_s0>;
+def Hexagon_M2_mpyd_lh_s1:
+ di_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpyd_lh_s1>;
+def Hexagon_M2_mpyd_rnd_lh_s1:
+ di_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_lh_s1>;
+def Hexagon_M2_mpyd_rnd_lh_s0:
+ di_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpyd_rnd_lh_s0>;
+
+def Hexagon_M2_mpyd_ll_s0:
+ di_MInst_sisi_ll <"mpy", int_hexagon_M2_mpyd_ll_s0>;
+def Hexagon_M2_mpyd_ll_s1:
+ di_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpyd_ll_s1>;
+def Hexagon_M2_mpyd_rnd_ll_s1:
+ di_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpyd_rnd_ll_s1>;
+def Hexagon_M2_mpyd_rnd_ll_s0:
+ di_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpyd_rnd_ll_s0>;
+
+//Rx+=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:sat]
+def Hexagon_M2_mpy_acc_hh_s0:
+ si_MInst_sisisi_acc_hh <"mpy", int_hexagon_M2_mpy_acc_hh_s0>;
+def Hexagon_M2_mpy_acc_hh_s1:
+ si_MInst_sisisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_hh_s1>;
+def Hexagon_M2_mpy_acc_sat_hh_s1:
+ si_MInst_sisisi_acc_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s1>;
+def Hexagon_M2_mpy_acc_sat_hh_s0:
+ si_MInst_sisisi_acc_sat_hh <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s0>;
+
+def Hexagon_M2_mpy_acc_hl_s0:
+ si_MInst_sisisi_acc_hl <"mpy", int_hexagon_M2_mpy_acc_hl_s0>;
+def Hexagon_M2_mpy_acc_hl_s1:
+ si_MInst_sisisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_hl_s1>;
+def Hexagon_M2_mpy_acc_sat_hl_s1:
+ si_MInst_sisisi_acc_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s1>;
+def Hexagon_M2_mpy_acc_sat_hl_s0:
+ si_MInst_sisisi_acc_sat_hl <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s0>;
+
+def Hexagon_M2_mpy_acc_lh_s0:
+ si_MInst_sisisi_acc_lh <"mpy", int_hexagon_M2_mpy_acc_lh_s0>;
+def Hexagon_M2_mpy_acc_lh_s1:
+ si_MInst_sisisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_lh_s1>;
+def Hexagon_M2_mpy_acc_sat_lh_s1:
+ si_MInst_sisisi_acc_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s1>;
+def Hexagon_M2_mpy_acc_sat_lh_s0:
+ si_MInst_sisisi_acc_sat_lh <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s0>;
+
+def Hexagon_M2_mpy_acc_ll_s0:
+ si_MInst_sisisi_acc_ll <"mpy", int_hexagon_M2_mpy_acc_ll_s0>;
+def Hexagon_M2_mpy_acc_ll_s1:
+ si_MInst_sisisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_ll_s1>;
+def Hexagon_M2_mpy_acc_sat_ll_s1:
+ si_MInst_sisisi_acc_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s1>;
+def Hexagon_M2_mpy_acc_sat_ll_s0:
+ si_MInst_sisisi_acc_sat_ll <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s0>;
+
+//Rx-=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:sat]
+def Hexagon_M2_mpy_nac_hh_s0:
+ si_MInst_sisisi_nac_hh <"mpy", int_hexagon_M2_mpy_nac_hh_s0>;
+def Hexagon_M2_mpy_nac_hh_s1:
+ si_MInst_sisisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_hh_s1>;
+def Hexagon_M2_mpy_nac_sat_hh_s1:
+ si_MInst_sisisi_nac_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s1>;
+def Hexagon_M2_mpy_nac_sat_hh_s0:
+ si_MInst_sisisi_nac_sat_hh <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s0>;
+
+def Hexagon_M2_mpy_nac_hl_s0:
+ si_MInst_sisisi_nac_hl <"mpy", int_hexagon_M2_mpy_nac_hl_s0>;
+def Hexagon_M2_mpy_nac_hl_s1:
+ si_MInst_sisisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_hl_s1>;
+def Hexagon_M2_mpy_nac_sat_hl_s1:
+ si_MInst_sisisi_nac_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s1>;
+def Hexagon_M2_mpy_nac_sat_hl_s0:
+ si_MInst_sisisi_nac_sat_hl <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s0>;
+
+def Hexagon_M2_mpy_nac_lh_s0:
+ si_MInst_sisisi_nac_lh <"mpy", int_hexagon_M2_mpy_nac_lh_s0>;
+def Hexagon_M2_mpy_nac_lh_s1:
+ si_MInst_sisisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_lh_s1>;
+def Hexagon_M2_mpy_nac_sat_lh_s1:
+ si_MInst_sisisi_nac_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s1>;
+def Hexagon_M2_mpy_nac_sat_lh_s0:
+ si_MInst_sisisi_nac_sat_lh <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s0>;
+
+def Hexagon_M2_mpy_nac_ll_s0:
+ si_MInst_sisisi_nac_ll <"mpy", int_hexagon_M2_mpy_nac_ll_s0>;
+def Hexagon_M2_mpy_nac_ll_s1:
+ si_MInst_sisisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_ll_s1>;
+def Hexagon_M2_mpy_nac_sat_ll_s1:
+ si_MInst_sisisi_nac_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s1>;
+def Hexagon_M2_mpy_nac_sat_ll_s0:
+ si_MInst_sisisi_nac_sat_ll <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s0>;
+
+//Rxx+=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyd_acc_hh_s0:
+ di_MInst_disisi_acc_hh <"mpy", int_hexagon_M2_mpyd_acc_hh_s0>;
+def Hexagon_M2_mpyd_acc_hh_s1:
+ di_MInst_disisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpyd_acc_hh_s1>;
+
+def Hexagon_M2_mpyd_acc_hl_s0:
+ di_MInst_disisi_acc_hl <"mpy", int_hexagon_M2_mpyd_acc_hl_s0>;
+def Hexagon_M2_mpyd_acc_hl_s1:
+ di_MInst_disisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpyd_acc_hl_s1>;
+
+def Hexagon_M2_mpyd_acc_lh_s0:
+ di_MInst_disisi_acc_lh <"mpy", int_hexagon_M2_mpyd_acc_lh_s0>;
+def Hexagon_M2_mpyd_acc_lh_s1:
+ di_MInst_disisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpyd_acc_lh_s1>;
+
+def Hexagon_M2_mpyd_acc_ll_s0:
+ di_MInst_disisi_acc_ll <"mpy", int_hexagon_M2_mpyd_acc_ll_s0>;
+def Hexagon_M2_mpyd_acc_ll_s1:
+ di_MInst_disisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpyd_acc_ll_s1>;
+
+//Rxx-=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyd_nac_hh_s0:
+ di_MInst_disisi_nac_hh <"mpy", int_hexagon_M2_mpyd_nac_hh_s0>;
+def Hexagon_M2_mpyd_nac_hh_s1:
+ di_MInst_disisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpyd_nac_hh_s1>;
+
+def Hexagon_M2_mpyd_nac_hl_s0:
+ di_MInst_disisi_nac_hl <"mpy", int_hexagon_M2_mpyd_nac_hl_s0>;
+def Hexagon_M2_mpyd_nac_hl_s1:
+ di_MInst_disisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpyd_nac_hl_s1>;
+
+def Hexagon_M2_mpyd_nac_lh_s0:
+ di_MInst_disisi_nac_lh <"mpy", int_hexagon_M2_mpyd_nac_lh_s0>;
+def Hexagon_M2_mpyd_nac_lh_s1:
+ di_MInst_disisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpyd_nac_lh_s1>;
+
+def Hexagon_M2_mpyd_nac_ll_s0:
+ di_MInst_disisi_nac_ll <"mpy", int_hexagon_M2_mpyd_nac_ll_s0>;
+def Hexagon_M2_mpyd_nac_ll_s1:
+ di_MInst_disisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpyd_nac_ll_s1>;
+
+// MTYPE / MPYS / Scalar 16x16 multiply unsigned.
+//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_hh_s0:
+ si_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyu_hh_s0>;
+def Hexagon_M2_mpyu_hh_s1:
+ si_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyu_hh_s1>;
+def Hexagon_M2_mpyu_hl_s0:
+ si_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyu_hl_s0>;
+def Hexagon_M2_mpyu_hl_s1:
+ si_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyu_hl_s1>;
+def Hexagon_M2_mpyu_lh_s0:
+ si_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyu_lh_s0>;
+def Hexagon_M2_mpyu_lh_s1:
+ si_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyu_lh_s1>;
+def Hexagon_M2_mpyu_ll_s0:
+ si_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyu_ll_s0>;
+def Hexagon_M2_mpyu_ll_s1:
+ si_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyu_ll_s1>;
+
+//Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_hh_s0:
+ di_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyud_hh_s0>;
+def Hexagon_M2_mpyud_hh_s1:
+ di_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyud_hh_s1>;
+def Hexagon_M2_mpyud_hl_s0:
+ di_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyud_hl_s0>;
+def Hexagon_M2_mpyud_hl_s1:
+ di_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyud_hl_s1>;
+def Hexagon_M2_mpyud_lh_s0:
+ di_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyud_lh_s0>;
+def Hexagon_M2_mpyud_lh_s1:
+ di_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyud_lh_s1>;
+def Hexagon_M2_mpyud_ll_s0:
+ di_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyud_ll_s0>;
+def Hexagon_M2_mpyud_ll_s1:
+ di_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyud_ll_s1>;
+
+//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_acc_hh_s0:
+ si_MInst_sisisi_acc_hh <"mpyu", int_hexagon_M2_mpyu_acc_hh_s0>;
+def Hexagon_M2_mpyu_acc_hh_s1:
+ si_MInst_sisisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hh_s1>;
+def Hexagon_M2_mpyu_acc_hl_s0:
+ si_MInst_sisisi_acc_hl <"mpyu", int_hexagon_M2_mpyu_acc_hl_s0>;
+def Hexagon_M2_mpyu_acc_hl_s1:
+ si_MInst_sisisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hl_s1>;
+def Hexagon_M2_mpyu_acc_lh_s0:
+ si_MInst_sisisi_acc_lh <"mpyu", int_hexagon_M2_mpyu_acc_lh_s0>;
+def Hexagon_M2_mpyu_acc_lh_s1:
+ si_MInst_sisisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_lh_s1>;
+def Hexagon_M2_mpyu_acc_ll_s0:
+ si_MInst_sisisi_acc_ll <"mpyu", int_hexagon_M2_mpyu_acc_ll_s0>;
+def Hexagon_M2_mpyu_acc_ll_s1:
+ si_MInst_sisisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyu_acc_ll_s1>;
+
+//Rd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_nac_hh_s0:
+ si_MInst_sisisi_nac_hh <"mpyu", int_hexagon_M2_mpyu_nac_hh_s0>;
+def Hexagon_M2_mpyu_nac_hh_s1:
+ si_MInst_sisisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hh_s1>;
+def Hexagon_M2_mpyu_nac_hl_s0:
+ si_MInst_sisisi_nac_hl <"mpyu", int_hexagon_M2_mpyu_nac_hl_s0>;
+def Hexagon_M2_mpyu_nac_hl_s1:
+ si_MInst_sisisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hl_s1>;
+def Hexagon_M2_mpyu_nac_lh_s0:
+ si_MInst_sisisi_nac_lh <"mpyu", int_hexagon_M2_mpyu_nac_lh_s0>;
+def Hexagon_M2_mpyu_nac_lh_s1:
+ si_MInst_sisisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_lh_s1>;
+def Hexagon_M2_mpyu_nac_ll_s0:
+ si_MInst_sisisi_nac_ll <"mpyu", int_hexagon_M2_mpyu_nac_ll_s0>;
+def Hexagon_M2_mpyu_nac_ll_s1:
+ si_MInst_sisisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyu_nac_ll_s1>;
+
+//Rdd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_acc_hh_s0:
+ di_MInst_disisi_acc_hh <"mpyu", int_hexagon_M2_mpyud_acc_hh_s0>;
+def Hexagon_M2_mpyud_acc_hh_s1:
+ di_MInst_disisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hh_s1>;
+def Hexagon_M2_mpyud_acc_hl_s0:
+ di_MInst_disisi_acc_hl <"mpyu", int_hexagon_M2_mpyud_acc_hl_s0>;
+def Hexagon_M2_mpyud_acc_hl_s1:
+ di_MInst_disisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hl_s1>;
+def Hexagon_M2_mpyud_acc_lh_s0:
+ di_MInst_disisi_acc_lh <"mpyu", int_hexagon_M2_mpyud_acc_lh_s0>;
+def Hexagon_M2_mpyud_acc_lh_s1:
+ di_MInst_disisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_lh_s1>;
+def Hexagon_M2_mpyud_acc_ll_s0:
+ di_MInst_disisi_acc_ll <"mpyu", int_hexagon_M2_mpyud_acc_ll_s0>;
+def Hexagon_M2_mpyud_acc_ll_s1:
+ di_MInst_disisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyud_acc_ll_s1>;
+
+//Rdd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_nac_hh_s0:
+ di_MInst_disisi_nac_hh <"mpyu", int_hexagon_M2_mpyud_nac_hh_s0>;
+def Hexagon_M2_mpyud_nac_hh_s1:
+ di_MInst_disisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hh_s1>;
+def Hexagon_M2_mpyud_nac_hl_s0:
+ di_MInst_disisi_nac_hl <"mpyu", int_hexagon_M2_mpyud_nac_hl_s0>;
+def Hexagon_M2_mpyud_nac_hl_s1:
+ di_MInst_disisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hl_s1>;
+def Hexagon_M2_mpyud_nac_lh_s0:
+ di_MInst_disisi_nac_lh <"mpyu", int_hexagon_M2_mpyud_nac_lh_s0>;
+def Hexagon_M2_mpyud_nac_lh_s1:
+ di_MInst_disisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_lh_s1>;
+def Hexagon_M2_mpyud_nac_ll_s0:
+ di_MInst_disisi_nac_ll <"mpyu", int_hexagon_M2_mpyud_nac_ll_s0>;
+def Hexagon_M2_mpyud_nac_ll_s1:
+ di_MInst_disisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyud_nac_ll_s1>;
+
+
+/********************************************************************
+* MTYPE/VB *
+*********************************************************************/
+
+// MTYPE / VB / Vector reduce add unsigned bytes.
+def Hexagon_A2_vraddub:
+ di_MInst_didi <"vraddub", int_hexagon_A2_vraddub>;
+def Hexagon_A2_vraddub_acc:
+ di_MInst_dididi_acc <"vraddub", int_hexagon_A2_vraddub_acc>;
+
+// MTYPE / VB / Vector sum of absolute differences unsigned bytes.
+def Hexagon_A2_vrsadub:
+ di_MInst_didi <"vrsadub", int_hexagon_A2_vrsadub>;
+def Hexagon_A2_vrsadub_acc:
+ di_MInst_dididi_acc <"vrsadub", int_hexagon_A2_vrsadub_acc>;
+
+/********************************************************************
+* MTYPE/VH *
+*********************************************************************/
+
+// MTYPE / VH / Vector dual multiply.
+def Hexagon_M2_vdmpys_s1:
+ di_MInst_didi_s1_sat <"vdmpy", int_hexagon_M2_vdmpys_s1>;
+def Hexagon_M2_vdmpys_s0:
+ di_MInst_didi_sat <"vdmpy", int_hexagon_M2_vdmpys_s0>;
+def Hexagon_M2_vdmacs_s1:
+ di_MInst_dididi_acc_s1_sat <"vdmpy", int_hexagon_M2_vdmacs_s1>;
+def Hexagon_M2_vdmacs_s0:
+ di_MInst_dididi_acc_sat <"vdmpy", int_hexagon_M2_vdmacs_s0>;
+
+// MTYPE / VH / Vector dual multiply with round and pack.
+def Hexagon_M2_vdmpyrs_s0:
+ si_MInst_didi_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s0>;
+def Hexagon_M2_vdmpyrs_s1:
+ si_MInst_didi_s1_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s1>;
+
+// MTYPE / VH / Vector multiply even halfwords.
+def Hexagon_M2_vmpy2es_s1:
+ di_MInst_didi_s1_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s1>;
+def Hexagon_M2_vmpy2es_s0:
+ di_MInst_didi_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s0>;
+def Hexagon_M2_vmac2es:
+ di_MInst_dididi_acc <"vmpyeh", int_hexagon_M2_vmac2es>;
+def Hexagon_M2_vmac2es_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyeh", int_hexagon_M2_vmac2es_s1>;
+def Hexagon_M2_vmac2es_s0:
+ di_MInst_dididi_acc_sat <"vmpyeh", int_hexagon_M2_vmac2es_s0>;
+
+// MTYPE / VH / Vector multiply halfwords.
+def Hexagon_M2_vmpy2s_s0:
+ di_MInst_sisi_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0>;
+def Hexagon_M2_vmpy2s_s1:
+ di_MInst_sisi_s1_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1>;
+def Hexagon_M2_vmac2:
+ di_MInst_disisi_acc <"vmpyh", int_hexagon_M2_vmac2>;
+def Hexagon_M2_vmac2s_s0:
+ di_MInst_disisi_acc_sat <"vmpyh", int_hexagon_M2_vmac2s_s0>;
+def Hexagon_M2_vmac2s_s1:
+ di_MInst_disisi_acc_s1_sat <"vmpyh", int_hexagon_M2_vmac2s_s1>;
+
+// MTYPE / VH / Vector multiply halfwords with round and pack.
+def Hexagon_M2_vmpy2s_s0pack:
+ si_MInst_sisi_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0pack>;
+def Hexagon_M2_vmpy2s_s1pack:
+ si_MInst_sisi_s1_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1pack>;
+
+// MTYPE / VH / Vector reduce multiply halfwords.
+// Rdd32=vrmpyh(Rss32,Rtt32); Rxx32+=vrmpyh(Rss32,Rtt32)
+def Hexagon_M2_vrmpy_s0:
+ di_MInst_didi <"vrmpyh", int_hexagon_M2_vrmpy_s0>;
+def Hexagon_M2_vrmac_s0:
+ di_MInst_dididi_acc <"vrmpyh", int_hexagon_M2_vrmac_s0>;
+
+
+/********************************************************************
+* STYPE/ALU *
+*********************************************************************/
+
+// STYPE / ALU / Absolute value.
+def Hexagon_A2_abs:
+ si_SInst_si <"abs", int_hexagon_A2_abs>;
+def Hexagon_A2_absp:
+ di_SInst_di <"abs", int_hexagon_A2_absp>;
+def Hexagon_A2_abssat:
+ si_SInst_si_sat <"abs", int_hexagon_A2_abssat>;
+
+// STYPE / ALU / Negate.
+def Hexagon_A2_negp:
+ di_SInst_di <"neg", int_hexagon_A2_negp>;
+def Hexagon_A2_negsat:
+ si_SInst_si_sat <"neg", int_hexagon_A2_negsat>;
+
+// STYPE / ALU / Logical Not.
+def Hexagon_A2_notp:
+ di_SInst_di <"not", int_hexagon_A2_notp>;
+
+// STYPE / ALU / Sign extend word to doubleword.
+def Hexagon_A2_sxtw:
+ di_SInst_si <"sxtw", int_hexagon_A2_sxtw>;
+
+
+/********************************************************************
+* STYPE/BIT *
+*********************************************************************/
+
+// STYPE / BIT / Count leading.
+def Hexagon_S2_cl0:
+ si_SInst_si <"cl0", int_hexagon_S2_cl0>;
+def Hexagon_S2_cl0p:
+ si_SInst_di <"cl0", int_hexagon_S2_cl0p>;
+def Hexagon_S2_cl1:
+ si_SInst_si <"cl1", int_hexagon_S2_cl1>;
+def Hexagon_S2_cl1p:
+ si_SInst_di <"cl1", int_hexagon_S2_cl1p>;
+def Hexagon_S2_clb:
+ si_SInst_si <"clb", int_hexagon_S2_clb>;
+def Hexagon_S2_clbp:
+ si_SInst_di <"clb", int_hexagon_S2_clbp>;
+def Hexagon_S2_clbnorm:
+ si_SInst_si <"normamt", int_hexagon_S2_clbnorm>;
+
+// STYPE / BIT / Count trailing.
+def Hexagon_S2_ct0:
+ si_SInst_si <"ct0", int_hexagon_S2_ct0>;
+def Hexagon_S2_ct1:
+ si_SInst_si <"ct1", int_hexagon_S2_ct1>;
+
+// STYPE / BIT / Compare bit mask.
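+// Pd4=bitsclr(Rs32,Rt32|#u6): true iff every masked bit of Rs is clear;
+// Pd4=bitsset(Rs32,Rt32): true iff every masked bit of Rs is set.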
+def HEXAGON_C2_bitsclr:
+ qi_SInst_sisi <"bitsclr", int_hexagon_C2_bitsclr>;
+def HEXAGON_C2_bitsclri:
+ qi_SInst_siu6 <"bitsclr", int_hexagon_C2_bitsclri>;
+def HEXAGON_C2_bitsset:
+ qi_SInst_sisi <"bitsset", int_hexagon_C2_bitsset>;
+
+// STYPE / BIT / Extract unsigned.
+// Rd[d]=extractu(Rs[s],#width,#offset) or Rd[d]=extractu(Rs[s],Rtt)
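+// extractu zero-extends the selected bit field; the _rp forms take the
+// width and offset packed in a register pair instead of immediates.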
+def Hexagon_S2_extractu:
+ si_SInst_siu5u5 <"extractu",int_hexagon_S2_extractu>;
+def Hexagon_S2_extractu_rp:
+ si_SInst_sidi <"extractu",int_hexagon_S2_extractu_rp>;
+def Hexagon_S2_extractup:
+ di_SInst_diu6u6 <"extractu",int_hexagon_S2_extractup>;
+def Hexagon_S2_extractup_rp:
+ di_SInst_didi <"extractu",int_hexagon_S2_extractup_rp>;
+
+// STYPE / BIT / Insert bitfield.
+def HEXAGON_S2_insert:
+ si_SInst_sisiu5u5 <"insert", int_hexagon_S2_insert>;
+def HEXAGON_S2_insert_rp:
+ si_SInst_sisidi <"insert", int_hexagon_S2_insert_rp>;
+def HEXAGON_S2_insertp:
+ di_SInst_didiu6u6 <"insert", int_hexagon_S2_insertp>;
+def HEXAGON_S2_insertp_rp:
+ di_SInst_dididi <"insert", int_hexagon_S2_insertp_rp>;
+
+// STYPE / BIT / Interleave/deinterleave.
+def HEXAGON_S2_interleave:
+ di_SInst_di <"interleave", int_hexagon_S2_interleave>;
+def HEXAGON_S2_deinterleave:
+ di_SInst_di <"deinterleave", int_hexagon_S2_deinterleave>;
+
+// STYPE / BIT / Linear feedback-shift iteration.
+def HEXAGON_S2_lfsp:
+ di_SInst_didi <"lfs", int_hexagon_S2_lfsp>;
+
+// STYPE / BIT / Bit reverse.
+def HEXAGON_S2_brev:
+ si_SInst_si <"brev", int_hexagon_S2_brev>;
+
+// STYPE / BIT / Set/Clear/Toggle Bit.
+def Hexagon_S2_setbit_i:
+ si_SInst_siu5 <"setbit", int_hexagon_S2_setbit_i>;
+def Hexagon_S2_togglebit_i:
+ si_SInst_siu5 <"togglebit", int_hexagon_S2_togglebit_i>;
+def Hexagon_S2_clrbit_i:
+ si_SInst_siu5 <"clrbit", int_hexagon_S2_clrbit_i>;
+def Hexagon_S2_setbit_r:
+ si_SInst_sisi <"setbit", int_hexagon_S2_setbit_r>;
+def Hexagon_S2_togglebit_r:
+ si_SInst_sisi <"togglebit", int_hexagon_S2_togglebit_r>;
+def Hexagon_S2_clrbit_r:
+ si_SInst_sisi <"clrbit", int_hexagon_S2_clrbit_r>;
+
+// STYPE / BIT / Test Bit.
+def Hexagon_S2_tstbit_i:
+ qi_SInst_siu5 <"tstbit", int_hexagon_S2_tstbit_i>;
+def Hexagon_S2_tstbit_r:
+ qi_SInst_sisi <"tstbit", int_hexagon_S2_tstbit_r>;
+
+
+/********************************************************************
+* STYPE/COMPLEX *
+*********************************************************************/
+
+// STYPE / COMPLEX / Vector Complex conjugate.
+def Hexagon_A2_vconj:
+ di_SInst_di_sat <"vconj", int_hexagon_A2_vconj>;
+
+// STYPE / COMPLEX / Vector Complex rotate.
+def Hexagon_S2_vcrotate:
+ di_SInst_disi <"vcrotate",int_hexagon_S2_vcrotate>;
+
+
+/********************************************************************
+* STYPE/PERM *
+*********************************************************************/
+
+// STYPE / PERM / Saturate.
+def Hexagon_A2_sat:
+ si_SInst_di <"sat", int_hexagon_A2_sat>;
+def Hexagon_A2_satb:
+ si_SInst_si <"satb", int_hexagon_A2_satb>;
+def Hexagon_A2_sath:
+ si_SInst_si <"sath", int_hexagon_A2_sath>;
+def Hexagon_A2_satub:
+ si_SInst_si <"satub", int_hexagon_A2_satub>;
+def Hexagon_A2_satuh:
+ si_SInst_si <"satuh", int_hexagon_A2_satuh>;
+
+// STYPE / PERM / Swizzle bytes.
+def Hexagon_A2_swiz:
+ si_SInst_si <"swiz", int_hexagon_A2_swiz>;
+
+// STYPE / PERM / Vector align.
+// Need custom lowering
+def Hexagon_S2_valignib:
+ di_SInst_didiu3 <"valignb", int_hexagon_S2_valignib>;
+def Hexagon_S2_valignrb:
+ di_SInst_didiqi <"valignb", int_hexagon_S2_valignrb>;
+
+// STYPE / PERM / Vector round and pack.
+def Hexagon_S2_vrndpackwh:
+ si_SInst_di <"vrndwh", int_hexagon_S2_vrndpackwh>;
+def Hexagon_S2_vrndpackwhs:
+ si_SInst_di_sat <"vrndwh", int_hexagon_S2_vrndpackwhs>;
+
+// STYPE / PERM / Vector saturate and pack.
+def Hexagon_S2_svsathb:
+ si_SInst_si <"vsathb", int_hexagon_S2_svsathb>;
+def Hexagon_S2_vsathb:
+ si_SInst_di <"vsathb", int_hexagon_S2_vsathb>;
+def Hexagon_S2_svsathub:
+ si_SInst_si <"vsathub", int_hexagon_S2_svsathub>;
+def Hexagon_S2_vsathub:
+ si_SInst_di <"vsathub", int_hexagon_S2_vsathub>;
+def Hexagon_S2_vsatwh:
+ si_SInst_di <"vsatwh", int_hexagon_S2_vsatwh>;
+def Hexagon_S2_vsatwuh:
+ si_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh>;
+
+// STYPE / PERM / Vector saturate without pack.
+def Hexagon_S2_vsathb_nopack:
+ di_SInst_di <"vsathb", int_hexagon_S2_vsathb_nopack>;
+def Hexagon_S2_vsathub_nopack:
+ di_SInst_di <"vsathub", int_hexagon_S2_vsathub_nopack>;
+def Hexagon_S2_vsatwh_nopack:
+ di_SInst_di <"vsatwh", int_hexagon_S2_vsatwh_nopack>;
+def Hexagon_S2_vsatwuh_nopack:
+ di_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh_nopack>;
+
+// STYPE / PERM / Vector shuffle.
+def Hexagon_S2_shuffeb:
+ di_SInst_didi <"shuffeb", int_hexagon_S2_shuffeb>;
+def Hexagon_S2_shuffeh:
+ di_SInst_didi <"shuffeh", int_hexagon_S2_shuffeh>;
+def Hexagon_S2_shuffob:
+ di_SInst_didi <"shuffob", int_hexagon_S2_shuffob>;
+def Hexagon_S2_shuffoh:
+ di_SInst_didi <"shuffoh", int_hexagon_S2_shuffoh>;
+
+// STYPE / PERM / Vector splat bytes.
+def Hexagon_S2_vsplatrb:
+ si_SInst_si <"vsplatb", int_hexagon_S2_vsplatrb>;
+
+// STYPE / PERM / Vector splat halfwords.
+def Hexagon_S2_vsplatrh:
+ di_SInst_si <"vsplath", int_hexagon_S2_vsplatrh>;
+
+// STYPE / PERM / Vector splice.
+def HEXAGON_S2_vsplicerb:
+ di_SInst_didiqi <"vspliceb",int_hexagon_S2_vsplicerb>;
+def HEXAGON_S2_vspliceib:
+ di_SInst_didiu3 <"vspliceb",int_hexagon_S2_vspliceib>;
+
+// STYPE / PERM / Sign extend.
+def Hexagon_S2_vsxtbh:
+ di_SInst_si <"vsxtbh", int_hexagon_S2_vsxtbh>;
+def Hexagon_S2_vsxthw:
+ di_SInst_si <"vsxthw", int_hexagon_S2_vsxthw>;
+
+// STYPE / PERM / Truncate.
+def Hexagon_S2_vtrunehb:
+ si_SInst_di <"vtrunehb",int_hexagon_S2_vtrunehb>;
+def Hexagon_S2_vtrunohb:
+ si_SInst_di <"vtrunohb",int_hexagon_S2_vtrunohb>;
+def Hexagon_S2_vtrunewh:
+ di_SInst_didi <"vtrunewh",int_hexagon_S2_vtrunewh>;
+def Hexagon_S2_vtrunowh:
+ di_SInst_didi <"vtrunowh",int_hexagon_S2_vtrunowh>;
+
+// STYPE / PERM / Zero extend.
+def Hexagon_S2_vzxtbh:
+ di_SInst_si <"vzxtbh", int_hexagon_S2_vzxtbh>;
+def Hexagon_S2_vzxthw:
+ di_SInst_si <"vzxthw", int_hexagon_S2_vzxthw>;
+
+
+/********************************************************************
+* STYPE/PRED *
+*********************************************************************/
+
+// STYPE / PRED / Mask generate from predicate.
+def Hexagon_C2_mask:
+ di_SInst_qi <"mask", int_hexagon_C2_mask>;
+
+// STYPE / PRED / Predicate transfer.
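+// Rd32=Ps4 and Pd4=Rs32: plain transfers, hence the empty mnemonic strings.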
+def Hexagon_C2_tfrpr:
+ si_SInst_qi <"", int_hexagon_C2_tfrpr>;
+def Hexagon_C2_tfrrp:
+ qi_SInst_si <"", int_hexagon_C2_tfrrp>;
+
+// STYPE / PRED / Viterbi pack even and odd predicate bits.
+def Hexagon_C2_vitpack:
+ si_SInst_qiqi <"vitpack",int_hexagon_C2_vitpack>;
+
+
+/********************************************************************
+* STYPE/SHIFT *
+*********************************************************************/
+
+// STYPE / SHIFT / Shift by immediate.
+def Hexagon_S2_asl_i_r:
+ si_SInst_siu5 <"asl", int_hexagon_S2_asl_i_r>;
+def Hexagon_S2_asr_i_r:
+ si_SInst_siu5 <"asr", int_hexagon_S2_asr_i_r>;
+def Hexagon_S2_lsr_i_r:
+ si_SInst_siu5 <"lsr", int_hexagon_S2_lsr_i_r>;
+def Hexagon_S2_asl_i_p:
+ di_SInst_diu6 <"asl", int_hexagon_S2_asl_i_p>;
+def Hexagon_S2_asr_i_p:
+ di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p>;
+def Hexagon_S2_lsr_i_p:
+ di_SInst_diu6 <"lsr", int_hexagon_S2_lsr_i_p>;
+
+// STYPE / SHIFT / Shift by immediate and accumulate.
+def Hexagon_S2_asl_i_r_acc:
+ si_SInst_sisiu5_acc <"asl", int_hexagon_S2_asl_i_r_acc>;
+def Hexagon_S2_asr_i_r_acc:
+ si_SInst_sisiu5_acc <"asr", int_hexagon_S2_asr_i_r_acc>;
+def Hexagon_S2_lsr_i_r_acc:
+ si_SInst_sisiu5_acc <"lsr", int_hexagon_S2_lsr_i_r_acc>;
+def Hexagon_S2_asl_i_r_nac:
+ si_SInst_sisiu5_nac <"asl", int_hexagon_S2_asl_i_r_nac>;
+def Hexagon_S2_asr_i_r_nac:
+ si_SInst_sisiu5_nac <"asr", int_hexagon_S2_asr_i_r_nac>;
+def Hexagon_S2_lsr_i_r_nac:
+ si_SInst_sisiu5_nac <"lsr", int_hexagon_S2_lsr_i_r_nac>;
+def Hexagon_S2_asl_i_p_acc:
+ di_SInst_didiu6_acc <"asl", int_hexagon_S2_asl_i_p_acc>;
+def Hexagon_S2_asr_i_p_acc:
+ di_SInst_didiu6_acc <"asr", int_hexagon_S2_asr_i_p_acc>;
+def Hexagon_S2_lsr_i_p_acc:
+ di_SInst_didiu6_acc <"lsr", int_hexagon_S2_lsr_i_p_acc>;
+def Hexagon_S2_asl_i_p_nac:
+ di_SInst_didiu6_nac <"asl", int_hexagon_S2_asl_i_p_nac>;
+def Hexagon_S2_asr_i_p_nac:
+ di_SInst_didiu6_nac <"asr", int_hexagon_S2_asr_i_p_nac>;
+def Hexagon_S2_lsr_i_p_nac:
+ di_SInst_didiu6_nac <"lsr", int_hexagon_S2_lsr_i_p_nac>;
+
+// STYPE / SHIFT / Shift by immediate and add.
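+// Rd32=addasl(Rt32,Rs32,#u3): Rd = Rt + (Rs << #u3).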
+def Hexagon_S2_addasl_rrri:
+ si_SInst_sisiu3 <"addasl", int_hexagon_S2_addasl_rrri>;
+
+// STYPE / SHIFT / Shift by immediate and logical.
+def Hexagon_S2_asl_i_r_and:
+ si_SInst_sisiu5_and <"asl", int_hexagon_S2_asl_i_r_and>;
+def Hexagon_S2_asr_i_r_and:
+ si_SInst_sisiu5_and <"asr", int_hexagon_S2_asr_i_r_and>;
+def Hexagon_S2_lsr_i_r_and:
+ si_SInst_sisiu5_and <"lsr", int_hexagon_S2_lsr_i_r_and>;
+
+def Hexagon_S2_asl_i_r_xacc:
+ si_SInst_sisiu5_xor <"asl", int_hexagon_S2_asl_i_r_xacc>;
+def Hexagon_S2_lsr_i_r_xacc:
+ si_SInst_sisiu5_xor <"lsr", int_hexagon_S2_lsr_i_r_xacc>;
+
+def Hexagon_S2_asl_i_r_or:
+ si_SInst_sisiu5_or <"asl", int_hexagon_S2_asl_i_r_or>;
+def Hexagon_S2_asr_i_r_or:
+ si_SInst_sisiu5_or <"asr", int_hexagon_S2_asr_i_r_or>;
+def Hexagon_S2_lsr_i_r_or:
+ si_SInst_sisiu5_or <"lsr", int_hexagon_S2_lsr_i_r_or>;
+
+def Hexagon_S2_asl_i_p_and:
+ di_SInst_didiu6_and <"asl", int_hexagon_S2_asl_i_p_and>;
+def Hexagon_S2_asr_i_p_and:
+ di_SInst_didiu6_and <"asr", int_hexagon_S2_asr_i_p_and>;
+def Hexagon_S2_lsr_i_p_and:
+ di_SInst_didiu6_and <"lsr", int_hexagon_S2_lsr_i_p_and>;
+
+def Hexagon_S2_asl_i_p_xacc:
+ di_SInst_didiu6_xor <"asl", int_hexagon_S2_asl_i_p_xacc>;
+def Hexagon_S2_lsr_i_p_xacc:
+ di_SInst_didiu6_xor <"lsr", int_hexagon_S2_lsr_i_p_xacc>;
+
+def Hexagon_S2_asl_i_p_or:
+ di_SInst_didiu6_or <"asl", int_hexagon_S2_asl_i_p_or>;
+def Hexagon_S2_asr_i_p_or:
+ di_SInst_didiu6_or <"asr", int_hexagon_S2_asr_i_p_or>;
+def Hexagon_S2_lsr_i_p_or:
+ di_SInst_didiu6_or <"lsr", int_hexagon_S2_lsr_i_p_or>;
+
+// STYPE / SHIFT / Shift right by immediate with rounding.
+def Hexagon_S2_asr_i_r_rnd:
+ si_SInst_siu5_rnd <"asr", int_hexagon_S2_asr_i_r_rnd>;
+def Hexagon_S2_asr_i_r_rnd_goodsyntax:
+ si_SInst_siu5 <"asrrnd", int_hexagon_S2_asr_i_r_rnd_goodsyntax>;
+
+// STYPE / SHIFT / Shift left by immediate with saturation.
+def Hexagon_S2_asl_i_r_sat:
+ si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_i_r_sat>;
+
+// STYPE / SHIFT / Shift by register.
+def Hexagon_S2_asl_r_r:
+ si_SInst_sisi <"asl", int_hexagon_S2_asl_r_r>;
+def Hexagon_S2_asr_r_r:
+ si_SInst_sisi <"asr", int_hexagon_S2_asr_r_r>;
+def Hexagon_S2_lsl_r_r:
+ si_SInst_sisi <"lsl", int_hexagon_S2_lsl_r_r>;
+def Hexagon_S2_lsr_r_r:
+ si_SInst_sisi <"lsr", int_hexagon_S2_lsr_r_r>;
+def Hexagon_S2_asl_r_p:
+ di_SInst_disi <"asl", int_hexagon_S2_asl_r_p>;
+def Hexagon_S2_asr_r_p:
+ di_SInst_disi <"asr", int_hexagon_S2_asr_r_p>;
+def Hexagon_S2_lsl_r_p:
+ di_SInst_disi <"lsl", int_hexagon_S2_lsl_r_p>;
+def Hexagon_S2_lsr_r_p:
+ di_SInst_disi <"lsr", int_hexagon_S2_lsr_r_p>;
+
+// STYPE / SHIFT / Shift by register and accumulate.
+def Hexagon_S2_asl_r_r_acc:
+ si_SInst_sisisi_acc <"asl", int_hexagon_S2_asl_r_r_acc>;
+def Hexagon_S2_asr_r_r_acc:
+ si_SInst_sisisi_acc <"asr", int_hexagon_S2_asr_r_r_acc>;
+def Hexagon_S2_lsl_r_r_acc:
+ si_SInst_sisisi_acc <"lsl", int_hexagon_S2_lsl_r_r_acc>;
+def Hexagon_S2_lsr_r_r_acc:
+ si_SInst_sisisi_acc <"lsr", int_hexagon_S2_lsr_r_r_acc>;
+def Hexagon_S2_asl_r_p_acc:
+ di_SInst_didisi_acc <"asl", int_hexagon_S2_asl_r_p_acc>;
+def Hexagon_S2_asr_r_p_acc:
+ di_SInst_didisi_acc <"asr", int_hexagon_S2_asr_r_p_acc>;
+def Hexagon_S2_lsl_r_p_acc:
+ di_SInst_didisi_acc <"lsl", int_hexagon_S2_lsl_r_p_acc>;
+def Hexagon_S2_lsr_r_p_acc:
+ di_SInst_didisi_acc <"lsr", int_hexagon_S2_lsr_r_p_acc>;
+
+def Hexagon_S2_asl_r_r_nac:
+ si_SInst_sisisi_nac <"asl", int_hexagon_S2_asl_r_r_nac>;
+def Hexagon_S2_asr_r_r_nac:
+ si_SInst_sisisi_nac <"asr", int_hexagon_S2_asr_r_r_nac>;
+def Hexagon_S2_lsl_r_r_nac:
+ si_SInst_sisisi_nac <"lsl", int_hexagon_S2_lsl_r_r_nac>;
+def Hexagon_S2_lsr_r_r_nac:
+ si_SInst_sisisi_nac <"lsr", int_hexagon_S2_lsr_r_r_nac>;
+def Hexagon_S2_asl_r_p_nac:
+ di_SInst_didisi_nac <"asl", int_hexagon_S2_asl_r_p_nac>;
+def Hexagon_S2_asr_r_p_nac:
+ di_SInst_didisi_nac <"asr", int_hexagon_S2_asr_r_p_nac>;
+def Hexagon_S2_lsl_r_p_nac:
+ di_SInst_didisi_nac <"lsl", int_hexagon_S2_lsl_r_p_nac>;
+def Hexagon_S2_lsr_r_p_nac:
+ di_SInst_didisi_nac <"lsr", int_hexagon_S2_lsr_r_p_nac>;
+
+// STYPE / SHIFT / Shift by register and logical.
+def Hexagon_S2_asl_r_r_and:
+ si_SInst_sisisi_and <"asl", int_hexagon_S2_asl_r_r_and>;
+def Hexagon_S2_asr_r_r_and:
+ si_SInst_sisisi_and <"asr", int_hexagon_S2_asr_r_r_and>;
+def Hexagon_S2_lsl_r_r_and:
+ si_SInst_sisisi_and <"lsl", int_hexagon_S2_lsl_r_r_and>;
+def Hexagon_S2_lsr_r_r_and:
+ si_SInst_sisisi_and <"lsr", int_hexagon_S2_lsr_r_r_and>;
+
+def Hexagon_S2_asl_r_r_or:
+ si_SInst_sisisi_or <"asl", int_hexagon_S2_asl_r_r_or>;
+def Hexagon_S2_asr_r_r_or:
+ si_SInst_sisisi_or <"asr", int_hexagon_S2_asr_r_r_or>;
+def Hexagon_S2_lsl_r_r_or:
+ si_SInst_sisisi_or <"lsl", int_hexagon_S2_lsl_r_r_or>;
+def Hexagon_S2_lsr_r_r_or:
+ si_SInst_sisisi_or <"lsr", int_hexagon_S2_lsr_r_r_or>;
+
+def Hexagon_S2_asl_r_p_and:
+ di_SInst_didisi_and <"asl", int_hexagon_S2_asl_r_p_and>;
+def Hexagon_S2_asr_r_p_and:
+ di_SInst_didisi_and <"asr", int_hexagon_S2_asr_r_p_and>;
+def Hexagon_S2_lsl_r_p_and:
+ di_SInst_didisi_and <"lsl", int_hexagon_S2_lsl_r_p_and>;
+def Hexagon_S2_lsr_r_p_and:
+ di_SInst_didisi_and <"lsr", int_hexagon_S2_lsr_r_p_and>;
+
+def Hexagon_S2_asl_r_p_or:
+ di_SInst_didisi_or <"asl", int_hexagon_S2_asl_r_p_or>;
+def Hexagon_S2_asr_r_p_or:
+ di_SInst_didisi_or <"asr", int_hexagon_S2_asr_r_p_or>;
+def Hexagon_S2_lsl_r_p_or:
+ di_SInst_didisi_or <"lsl", int_hexagon_S2_lsl_r_p_or>;
+def Hexagon_S2_lsr_r_p_or:
+ di_SInst_didisi_or <"lsr", int_hexagon_S2_lsr_r_p_or>;
+
+// STYPE / SHIFT / Shift by register with saturation.
+def Hexagon_S2_asl_r_r_sat:
+ si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_r_r_sat>;
+def Hexagon_S2_asr_r_r_sat:
+ si_SInst_sisi_sat <"asr", int_hexagon_S2_asr_r_r_sat>;
+
+// STYPE / SHIFT / Table Index.
+def HEXAGON_S2_tableidxb_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxb",int_hexagon_S2_tableidxb_goodsyntax>;
+def HEXAGON_S2_tableidxd_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxd",int_hexagon_S2_tableidxd_goodsyntax>;
+def HEXAGON_S2_tableidxh_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxh",int_hexagon_S2_tableidxh_goodsyntax>;
+def HEXAGON_S2_tableidxw_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxw",int_hexagon_S2_tableidxw_goodsyntax>;
+
+
+/********************************************************************
+* STYPE/VH *
+*********************************************************************/
+
+// STYPE / VH / Vector absolute value halfwords.
+// Rdd64=vabsh(Rss64)
+def Hexagon_A2_vabsh:
+ di_SInst_di <"vabsh", int_hexagon_A2_vabsh>;
+def Hexagon_A2_vabshsat:
+ di_SInst_di_sat <"vabsh", int_hexagon_A2_vabshsat>;
+
+// STYPE / VH / Vector shift halfwords by immediate.
+// Rdd64=v[asl/asr/lsr]h(Rss64,#u4)
+def Hexagon_S2_asl_i_vh:
+ di_SInst_disi <"vaslh", int_hexagon_S2_asl_i_vh>;
+def Hexagon_S2_asr_i_vh:
+ di_SInst_disi <"vasrh", int_hexagon_S2_asr_i_vh>;
+def Hexagon_S2_lsr_i_vh:
+ di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_i_vh>;
+
+// STYPE / VH / Vector shift halfwords by register.
+// Rdd64=v[asl/asr/lsl/lsr]h(Rss64,Rt32)
+def Hexagon_S2_asl_r_vh:
+ di_SInst_disi <"vaslh", int_hexagon_S2_asl_r_vh>;
+def Hexagon_S2_asr_r_vh:
+ di_SInst_disi <"vasrh", int_hexagon_S2_asr_r_vh>;
+def Hexagon_S2_lsl_r_vh:
+ di_SInst_disi <"vlslh", int_hexagon_S2_lsl_r_vh>;
+def Hexagon_S2_lsr_r_vh:
+ di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_r_vh>;
+
+
+/********************************************************************
+* STYPE/VW *
+*********************************************************************/
+
+// STYPE / VW / Vector absolute value words.
+def Hexagon_A2_vabsw:
+ di_SInst_di <"vabsw", int_hexagon_A2_vabsw>;
+def Hexagon_A2_vabswsat:
+ di_SInst_di_sat <"vabsw", int_hexagon_A2_vabswsat>;
+
+// STYPE / VW / Vector shift words by immediate.
+// Rdd64=v[asl/asr/lsr]w(Rss64,#u5)
+def Hexagon_S2_asl_i_vw:
+ di_SInst_disi <"vaslw", int_hexagon_S2_asl_i_vw>;
+def Hexagon_S2_asr_i_vw:
+ di_SInst_disi <"vasrw", int_hexagon_S2_asr_i_vw>;
+def Hexagon_S2_lsr_i_vw:
+ di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_i_vw>;
+
+// STYPE / VW / Vector shift words by register.
+// Rdd64=v[asl/asr/lsl/lsr]w(Rss64,Rt32)
+def Hexagon_S2_asl_r_vw:
+ di_SInst_disi <"vaslw", int_hexagon_S2_asl_r_vw>;
+def Hexagon_S2_asr_r_vw:
+ di_SInst_disi <"vasrw", int_hexagon_S2_asr_r_vw>;
+def Hexagon_S2_lsl_r_vw:
+ di_SInst_disi <"vlslw", int_hexagon_S2_lsl_r_vw>;
+def Hexagon_S2_lsr_r_vw:
+ di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_r_vw>;
+
+// STYPE / VW / Vector shift words with truncate and pack.
+def Hexagon_S2_asr_r_svw_trun:
+ si_SInst_disi <"vasrw", int_hexagon_S2_asr_r_svw_trun>;
+def Hexagon_S2_asr_i_svw_trun:
+ si_SInst_diu5 <"vasrw", int_hexagon_S2_asr_i_svw_trun>;
+
+include "HexagonIntrinsicsV3.td"
+include "HexagonIntrinsicsV4.td"
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
new file mode 100644
index 0000000..68eaf68
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
@@ -0,0 +1,29 @@
+//===-- HexagonIntrinsicsDerived.td - Derived intrinsics ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Multiply two 64-bit values and use the lower 64 bits of the result,
+// optimized with the multiply-accumulate intrinsics.
+//
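+// A sketch of the decomposition this pattern implements (annotation, not in
+// the original file): write a = a_hi:a_lo and b = b_hi:b_lo; then
+//   lo64(a*b) = ((hi32(a_lo*b_lo) + a_lo*b_hi + a_hi*b_lo) << 32)
+//               | lo32(a_lo*b_lo)
+// MPYU64 forms the full 64-bit a_lo*b_lo product, the two Hexagon_M2_maci
+// steps add the cross terms into its high word, and COMBINE_rr repacks the
+// two halves into a 64-bit register pair.
+//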
+def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2),
+ (COMBINE_rr
+ (Hexagon_M2_maci
+ (Hexagon_M2_maci (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)),
+ subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)),
+ (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)),
+ subreg_loreg))>;
+
+
+
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/lib/Target/Hexagon/HexagonIntrinsicsV3.td
new file mode 100644
index 0000000..2a54e62
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV3.td
@@ -0,0 +1,50 @@
+//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+
+// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
+def Hexagon_M2_vrcmpys_s1:
+ di_MInst_disi_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1>;
+def Hexagon_M2_vrcmpys_acc_s1:
+ di_MInst_didisi_acc_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_acc_s1>;
+def Hexagon_M2_vrcmpys_s1rp:
+ si_MInst_disi_s1_rnd_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1rp>;
+
+
+
+
+/********************************************************************
+* MTYPE/VB *
+*********************************************************************/
+
+// MTYPE / VH / Vector reduce add unsigned halfwords.
+def Hexagon_M2_vradduh:
+ si_MInst_didi <"vradduh", int_hexagon_M2_vradduh>;
+
+
+/********************************************************************
+* ALU64/ALU *
+*********************************************************************/
+
+// ALU64 / ALU / Add.
+def Hexagon_A2_addsp:
+ di_ALU64_sidi <"add", int_hexagon_A2_addsp>;
+def Hexagon_A2_addpsat:
+ di_ALU64_didi <"add", int_hexagon_A2_addpsat>;
+
+def Hexagon_A2_maxp:
+ di_ALU64_didi <"max", int_hexagon_A2_maxp>;
+def Hexagon_A2_maxup:
+ di_ALU64_didi <"maxu", int_hexagon_A2_maxup>;
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
new file mode 100644
index 0000000..dd28ebb
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@@ -0,0 +1,369 @@
+//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is populated based on the following specs:
+// Hexagon V4 Architecture Extensions
+// Application-Level Specification
+// 80-V9418-12 Rev. A
+// June 15, 2010
+
+
+//
+// ALU 32 types.
+//
+
+class si_ALU32_sisi_not<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_ALU32_s8si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>;
+
+class di_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_neg_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_neg_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_neg_ALU32_siu9<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_neg_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_neg_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+
+//
+// SInst Classes.
+//
+class qi_neg_SInst_qiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_qi_andqiqi_neg<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+                   "($src1, and($src2, !$src3))")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class qi_SInst_qi_andqiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+                   "($src1, and($src2, $src3))")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class qi_SInst_qi_orqiqi_neg<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+                   "($src1, or($src2, !$src3))")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class qi_SInst_qi_orqiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+                   "($src1, or($src2, $src3))")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_si_addsis6<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+                   "($src1, add($src2, #$src3))")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_SInst_si_subs6si<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+                   "($src1, sub(#$src2, $src3))")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2,
+ IntRegs:$src3))]>;
+
+class di_ALU64_didi_neg<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_MInst_dididi_xacc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_and<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst &= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_MInst_sisisi_andn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst &= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_sisis10_andi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, and($src2, #$src3))")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_MInst_sisisi_xor<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_MInst_sisisi_xorn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_sisis10_or<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, s10Imm:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_MInst_sisisi_or<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_MInst_sisisi_orn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_siu5_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+
+/********************************************************************
+* ALU32/ALU *
+*********************************************************************/
+
+// ALU32 / ALU / Logical Operations.
+def Hexagon_A4_orn : si_ALU32_sisi_not <"or", int_hexagon_A4_orn>;
+def Hexagon_A4_andn : si_ALU32_sisi_not <"and", int_hexagon_A4_andn>;
+
+
+/********************************************************************
+* ALU32/PERM *
+*********************************************************************/
+
+// ALU32 / PERM / Combine Words Into Doublewords.
+def Hexagon_A4_combineir : di_ALU32_s8si <"combine", int_hexagon_A4_combineir>;
+def Hexagon_A4_combineri : di_ALU32_sis8 <"combine", int_hexagon_A4_combineri>;
+
+
+/********************************************************************
+* ALU32/PRED *
+*********************************************************************/
+
+// ALU32 / PRED / Conditional Shift Halfword.
+// ALU32 / PRED / Conditional Sign Extend.
+// ALU32 / PRED / Conditional Zero Extend.
+// ALU32 / PRED / Compare.
+def Hexagon_C4_cmpneq : qi_neg_ALU32_sisi <"cmp.eq", int_hexagon_C4_cmpneq>;
+def Hexagon_C4_cmpneqi : qi_neg_ALU32_sis10 <"cmp.eq", int_hexagon_C4_cmpneqi>;
+def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>;
+def Hexagon_C4_cmpltei : qi_neg_ALU32_sis10 <"cmp.gt", int_hexagon_C4_cmpltei>;
+def Hexagon_C4_cmplteu : qi_neg_ALU32_sisi <"cmp.gtu",int_hexagon_C4_cmplteu>;
+def Hexagon_C4_cmplteui: qi_neg_ALU32_siu9 <"cmp.gtu",int_hexagon_C4_cmplteui>;
+
+// ALU32 / PRED / Compare to General Register.
+def Hexagon_A4_rcmpneq : si_neg_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpneq>;
+def Hexagon_A4_rcmpneqi: si_neg_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpneqi>;
+def Hexagon_A4_rcmpeq : si_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpeq>;
+def Hexagon_A4_rcmpeqi : si_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpeqi>;
+
+
+/********************************************************************
+* CR *
+*********************************************************************/
+
+// CR / Corner Detection Acceleration.
+def Hexagon_C4_fastcorner9:
+ qi_SInst_qiqi<"fastcorner9", int_hexagon_C4_fastcorner9>;
+def Hexagon_C4_fastcorner9_not:
+ qi_neg_SInst_qiqi<"fastcorner9",int_hexagon_C4_fastcorner9_not>;
+
+// CR / Logical Operations On Predicates.
+def Hexagon_C4_and_andn:
+ qi_SInst_qi_andqiqi_neg <"and", int_hexagon_C4_and_andn>;
+def Hexagon_C4_and_and:
+ qi_SInst_qi_andqiqi <"and", int_hexagon_C4_and_and>;
+def Hexagon_C4_and_orn:
+ qi_SInst_qi_orqiqi_neg <"and", int_hexagon_C4_and_orn>;
+def Hexagon_C4_and_or:
+ qi_SInst_qi_orqiqi <"and", int_hexagon_C4_and_or>;
+def Hexagon_C4_or_andn:
+ qi_SInst_qi_andqiqi_neg <"or", int_hexagon_C4_or_andn>;
+def Hexagon_C4_or_and:
+ qi_SInst_qi_andqiqi <"or", int_hexagon_C4_or_and>;
+def Hexagon_C4_or_orn:
+ qi_SInst_qi_orqiqi_neg <"or", int_hexagon_C4_or_orn>;
+def Hexagon_C4_or_or:
+ qi_SInst_qi_orqiqi <"or", int_hexagon_C4_or_or>;
+
+
+/********************************************************************
+* XTYPE/ALU *
+*********************************************************************/
+
+// XTYPE / ALU / Add And Accumulate.
+def Hexagon_S4_addaddi:
+ si_SInst_si_addsis6 <"add", int_hexagon_S4_addaddi>;
+def Hexagon_S4_subaddi:
+ si_SInst_si_subs6si <"add", int_hexagon_S4_subaddi>;
+
+// XTYPE / ALU / Logical Doublewords.
+def Hexagon_S4_andnp:
+ di_ALU64_didi_neg <"and", int_hexagon_A4_andnp>;
+def Hexagon_S4_ornp:
+ di_ALU64_didi_neg <"or", int_hexagon_A4_ornp>;
+
+// XTYPE / ALU / Logical-logical Doublewords.
+def Hexagon_M4_xor_xacc:
+ di_MInst_dididi_xacc <"xor", int_hexagon_M4_xor_xacc>;
+
+// XTYPE / ALU / Logical-logical Words.
+def HEXAGON_M4_and_and:
+ si_MInst_sisisi_and <"and", int_hexagon_M4_and_and>;
+def HEXAGON_M4_and_or:
+ si_MInst_sisisi_and <"or", int_hexagon_M4_and_or>;
+def HEXAGON_M4_and_xor:
+ si_MInst_sisisi_and <"xor", int_hexagon_M4_and_xor>;
+def HEXAGON_M4_and_andn:
+ si_MInst_sisisi_andn <"and", int_hexagon_M4_and_andn>;
+def HEXAGON_M4_xor_and:
+ si_MInst_sisisi_xor <"and", int_hexagon_M4_xor_and>;
+def HEXAGON_M4_xor_or:
+ si_MInst_sisisi_xor <"or", int_hexagon_M4_xor_or>;
+def HEXAGON_M4_xor_andn:
+ si_MInst_sisisi_xorn <"and", int_hexagon_M4_xor_andn>;
+def HEXAGON_M4_or_and:
+ si_MInst_sisisi_or <"and", int_hexagon_M4_or_and>;
+def HEXAGON_M4_or_or:
+ si_MInst_sisisi_or <"or", int_hexagon_M4_or_or>;
+def HEXAGON_M4_or_xor:
+ si_MInst_sisisi_or <"xor", int_hexagon_M4_or_xor>;
+def HEXAGON_M4_or_andn:
+ si_MInst_sisisi_orn <"and", int_hexagon_M4_or_andn>;
+def HEXAGON_S4_or_andix:
+ si_SInst_sisis10_andi <"or", int_hexagon_S4_or_andix>;
+def HEXAGON_S4_or_andi:
+ si_SInst_sisis10_or <"and", int_hexagon_S4_or_andi>;
+def HEXAGON_S4_or_ori:
+ si_SInst_sisis10_or <"or", int_hexagon_S4_or_ori>;
+
+// XTYPE / ALU / Modulo wrap.
+def HEXAGON_A4_modwrapu:
+ si_ALU64_sisi <"modwrap", int_hexagon_A4_modwrapu>;
+
+// XTYPE / ALU / Round.
+def HEXAGON_A4_cround_ri:
+ si_SInst_siu5 <"cround", int_hexagon_A4_cround_ri>;
+def HEXAGON_A4_cround_rr:
+ si_SInst_sisi <"cround", int_hexagon_A4_cround_rr>;
+def HEXAGON_A4_round_ri:
+ si_SInst_siu5 <"round", int_hexagon_A4_round_ri>;
+def HEXAGON_A4_round_rr:
+ si_SInst_sisi <"round", int_hexagon_A4_round_rr>;
+def HEXAGON_A4_round_ri_sat:
+ si_SInst_siu5_sat <"round", int_hexagon_A4_round_ri_sat>;
+def HEXAGON_A4_round_rr_sat:
+ si_SInst_sisi_sat <"round", int_hexagon_A4_round_rr_sat>;
+
+// XTYPE / ALU / Vector reduce add unsigned halfwords.
+// XTYPE / ALU / Vector add bytes.
+// XTYPE / ALU / Vector conditional negate.
+// XTYPE / ALU / Vector maximum bytes.
+// XTYPE / ALU / Vector reduce maximum halfwords.
+// XTYPE / ALU / Vector reduce maximum words.
+// XTYPE / ALU / Vector minimum bytes.
+// XTYPE / ALU / Vector reduce minimum halfwords.
+// XTYPE / ALU / Vector reduce minimum words.
+// XTYPE / ALU / Vector subtract bytes.
+
+
+/********************************************************************
+* XTYPE/BIT *
+*********************************************************************/
+
+// XTYPE / BIT / Count leading.
+// XTYPE / BIT / Count trailing.
+// XTYPE / BIT / Extract bitfield.
+// XTYPE / BIT / Masked parity.
+// XTYPE / BIT / Bit reverse.
+// XTYPE / BIT / Split bitfield.
+
+
+/********************************************************************
+* XTYPE/COMPLEX *
+*********************************************************************/
+
+// XTYPE / COMPLEX / Complex add/sub halfwords.
+// XTYPE / COMPLEX / Complex add/sub words.
+// XTYPE / COMPLEX / Complex multiply 32x16.
+// XTYPE / COMPLEX / Vector reduce complex rotate.
+
+
+/********************************************************************
+* XTYPE/MPY *
+*********************************************************************/
+
+// XTYPE / COMPLEX / Complex add/sub halfwords.
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
new file mode 100644
index 0000000..0318c51
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -0,0 +1,75 @@
+//=- HexagonMachineFunctionInfo.h - Hexagon machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonMACHINEFUNCTIONINFO_H
+#define HexagonMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+ namespace Hexagon {
+ const unsigned int StartPacket = 0x1;
+ const unsigned int EndPacket = 0x2;
+ }
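+  // Annotation: StartPacket and EndPacket are bit flags, so an instruction
+  // that forms a packet by itself carries both, i.e. PacketInfo[MI] ==
+  // (Hexagon::StartPacket | Hexagon::EndPacket).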
+
+
+/// Hexagon target-specific information for each MachineFunction.
+class HexagonMachineFunctionInfo : public MachineFunctionInfo {
+ // SRetReturnReg - Some subtargets require that sret lowering includes
+ // returning the value of the returned struct in a register. This field
+ // holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+ std::vector<MachineInstr*> AllocaAdjustInsts;
+ int VarArgsFrameIndex;
+ bool HasClobberLR;
+
+ std::map<const MachineInstr*, unsigned> PacketInfo;
+
+
+public:
+  HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(false) {}
+
+  HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0),
+                                                    HasClobberLR(false) {}
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ void addAllocaAdjustInst(MachineInstr* MI) {
+ AllocaAdjustInsts.push_back(MI);
+ }
+ const std::vector<MachineInstr*>& getAllocaAdjustInsts() {
+ return AllocaAdjustInsts;
+ }
+
+ void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; }
+ int getVarArgsFrameIndex() { return VarArgsFrameIndex; }
+
+ void setStartPacket(MachineInstr* MI) {
+ PacketInfo[MI] |= Hexagon::StartPacket;
+ }
+ void setEndPacket(MachineInstr* MI) {
+ PacketInfo[MI] |= Hexagon::EndPacket;
+ }
+ bool isStartPacket(const MachineInstr* MI) const {
+ return (PacketInfo.count(MI) &&
+ (PacketInfo.find(MI)->second & Hexagon::StartPacket));
+ }
+ bool isEndPacket(const MachineInstr* MI) const {
+ return (PacketInfo.count(MI) &&
+ (PacketInfo.find(MI)->second & Hexagon::EndPacket));
+ }
+ void setHasClobberLR(bool v) { HasClobberLR = v; }
+ bool hasClobberLR() const { return HasClobberLR; }
+
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp b/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp
new file mode 100644
index 0000000..1229aca
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonOptimizeSZExtends.cpp
@@ -0,0 +1,129 @@
+//===-- HexagonOptimizeSZExtends.cpp - Remove redundant sign/zero extends -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+ struct HexagonOptimizeSZExtends : public MachineFunctionPass {
+
+ public:
+ static char ID;
+ HexagonOptimizeSZExtends() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+      return "Hexagon remove redundant zero and sign extends";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+  };
+}
+
+char HexagonOptimizeSZExtends::ID = 0;
+
+// This is a brain dead pass to get rid of redundant sign extends for the
+// following case:
+//
+// Transform the following pattern
+// %vreg170<def> = SXTW %vreg166
+// ...
+// %vreg176<def> = COPY %vreg170:subreg_loreg
+//
+// Into
+// %vreg176<def> = COPY %vreg166
+
+bool HexagonOptimizeSZExtends::runOnMachineFunction(MachineFunction &MF) {
+ DenseMap<unsigned, unsigned> SExtMap;
+
+ // Loop over all of the basic blocks
+ for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ SExtMap.clear();
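+    // Annotation: the map is reset for each block, so a sign extend is only
+    // forwarded into COPYs within the same basic block.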
+
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ // Look for sign extends:
+ // %vreg170<def> = SXTW %vreg166
+ if (MI->getOpcode() == Hexagon::SXTW) {
+ assert (MI->getNumOperands() == 2);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src.getReg();
+ // Just handle virtual registers.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Map the following:
+ // %vreg170<def> = SXTW %vreg166
+ // SExtMap[170] = vreg166
+ SExtMap[DstReg] = SrcReg;
+ }
+ }
+ // Look for copy:
+ // %vreg176<def> = COPY %vreg170:subreg_loreg
+ if (MI->isCopy()) {
+ assert (MI->getNumOperands() == 2);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+
+ // Make sure we are copying the lower 32 bits.
+ if (Src.getSubReg() != Hexagon::subreg_loreg)
+ continue;
+
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Try to find in the map.
+ if (unsigned SextSrc = SExtMap.lookup(SrcReg)) {
+ // Change the 1st operand.
+ MI->RemoveOperand(1);
+ MI->addOperand(MachineOperand::CreateReg(SextSrc, false));
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+
+FunctionPass *llvm::createHexagonOptimizeSZExtends() {
+ return new HexagonOptimizeSZExtends();
+}
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
new file mode 100644
index 0000000..521e0c1
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -0,0 +1,323 @@
+//==- HexagonRegisterInfo.cpp - Hexagon Register Information -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <iostream>
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Function.h"
+using namespace llvm;
+
+
+HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st,
+ const HexagonInstrInfo &tii)
+ : HexagonGenRegisterInfo(Hexagon::R31),
+ Subtarget(st),
+ TII(tii) {
+}
+
+const unsigned* HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction
+ *MF)
+ const {
+ static const unsigned CalleeSavedRegsV2[] = {
+ Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
+ };
+ static const unsigned CalleeSavedRegsV3[] = {
+ Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
+ Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23,
+ Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
+ };
+
+  switch(Subtarget.getHexagonArchVersion()) {
+  case HexagonSubtarget::V2:
+    return CalleeSavedRegsV2;
+  case HexagonSubtarget::V3:
+  case HexagonSubtarget::V4:
+    return CalleeSavedRegsV3;
+  default:
+    const char *ErrorString =
+      "Callee saved registers requested for unknown architecture version";
+    llvm_unreachable(ErrorString);
+  }
+}
+
+BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
+ const {
+ BitVector Reserved(getNumRegs());
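+  // Annotation: R29/R30/R31 are the stack pointer, frame pointer and link
+  // register (see the accessors below), D14/D15 alias r29:28 and r31:30,
+  // LC0/LC1 and SA0/SA1 are the hardware-loop count and start-address
+  // registers, and HEXAGON_RESERVED_REG_1/2 are the scratch registers
+  // reserved in HexagonRegisterInfo.h.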
+ Reserved.set(HEXAGON_RESERVED_REG_1);
+ Reserved.set(HEXAGON_RESERVED_REG_2);
+ Reserved.set(Hexagon::R29);
+ Reserved.set(Hexagon::R30);
+ Reserved.set(Hexagon::R31);
+ Reserved.set(Hexagon::D14);
+ Reserved.set(Hexagon::D15);
+ Reserved.set(Hexagon::LC0);
+ Reserved.set(Hexagon::LC1);
+ Reserved.set(Hexagon::SA0);
+ Reserved.set(Hexagon::SA1);
+ return Reserved;
+}
+
+
+const TargetRegisterClass* const*
+HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClassesV2[] = {
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ };
+ static const TargetRegisterClass * const CalleeSavedRegClassesV3[] = {
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ };
+
+  switch(Subtarget.getHexagonArchVersion()) {
+  case HexagonSubtarget::V2:
+    return CalleeSavedRegClassesV2;
+  case HexagonSubtarget::V3:
+  case HexagonSubtarget::V4:
+    return CalleeSavedRegClassesV3;
+  default:
+    const char *ErrorString =
+      "Callee saved register classes requested for unknown architecture version";
+    llvm_unreachable(ErrorString);
+  }
+}
+
+void HexagonRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+
+ if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) {
+ // Hexagon_TODO: add code
+ } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) {
+ // Hexagon_TODO: add code
+ } else {
+ assert(0 && "Cannot handle this call frame pseudo instruction");
+ }
+ MBB.erase(I);
+}
+
+void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+
+  // Hexagon_TODO: Do we need to enforce this for Hexagon?
+  assert(SPAdj == 0 && "Unexpected");
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ // Addressable stack objects are accessed using neg. offsets from %fp.
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ unsigned FrameReg = getFrameRegister(MF);
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (!TFI->hasFP(MF)) {
+ // We will not reserve space on the stack for the lr and fp registers.
+ Offset -= 2 * Hexagon_WordSize;
+ }
+
+ const unsigned FrameSize = MFI.getStackSize();
+
+ if (!MFI.hasVarSizedObjects() &&
+ TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) &&
+ !TII.isSpillPredRegOp(&MI)) {
+ // Replace frame index with a stack pointer reference.
+ MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset);
+ } else {
+ // Replace frame index with a frame pointer reference.
+ if (!TII.isValidOffset(MI.getOpcode(), Offset)) {
+
+ // If the offset overflows, then correct it.
+ //
+ // For loads, we do not need a reserved register
+ // r0 = memw(r30 + #10000) to:
+ //
+ // r0 = add(r30, #10000)
+ // r0 = memw(r0)
+ if ( (MI.getOpcode() == Hexagon::LDriw) ||
+ (MI.getOpcode() == Hexagon::LDrid) ||
+ (MI.getOpcode() == Hexagon::LDrih) ||
+ (MI.getOpcode() == Hexagon::LDriuh) ||
+ (MI.getOpcode() == Hexagon::LDrib) ||
+ (MI.getOpcode() == Hexagon::LDriub) ) {
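+        // Annotation: for the 64-bit LDrid the low subregister of the
+        // destination pair doubles as the scratch register for the address
+        // arithmetic; the 32-bit loads can reuse their destination directly.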
+ unsigned dstReg = (MI.getOpcode() == Hexagon::LDrid) ?
+ *getSubRegisters(MI.getOperand(0).getReg()) :
+ MI.getOperand(0).getReg();
+
+ // Check if offset can fit in addi.
+ if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ dstReg).addReg(FrameReg).addReg(dstReg);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ dstReg).addReg(FrameReg).addImm(Offset);
+ }
+
+ MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ } else if ((MI.getOpcode() == Hexagon::STriw) ||
+ (MI.getOpcode() == Hexagon::STrid) ||
+ (MI.getOpcode() == Hexagon::STrih) ||
+ (MI.getOpcode() == Hexagon::STrib) ||
+ (MI.getOpcode() == Hexagon::STriwt)) {
+ // For stores, we need a reserved register. Change
+ // memw(r30 + #10000) = r0 to:
+ //
+ // rs = add(r30, #10000);
+ // memw(rs) = r0
+ unsigned resReg = HEXAGON_RESERVED_REG_1;
+
+ // Check if offset can fit in addi.
+ if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ resReg).addReg(FrameReg).addReg(resReg);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ resReg).addReg(FrameReg).addImm(Offset);
+ }
+ MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ } else if (TII.isMemOp(&MI)) {
+ unsigned resReg = HEXAGON_RESERVED_REG_1;
+ if (!MFI.hasVarSizedObjects() &&
+ TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
+ MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false,
+ true);
+ MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset);
+ } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ resReg).addReg(FrameReg).addReg(resReg);
+ MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ resReg).addReg(FrameReg).addImm(Offset);
+ MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ }
+ } else {
+ unsigned dstReg = MI.getOperand(0).getReg();
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ dstReg).addReg(FrameReg).addReg(dstReg);
+        // TODO: can we delete MI? It reduces to r2 = add(r2, #0).
+ MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ }
+ } else {
+ // If the offset is small enough to fit in the immediate field, directly
+ // encode it.
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ }
+ }
+
+}
+
+unsigned HexagonRegisterInfo::getRARegister() const {
+ return Hexagon::R31;
+}
+
+unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
+ &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (TFI->hasFP(MF)) {
+ return Hexagon::R30;
+ }
+
+ return Hexagon::R29;
+}
+
+unsigned HexagonRegisterInfo::getFrameRegister() const {
+ return Hexagon::R30;
+}
+
+unsigned HexagonRegisterInfo::getStackRegister() const {
+ return Hexagon::R29;
+}
+
+void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove>
+ &Moves) const
+{
+ // VirtualFP = (R30 + #0).
+ unsigned FPReg = getFrameRegister();
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(FPReg, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+unsigned HexagonRegisterInfo::getEHExceptionRegister() const {
+ assert(0 && "What is the exception register");
+ return 0;
+}
+
+unsigned HexagonRegisterInfo::getEHHandlerRegister() const {
+ assert(0 && "What is the exception handler register");
+ return 0;
+}
+
+#define GET_REGINFO_TARGET_DESC
+#include "HexagonGenRegisterInfo.inc"
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
new file mode 100644
index 0000000..33b0c14
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -0,0 +1,89 @@
+//==- HexagonRegisterInfo.h - Hexagon Register Information Impl --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonREGISTERINFO_H
+#define HexagonREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#define GET_REGINFO_HEADER
+#include "HexagonGenRegisterInfo.inc"
+#include "llvm/MC/MachineLocation.h"
+
+//
+// We try not to hard code the reserved registers in our code,
+// so the following two macros were defined. However, there
+// are still a few places where R10 and R11 are hard wired.
+// See below. If, in the future, we decide to change the reserved
+// registers, don't forget to update the following places:
+//
+// 1. the "Defs" set of STriw_pred in HexagonInstrInfo.td
+// 2. the "Defs" set of LDri_pred in HexagonInstrInfo.td
+// 3. the definition of "IntRegs" in HexagonRegisterInfo.td
+// 4. the definition of "DoubleRegs" in HexagonRegisterInfo.td
+//
+#define HEXAGON_RESERVED_REG_1 Hexagon::R10
+#define HEXAGON_RESERVED_REG_2 Hexagon::R11
+
+namespace llvm {
+
+class HexagonSubtarget;
+class HexagonInstrInfo;
+class Type;
+
+struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
+ HexagonSubtarget &Subtarget;
+ const HexagonInstrInfo &TII;
+
+ HexagonRegisterInfo(HexagonSubtarget &st, const HexagonInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ /// determineFrameLayout - Determine the size of the frame and maximum call
+ /// frame size.
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// requiresRegisterScavenging - returns true since we may need scavenging for
+ /// a temporary register when generating hardware loop instructions.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getFrameRegister() const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+ unsigned getStackRegister() const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
new file mode 100644
index 0000000..c05f844
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -0,0 +1,169 @@
+//===- HexagonRegisterInfo.td - Hexagon Register defs ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Hexagon register file.
+//===----------------------------------------------------------------------===//
+
+class HexagonReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "Hexagon";
+}
+
+class HexagonDoubleReg<string n, list<Register> subregs> :
+ RegisterWithSubRegs<n, subregs> {
+ field bits<5> Num;
+ let Namespace = "Hexagon";
+}
+
+// Registers are identified with 5-bit ID numbers.
+// Ri - 32-bit integer registers.
+class Ri<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+}
+
+// Rf - 32-bit floating-point registers.
+class Rf<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+}
+
+
+// Rd - 64 bit registers.
+class Rd<bits<5> num, string n, list<Register> subregs> :
+HexagonDoubleReg<n, subregs> {
+ let Num = num;
+ let SubRegs = subregs;
+}
+
+
+class Rp<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+}
+
+class Rc<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+}
+
+let Namespace = "Hexagon" in {
+
+ def subreg_loreg : SubRegIndex;
+ def subreg_hireg : SubRegIndex;
+
+ // Integer registers.
+ def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>;
+ def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>;
+ def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
+ def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>;
+ def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>;
+ def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
+ def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>;
+ def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>;
+ def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>;
+ def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
+ def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
+ def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
+ def R12 : Ri<12, "r12">, DwarfRegNum<[12]>;
+ def R13 : Ri<13, "r13">, DwarfRegNum<[13]>;
+ def R14 : Ri<14, "r14">, DwarfRegNum<[14]>;
+ def R15 : Ri<15, "r15">, DwarfRegNum<[15]>;
+ def R16 : Ri<16, "r16">, DwarfRegNum<[16]>;
+ def R17 : Ri<17, "r17">, DwarfRegNum<[17]>;
+ def R18 : Ri<18, "r18">, DwarfRegNum<[18]>;
+ def R19 : Ri<19, "r19">, DwarfRegNum<[19]>;
+ def R20 : Ri<20, "r20">, DwarfRegNum<[20]>;
+ def R21 : Ri<21, "r21">, DwarfRegNum<[21]>;
+ def R22 : Ri<22, "r22">, DwarfRegNum<[22]>;
+ def R23 : Ri<23, "r23">, DwarfRegNum<[23]>;
+ def R24 : Ri<24, "r24">, DwarfRegNum<[24]>;
+ def R25 : Ri<25, "r25">, DwarfRegNum<[25]>;
+ def R26 : Ri<26, "r26">, DwarfRegNum<[26]>;
+ def R27 : Ri<27, "r27">, DwarfRegNum<[27]>;
+ def R28 : Ri<28, "r28">, DwarfRegNum<[28]>;
+ def R29 : Ri<29, "r29">, DwarfRegNum<[29]>;
+ def R30 : Ri<30, "r30">, DwarfRegNum<[30]>;
+ def R31 : Ri<31, "r31">, DwarfRegNum<[31]>;
+
+
+  def PC : Ri<31, "pc">, DwarfRegNum<[32]>;
+  def GP : Ri<31, "gp">, DwarfRegNum<[33]>;
+
+ // Aliases of the R* registers used to hold 64-bit int values (doubles).
+ let SubRegIndices = [subreg_loreg, subreg_hireg] in {
+ def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>;
+ def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>;
+ def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>;
+ def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>;
+ def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>;
+ def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>;
+ def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>;
+ def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>;
+ def D8 : Rd<16, "r17:16", [R16, R17]>, DwarfRegNum<[48]>;
+ def D9 : Rd<18, "r19:18", [R18, R19]>, DwarfRegNum<[50]>;
+ def D10 : Rd<20, "r21:20", [R20, R21]>, DwarfRegNum<[52]>;
+ def D11 : Rd<22, "r23:22", [R22, R23]>, DwarfRegNum<[54]>;
+ def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>;
+ def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>;
+ def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>;
+ def D15 : Rd<30, "r31:30", [R30, R31]>, DwarfRegNum<[62]>;
+ }
+
+ // Predicate registers.
+  def P0 : Rp< 0, "p0">, DwarfRegNum<[63]>;
+  def P1 : Rp< 1, "p1">, DwarfRegNum<[64]>;
+  def P2 : Rp< 2, "p2">, DwarfRegNum<[65]>;
+  def P3 : Rp< 3, "p3">, DwarfRegNum<[66]>;
+
+ // Control registers.
+  def SA0 : Rc<0, "sa0">, DwarfRegNum<[67]>;
+  def LC0 : Rc<1, "lc0">, DwarfRegNum<[68]>;
+
+  def SA1 : Rc<2, "sa1">, DwarfRegNum<[69]>;
+  def LC1 : Rc<3, "lc1">, DwarfRegNum<[70]>;
+}
+
+// Register classes.
+//
+// FIXME: the register order should be defined in terms of the preferred
+// allocation order...
+//
+def IntRegs : RegisterClass<"Hexagon", [i32], 32, (add (sequence "R%u", 0, 9),
+ (sequence "R%u", 12, 28),
+ R10, R11, R29, R30,
+ R31)> {
+}
+
+
+
+def DoubleRegs : RegisterClass<"Hexagon", [i64], 64, (add (sequence "D%u", 0,
+ 4),
+ (sequence "D%u", 6, 13),
+ D5, D14, D15)> {
+ let SubRegClasses = [(IntRegs subreg_loreg, subreg_hireg)];
+}
+
+
+def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))>
+{
+ let Size = 32;
+}
+
+def CRRegs : RegisterClass<"Hexagon", [i32], 32, (add (sequence "LC%u", 0, 1),
+ (sequence "SA%u", 0, 1),
+ PC)> {
+ let Size = 32;
+}
diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
new file mode 100644
index 0000000..3ca257f
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -0,0 +1,85 @@
+//=- HexagonRemoveExtendArgs.cpp - Remove unnecessary argument sign extends -=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass that removes sign extends for function parameters. These parameters
+// are already sign extended by the caller per Hexagon's ABI.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "HexagonTargetMachine.h"
+#include <iostream>
+
+using namespace llvm;
+namespace {
+ struct HexagonRemoveExtendArgs : public FunctionPass {
+ public:
+ static char ID;
+ HexagonRemoveExtendArgs() : FunctionPass(ID) {}
+ virtual bool runOnFunction(Function &F);
+
+ const char *getPassName() const {
+ return "Remove sign extends";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char HexagonRemoveExtendArgs::ID = 0;
+static RegisterPass<HexagonRemoveExtendArgs>
+X("reargs", "Remove Sign and Zero Extends for Args");
+
+
+
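+// Sketch of the rewrite (annotation, with hypothetical IR): for an i32
+// argument %a carrying the signext attribute, every
+//   %w = sext i32 %a to i64
+// in the body is replaced by a single sext of %a hoisted to the entry
+// block, and the originals are erased. Because the caller has already sign
+// extended %a per the ABI, the one remaining sext is a natural candidate
+// for later elimination.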
+bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
+ unsigned Idx = 1;
+ for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE;
+ ++AI, ++Idx) {
+ if (F.paramHasAttr(Idx, Attribute::SExt)) {
+ Argument* Arg = AI;
+ if (!isa<PointerType>(Arg->getType())) {
+ for (Instruction::use_iterator UI = Arg->use_begin();
+ UI != Arg->use_end();) {
+ if (isa<SExtInst>(*UI)) {
+ Instruction* Use = cast<Instruction>(*UI);
+ SExtInst* SI = new SExtInst(Arg, Use->getType());
+ assert (EVT::getEVT(SI->getType()) ==
+ (EVT::getEVT(Use->getType())));
+ ++UI;
+ Use->replaceAllUsesWith(SI);
+ Instruction* First = F.getEntryBlock().begin();
+ SI->insertBefore(First);
+ Use->eraseFromParent();
+ } else {
+ ++UI;
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+
+
+
+FunctionPass *llvm::createHexagonRemoveExtendOps(HexagonTargetMachine &TM) {
+ return new HexagonRemoveExtendArgs();
+}
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
new file mode 100644
index 0000000..427d1cb
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -0,0 +1,53 @@
+//===-HexagonSchedule.td - Hexagon Scheduling Definitions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Functional Units
+def LUNIT : FuncUnit;
+def LSUNIT : FuncUnit;
+def MUNIT : FuncUnit;
+def SUNIT : FuncUnit;
+
+
+// Itinerary classes
+def ALU32 : InstrItinClass;
+def ALU64 : InstrItinClass;
+def CR : InstrItinClass;
+def J : InstrItinClass;
+def JR : InstrItinClass;
+def LD : InstrItinClass;
+def M : InstrItinClass;
+def ST : InstrItinClass;
+def S : InstrItinClass;
+def PSEUDO : InstrItinClass;
+
+
+def HexagonItineraries :
+ ProcessorItineraries<[LUNIT, LSUNIT, MUNIT, SUNIT], [], [
+ InstrItinData<ALU32 , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
+ InstrItinData<ALU64 , [InstrStage<1, [MUNIT, SUNIT]>]>,
+ InstrItinData<CR , [InstrStage<1, [SUNIT]>]>,
+ InstrItinData<J , [InstrStage<1, [SUNIT, MUNIT]>]>,
+ InstrItinData<JR , [InstrStage<1, [MUNIT]>]>,
+ InstrItinData<LD , [InstrStage<1, [LUNIT, LSUNIT]>]>,
+ InstrItinData<M , [InstrStage<1, [MUNIT, SUNIT]>]>,
+ InstrItinData<ST , [InstrStage<1, [LSUNIT]>]>,
+ InstrItinData<S , [InstrStage<1, [SUNIT, MUNIT]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>
+]>;
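+// Annotation (a reading of the table above, not from the original file):
+// each InstrStage<1, [...]> means the instruction spends one cycle on any
+// one of the listed units, so ALU32 may issue on all four pipes while JR
+// is tied to MUNIT.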
+
+
+//===----------------------------------------------------------------------===//
+// V4 Machine Info +
+//===----------------------------------------------------------------------===//
+
+include "HexagonScheduleV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Machine Info -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td
new file mode 100644
index 0000000..4cf66fe
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -0,0 +1,56 @@
+//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// There are four slots (four parallel pipelines) in the Hexagon V4 machine.
+// This file describes that machine information.
+
+//
+// |===========|==================================================|
+// | PIPELINE | Instruction Classes |
+// |===========|==================================================|
+// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM |
+// |-----------|--------------------------------------------------|
+// | SLOT1 | LD ST ALU32 |
+// |-----------|--------------------------------------------------|
+// | SLOT2 | XTYPE ALU32 J JR |
+// |-----------|--------------------------------------------------|
+// | SLOT3 | XTYPE ALU32 J CR |
+// |===========|==================================================|
+
+
+// Functional Units.
+def SLOT0 : FuncUnit;
+def SLOT1 : FuncUnit;
+def SLOT2 : FuncUnit;
+def SLOT3 : FuncUnit;
+
+// Itinerary classes.
+def NV_V4 : InstrItinClass;
+def MEM_V4 : InstrItinClass;
+// The ALU64/M/S instruction classes of V2 are collectively known as XTYPE in V4.
+
+def HexagonItinerariesV4 : ProcessorItineraries<
+ [SLOT0, SLOT1, SLOT2, SLOT3], [], [
+ InstrItinData<LD , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ALU32 , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<NV_V4 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<MEM_V4 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<J , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<JR , [InstrStage<1, [SLOT2]>]>,
+ InstrItinData<CR , [InstrStage<1, [SLOT3]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S , [InstrStage<1, [SLOT2, SLOT3]>]>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Hexagon V4 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonSelectCCInfo.td b/lib/Target/Hexagon/HexagonSelectCCInfo.td
new file mode 100644
index 0000000..f21d928
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectCCInfo.td
@@ -0,0 +1,121 @@
+//=-HexagonSelectCCInfo.td - Selectcc mappings ---------------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+//
+// selectcc mappings.
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETEQ)),
+ (i32 (MUX_rr (i1 (CMPEQrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETNE)),
+ (i32 (MUX_rr (i1 (NOT_Ps (CMPEQrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETGT)),
+ (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETUGT)),
+ (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+
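+//
+// selectcc mappings for less-than Rs => not greater-than Rs-1
+// (annotation: lhs < rhs <=> !(lhs > rhs-1), as long as rhs-1 does not
+// wrap around).
+//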
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETULT)),
+ (i32 (MUX_rr (i1 (NOT_Ps (CMPGTUrr IntRegs:$lhs,
+ (ADD_ri IntRegs:$rhs, -1)))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETLT)),
+ (i32 (MUX_rr (i1 (NOT_Ps (CMPGTrr IntRegs:$lhs,
+ (ADD_ri IntRegs:$rhs, -1)))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETLE)),
+ (i32 (MUX_rr (i1 (NOT_Ps (CMPGTrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETULE)),
+ (i32 (MUX_rr (i1 (NOT_Ps (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+//
+// selectcc mappings for greater-than-or-equal-to Rs => greater-than Rs-1.
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETGE)),
+ (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETUGE)),
+ (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+
+//
+// selectcc mappings for predicate comparisons.
+//
+// Convert Rd = selectcc(p0, p1, true_val, false_val, SETEQ) into:
+// pt = not(p0 xor p1)
+// Rd = mux(pt, true_val, false_val)
+// and similarly for SETNE.
+//
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETNE)),
+ (i32 (MUX_rr (i1 (XOR_pp PredRegs:$lhs, PredRegs:$rhs)), IntRegs:$tval,
+ IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETEQ)),
+ (i32 (MUX_rr (i1 (NOT_pp (XOR_pp PredRegs:$lhs, PredRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+//
+// selectcc mappings for 64-bit operands are messy. Hexagon does not have a
+// MUX64 operation, so use this instead:
+// selectcc(Rss, Rdd, tval, fval, cond) ->
+// combine(mux(cmp_cond(Rss, Rdd), tval.hi, fval.hi),
+// mux(cmp_cond(Rss, Rdd), tval.lo, fval.lo))
+
+// setgt-64.
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+ DoubleRegs:$fval, SETGT)),
+ (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+ (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
+
+
+// setlt-64 -> setgt-64.
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+ DoubleRegs:$fval, SETLT)),
+ (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs,
+ (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+ (MUX_rr (CMPGT64rr DoubleRegs:$lhs,
+ (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
new file mode 100644
index 0000000..a52c604
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -0,0 +1,46 @@
+//===-- HexagonSelectionDAGInfo.cpp - Hexagon SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HexagonSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-selectiondag-info"
+#include "HexagonTargetMachine.h"
+using namespace llvm;
+
+bool llvm::flag_aligned_memcpy;
+
+HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const HexagonTargetMachine
+ &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+HexagonSelectionDAGInfo::~HexagonSelectionDAGInfo() {
+}
+
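+// Annotation: returning an empty SDValue leaves the actual expansion to the
+// generic memcpy lowering. The hook's only effect is to record in
+// flag_aligned_memcpy whether the copy is word aligned and has a constant
+// size greater than 32 bytes that is a multiple of 8, for other Hexagon
+// lowering code to consult.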
+SDValue
+HexagonSelectionDAGInfo::
+EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain,
+ SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ flag_aligned_memcpy = false;
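+  // Mark this memcpy as "aligned" only when the alignment is a multiple of 4
+  // and the length is a compile-time constant larger than 32 bytes that is
+  // also a multiple of 8. Returning an empty SDValue leaves the expansion to
+  // the generic lowering; the flag is read elsewhere in the backend.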
+ if ((Align & 0x3) == 0) {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if ((SizeVal > 32) && ((SizeVal % 8) == 0))
+ flag_aligned_memcpy = true;
+ }
+ }
+
+ return SDValue();
+}
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
new file mode 100644
index 0000000..86fa026
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -0,0 +1,40 @@
+//=-- HexagonSelectionDAGInfo.h - Hexagon SelectionDAG Info ------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Hexagon subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonSELECTIONDAGINFO_H
+#define HexagonSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class HexagonTargetMachine;
+
+class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit HexagonSelectionDAGInfo(const HexagonTargetMachine &TM);
+ ~HexagonSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
new file mode 100644
index 0000000..f4d3647
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -0,0 +1,136 @@
+//===---- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass tries to provide opportunities for better optimization of muxes.
+// The default code generated for something like "flag = (a == b) ? 1 : 3;"
+// would be:
+//
+// {p0 = cmp.eq(r0,r1)}
+// {r3 = mux(p0,#1,#3)}
+//
+// This requires two packets. If we use .new predicated immediate transfers,
+// then we can do this in a single packet, e.g.:
+//
+// {p0 = cmp.eq(r0,r1)
+// if (p0.new) r3 = #1
+// if (!p0.new) r3 = #3}
+//
+// Note that the conditional assignments are not generated in .new form here.
+// We optimistically assume that they will be formed later.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xfer"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include <map>
+
+using namespace llvm;
+
+namespace {
+
+class HexagonSplitTFRCondSets : public MachineFunctionPass {
+ HexagonTargetMachine& QTM;
+ const HexagonSubtarget &QST;
+
+ public:
+ static char ID;
+ HexagonSplitTFRCondSets(HexagonTargetMachine& TM) :
+ MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+
+ const char *getPassName() const {
+ return "Hexagon Split TFRCondSets";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonSplitTFRCondSets::ID = 0;
+
+
+bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
+
+ const TargetInstrInfo *TII = QTM.getInstrInfo();
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (Opc == Hexagon::TFR_condset_rr) {
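+        // Split TFR_condset_rr into two predicated transfers: one for the
+        // true value and one for the false value (predicate in operand 1).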
+
+ int DestReg = MI->getOperand(0).getReg();
+ int SrcReg1 = MI->getOperand(2).getReg();
+ int SrcReg2 = MI->getOperand(3).getReg();
+
+        // Minor optimization: do not emit the predicated copy if the source
+        // and the destination are the same register.
+ if (DestReg != SrcReg1) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_cPt),
+ DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
+ }
+ if (DestReg != SrcReg2) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_cNotPt),
+ DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::TFR_condset_ii) {
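+        // For the immediate form, both predicated transfers are always
+        // emitted; exactly one executes, selected by the predicate register.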
+ int DestReg = MI->getOperand(0).getReg();
+ int SrcReg1 = MI->getOperand(1).getReg();
+ int Immed1 = MI->getOperand(2).getImm();
+ int Immed2 = MI->getOperand(3).getImm();
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cPt),
+ DestReg).addReg(SrcReg1).addImm(Immed1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cNotPt),
+ DestReg).addReg(SrcReg1).addImm(Immed2);
+ MII = MBB->erase(MI);
+ --MII;
+ }
+ }
+ }
+
+ return true;
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonSplitTFRCondSets(HexagonTargetMachine &TM) {
+ return new HexagonSplitTFRCondSets(TM);
+}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
new file mode 100644
index 0000000..83fb498
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -0,0 +1,59 @@
+//===- HexagonSubtarget.cpp - Hexagon Subtarget Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonSubtarget.h"
+#include "Hexagon.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_TARGET_DESC
+#include "HexagonGenSubtargetInfo.inc"
+
+static cl::opt<bool>
+EnableV3("enable-hexagon-v3", cl::Hidden,
+ cl::desc("Enable Hexagon V3 instructions."));
+
+static cl::opt<bool>
+EnableMemOps(
+ "enable-hexagon-memops",
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed,
+ cl::desc("Generate V4 MEMOP in code generation for Hexagon target"));
+
+HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
+ HexagonGenSubtargetInfo(TT, CPU, FS),
+ HexagonArchVersion(V1),
+ CPUString(CPU.str()) {
+ ParseSubtargetFeatures(CPU, FS);
+
+ switch(HexagonArchVersion) {
+ case HexagonSubtarget::V2:
+ break;
+ case HexagonSubtarget::V3:
+ EnableV3 = true;
+ break;
+ case HexagonSubtarget::V4:
+ break;
+ default:
+ llvm_unreachable("Unknown Architecture Version.");
+ }
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUString);
+
+  UseMemOps = EnableMemOps;
+}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
new file mode 100644
index 0000000..6de85df
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -0,0 +1,74 @@
+//==-- HexagonSubtarget.h - Define Subtarget for the Hexagon ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Hexagon_SUBTARGET_H
+#define Hexagon_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "HexagonGenSubtargetInfo.inc"
+
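+// Default upper bound, in bytes, for data placed in the small-data area
+// (.sdata/.sbss).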
+#define Hexagon_SMALL_DATA_THRESHOLD 8
+
+namespace llvm {
+
+class HexagonSubtarget : public HexagonGenSubtargetInfo {
+
+ bool UseMemOps;
+
+public:
+ enum HexagonArchEnum {
+ V1, V2, V3, V4
+ };
+
+ HexagonArchEnum HexagonArchVersion;
+ std::string CPUString;
+ InstrItineraryData InstrItins;
+
+public:
+ HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+
+  /// getInstrItineraryData - Return the instruction itineraries based on the
+  /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+  bool hasV2TOps() const { return HexagonArchVersion >= V2; }
+  bool hasV2TOpsOnly() const { return HexagonArchVersion == V2; }
+  bool hasV3TOps() const { return HexagonArchVersion >= V3; }
+  bool hasV3TOpsOnly() const { return HexagonArchVersion == V3; }
+  bool hasV4TOps() const { return HexagonArchVersion >= V4; }
+  bool useMemOps() const { return HexagonArchVersion >= V4 && UseMemOps; }
+
+  bool isSubtargetV2() const { return HexagonArchVersion == V2; }
+  const std::string &getCPUString() const { return CPUString; }
+
+ // Threshold for small data section
+ unsigned getSmallDataThreshold() const {
+ return Hexagon_SMALL_DATA_THRESHOLD;
+ }
+ const HexagonArchEnum &getHexagonArchVersion() const {
+ return HexagonArchVersion;
+ }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
new file mode 100644
index 0000000..b29e92c
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -0,0 +1,118 @@
+//===-- HexagonTargetMachine.cpp - Define TargetMachine for Hexagon -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetMachine.h"
+#include "Hexagon.h"
+#include "HexagonISelLowering.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <iostream>
+
+using namespace llvm;
+
+static cl::opt<bool> DisableHardwareLoops(
+    "disable-hexagon-hwloops", cl::Hidden,
+    cl::desc("Disable Hardware Loops for Hexagon target"));
+
+/// HexagonTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int HexagonTargetMachineModule;
+int HexagonTargetMachineModule = 0;
+
+extern "C" void LLVMInitializeHexagonTarget() {
+ // Register the target.
+ RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget);
+}
+
+
+/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
+///
+/// Hexagon_TODO: Do I need an aggregate alignment?
+///
+HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ DataLayout("e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-a0:0") ,
+ Subtarget(TT, CPU, FS), TLInfo(*this), InstrInfo(Subtarget),
+ TSInfo(*this),
+ FrameLowering(Subtarget),
+ InstrItins(&Subtarget.getInstrItineraryData()) {
+ setMCUseCFI(false);
+}
+
+// addPassesForOptimizations - Allow the backend (target) to add Target
+// Independent Optimization passes to the Pass Manager.
+bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) {
+
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopSimplifyPass());
+ PM.add(createDeadCodeEliminationPass());
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopUnrollPass());
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ return true;
+}
+
+bool HexagonTargetMachine::addInstSelector(PassManagerBase &PM) {
+ PM.add(createHexagonRemoveExtendOps(*this));
+ PM.add(createHexagonISelDag(*this));
+ return false;
+}
+
+
+bool HexagonTargetMachine::addPreRegAlloc(PassManagerBase &PM) {
+ if (!DisableHardwareLoops) {
+ PM.add(createHexagonHardwareLoops());
+ }
+
+ return false;
+}
+
+bool HexagonTargetMachine::addPostRegAlloc(PassManagerBase &PM) {
+ PM.add(createHexagonCFGOptimizer(*this));
+ return true;
+}
+
+
+bool HexagonTargetMachine::addPreSched2(PassManagerBase &PM) {
+ PM.add(createIfConverterPass());
+ return true;
+}
+
+bool HexagonTargetMachine::addPreEmitPass(PassManagerBase &PM) {
+
+ if (!DisableHardwareLoops) {
+ PM.add(createHexagonFixupHwLoops());
+ }
+
+ // Expand Spill code for predicate registers.
+ PM.add(createHexagonExpandPredSpillCode(*this));
+
+ // Split up TFRcondsets into conditional transfers.
+ PM.add(createHexagonSplitTFRCondSets(*this));
+
+ return false;
+}
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
new file mode 100644
index 0000000..e27d3ae
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -0,0 +1,86 @@
+//=-- HexagonTargetMachine.h - Define TargetMachine for Hexagon ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonTARGETMACHINE_H
+#define HexagonTARGETMACHINE_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonISelLowering.h"
+#include "HexagonSelectionDAGInfo.h"
+#include "HexagonFrameLowering.h"
+
+namespace llvm {
+
+class Module;
+
+class HexagonTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment.
+ HexagonSubtarget Subtarget;
+ HexagonTargetLowering TLInfo;
+ HexagonInstrInfo InstrInfo;
+ HexagonSelectionDAGInfo TSInfo;
+ HexagonFrameLowering FrameLowering;
+ const InstrItineraryData* InstrItins;
+
+public:
+ HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
+ StringRef FS, TargetOptions Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL);
+
+ virtual const HexagonInstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual const HexagonSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ virtual const HexagonRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const InstrItineraryData* getInstrItineraryData() const {
+ return InstrItins;
+ }
+
+
+ virtual const HexagonTargetLowering* getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const HexagonFrameLowering* getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ virtual const HexagonSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration.
+ virtual bool addPassesForOptimizations(PassManagerBase &PM);
+ virtual bool addInstSelector(PassManagerBase &PM);
+ virtual bool addPreEmitPass(PassManagerBase &PM);
+ virtual bool addPreRegAlloc(llvm::PassManagerBase &PM);
+ virtual bool addPostRegAlloc(PassManagerBase &PM);
+ virtual bool addPreSched2(PassManagerBase &PM);
+};
+
+extern bool flag_aligned_memcpy;
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
new file mode 100644
index 0000000..188337d
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -0,0 +1,94 @@
+//===-- HexagonTargetObjectFile.cpp - Hexagon Object File Handling --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HexagonTargetObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/CommandLine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetObjectFile.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<int> SmallDataThreshold("hexagon-small-data-threshold",
+ cl::init(8), cl::Hidden);
+
+void HexagonTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ SmallDataSection =
+ getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ SmallBSSSection =
+ getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+}
+
+// sdata/sbss support taken largely from the MIPS Backend.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= (uint64_t)SmallDataThreshold;
+}
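+
+// For example, with the default threshold of 8, a 4-byte integer global is
+// eligible for .sdata/.sbss while a 16-byte array is not (hypothetical
+// globals for illustration).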
+/// IsGlobalInSmallSection - Return true if this global value should be
+/// placed into small data/bss section.
+bool HexagonTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ // If the primary definition of this global value is outside the current
+ // translation unit or the global value is available for inspection but not
+ // emission, then do nothing.
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+  // Otherwise, check if GV should be in sdata/sbss when it would normally end
+  // up in getKindForGlobal(GV, TM).
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global value should be
+/// placed into small data/bss section.
+bool HexagonTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) {
+ Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
+ }
+
+ return false;
+}
+
+const MCSection *HexagonTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+  return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind,
+                                                             Mang, TM);
+}
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h
new file mode 100644
index 0000000..101c1f2
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -0,0 +1,40 @@
+//===-- HexagonTargetObjectFile.h - Hexagon Object File Handling -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonTARGETOBJECTFILE_H
+#define HexagonTARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCSectionELF.h"
+
+namespace llvm {
+
+ class HexagonTargetObjectFile : public TargetLoweringObjectFileELF {
+ const MCSectionELF *SmallDataSection;
+ const MCSectionELF *SmallBSSSection;
+ public:
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM,
+ SectionKind Kind) const;
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ const MCSection* SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
new file mode 100644
index 0000000..21b2d67
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
@@ -0,0 +1,141 @@
+//==-- HexagonVarargsCallingConvention.h - Calling Conventions ---*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the functions that assign locations to outgoing function
+// arguments. Adapted from the target independent version but this handles
+// calls to varargs functions
+//
+//===----------------------------------------------------------------------===//
+
+static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem);
+
+
+static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem) {
+ unsigned ByValSize = 0;
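+  // By-value aggregates larger than 64 bits are always forced onto the stack.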
+ if (ArgFlags.isByVal() &&
+ ((ByValSize = ArgFlags.getByValSize()) >
+ (MVT(MVT::i64).getSizeInBits() / 8))) {
+ ForceMem = true;
+ }
+
+
+ // Only assign registers for named (non varargs) arguments
+  if (!ForceMem && ((NonVarArgsParams == -1) ||
+                    (CurrentParam <= NonVarArgsParams))) {
+
+ if (LocVT == MVT::i32 ||
+ LocVT == MVT::i16 ||
+ LocVT == MVT::i8 ||
+ LocVT == MVT::f32) {
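+      // The first six 32-bit arguments are passed in R0-R5.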
+ static const unsigned RegList1[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
+ if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::i64 ||
+ LocVT == MVT::f64) {
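+      // 64-bit arguments are passed in the register pairs D0-D2.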
+ static const unsigned RegList2[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2
+ };
+ if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+ }
+
+ const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
+ unsigned Alignment =
+ State.getTarget().getTargetData()->getABITypeAlignment(ArgTy);
+ unsigned Size =
+ State.getTarget().getTargetData()->getTypeSizeInBits(ArgTy) / 8;
+
+ // If it's passed by value, then we need the size of the aggregate not of
+ // the pointer.
+ if (ArgFlags.isByVal()) {
+ Size = ByValSize;
+
+ // Hexagon_TODO: Get the alignment of the contained type here.
+ Alignment = 8;
+ }
+
+ unsigned Offset3 = State.AllocateStack(Size, Alignment);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+}
+
+
+static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem) {
+
+ if (LocVT == MVT::i32 ||
+ LocVT == MVT::f32) {
+ static const unsigned RegList1[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
+ if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::i64 ||
+ LocVT == MVT::f64) {
+ static const unsigned RegList2[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2
+ };
+ if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+
+ const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
+ unsigned Alignment =
+ State.getTarget().getTargetData()->getABITypeAlignment(ArgTy);
+ unsigned Size =
+ State.getTarget().getTargetData()->getTypeSizeInBits(ArgTy) / 8;
+
+ unsigned Offset3 = State.AllocateStack(Size, Alignment);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+}
diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt
new file mode 100644
index 0000000..84ea6a0
--- /dev/null
+++ b/lib/Target/Hexagon/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/Hexagon/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = TargetInfo MCTargetDesc
+
+[component_0]
+type = TargetGroup
+name = Hexagon
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = HexagonCodeGen
+parent = Hexagon
+required_libraries = AsmPrinter CodeGen Core HexagonInfo SelectionDAG Support Target MC HexagonDesc
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..8e3da99
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_library(LLVMHexagonDesc
+ HexagonMCTargetDesc.cpp
+ HexagonMCAsmInfo.cpp
+ )
+
+add_dependencies(LLVMHexagonDesc HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
new file mode 100644
index 0000000..188693c
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -0,0 +1,36 @@
+//===-- HexagonMCAsmInfo.cpp - Hexagon asm properties -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the HexagonMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCAsmInfo.h"
+
+using namespace llvm;
+
+HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) {
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+  Data64bitsDirective = 0;  // There is no 64-bit data directive.
+ CommentString = "//";
+ HasLEB128 = true;
+
+ PrivateGlobalPrefix = ".L";
+ LCOMMDirectiveType = LCOMM::ByteAlignment;
+ InlineAsmStart = "# InlineAsm Start";
+ InlineAsmEnd = "# InlineAsm End";
+ ZeroDirective = "\t.space\t";
+ AscizDirective = "\t.string\t";
+ WeakRefDirective = "\t.weak\t";
+
+ UsesELFSectionDirectiveForBSS = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
new file mode 100644
index 0000000..8196e95
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- HexagonMCAsmInfo.h - Hexagon asm properties -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the HexagonMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonMCASMINFO_H
+#define HexagonMCASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ class HexagonMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit HexagonMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
new file mode 100644
index 0000000..625f07c
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -0,0 +1,94 @@
+//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Hexagon-specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCTargetDesc.h"
+#include "HexagonMCAsmInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "HexagonGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "HexagonGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "HexagonGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createHexagonMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitHexagonMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitHexagonMCRegisterInfo(X, Hexagon::R0);
+ return X;
+}
+
+static MCSubtargetInfo *createHexagonMCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitHexagonMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCAsmInfo *createHexagonMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new HexagonMCAsmInfo(T, TT);
+
+ // VirtualFP = (R30 + #0).
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(Hexagon::R30, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ // For the time being, use static relocations, since there's really no
+ // support for PIC yet.
+ X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeHexagonTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheHexagonTarget, createHexagonMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheHexagonTarget,
+ createHexagonMCCodeGenInfo);
+
+ // Register the MC instruction info.
+  TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget,
+                                      createHexagonMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheHexagonTarget,
+ createHexagonMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheHexagonTarget,
+ createHexagonMCSubtargetInfo);
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
new file mode 100644
index 0000000..364841f
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Hexagon specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCTARGETDESC_H
+#define HEXAGONMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheHexagonTarget;
+
+} // End llvm namespace
+
+// Define symbolic names for Hexagon registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "HexagonGenRegisterInfo.inc"
+
+// Defines symbolic names for the Hexagon instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "HexagonGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "HexagonGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 0000000..1114d99
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = HexagonDesc
+parent = Hexagon
+required_libraries = HexagonInfo MC
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/Makefile b/lib/Target/Hexagon/MCTargetDesc/Makefile
new file mode 100644
index 0000000..67be2bc
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Hexagon/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMHexagonDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile
new file mode 100644
index 0000000..c936e92
--- /dev/null
+++ b/lib/Target/Hexagon/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/Hexagon/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMHexagonCodeGen
+TARGET = Hexagon
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = HexagonGenRegisterInfo.inc \
+ HexagonGenInstrInfo.inc \
+ HexagonGenAsmWriter.inc \
+ HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \
+ HexagonGenCallingConv.inc \
+ HexagonAsmPrinter.cpp
+
+DIRS = TargetInfo MCTargetDesc
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Hexagon/TargetInfo/CMakeLists.txt b/lib/Target/Hexagon/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000..5b04a30
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMHexagonInfo
+ HexagonTargetInfo.cpp
+ )
+
+add_dependencies(LLVMHexagonInfo HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
new file mode 100644
index 0000000..7aa5dd3
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- HexagonTargetInfo.cpp - Hexagon Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheHexagonTarget;
+
+extern "C" void LLVMInitializeHexagonTargetInfo() {
+  RegisterTarget<Triple::hexagon, /*HasJIT=*/false> X(TheHexagonTarget,
+                                                      "hexagon", "Hexagon");
+}
diff --git a/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt b/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt
new file mode 100644
index 0000000..7b87be3
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Hexagon/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = HexagonInfo
+parent = Hexagon
+required_libraries = MC Support
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/TargetInfo/Makefile b/lib/Target/Hexagon/TargetInfo/Makefile
new file mode 100644
index 0000000..494cca1
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/Hexagon/TargetInfo/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMHexagonInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index 358cbc8..5a42ca5 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = ARM CBackend CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore
+
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the
; interpreter).
diff --git a/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
index ec8f52a..813767b 100644
--- a/lib/Target/MBlaze/AsmParser/CMakeLists.txt
+++ b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
@@ -6,11 +6,4 @@ add_llvm_library(LLVMMBlazeAsmParser
MBlazeAsmParser.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeAsmParser
- LLVMMBlazeInfo
- LLVMMC
- LLVMMCParser
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeAsmParser MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/AsmParser/LLVMBuild.txt b/lib/Target/MBlaze/AsmParser/LLVMBuild.txt
index 2c61a7f..b10189a 100644
--- a/lib/Target/MBlaze/AsmParser/LLVMBuild.txt
+++ b/lib/Target/MBlaze/AsmParser/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MBlazeAsmParser
parent = MBlaze
required_libraries = MBlazeInfo MC MCParser Support
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
index d3f1383..71095e5 100644
--- a/lib/Target/MBlaze/CMakeLists.txt
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -29,19 +29,6 @@ add_llvm_target(MBlazeCodeGen
MBlazeELFWriterInfo.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMBlazeAsmPrinter
- LLVMMBlazeDesc
- LLVMMBlazeInfo
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/lib/Target/MBlaze/Disassembler/CMakeLists.txt
index e0a53ee..be2dce1 100644
--- a/lib/Target/MBlaze/Disassembler/CMakeLists.txt
+++ b/lib/Target/MBlaze/Disassembler/CMakeLists.txt
@@ -13,11 +13,4 @@ set_property(
)
endif()
-add_llvm_library_dependencies(LLVMMBlazeDisassembler
- LLVMMBlazeDesc
- LLVMMBlazeInfo
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeDisassembler MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt
index c5c4f80..28dd9dc 100644
--- a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt
+++ b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MBlazeDisassembler
parent = MBlaze
required_libraries = MBlazeDesc MBlazeInfo MC Support
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index 3087317..ccc3a05 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -123,6 +123,7 @@ static unsigned decodeSEXT(uint32_t insn) {
case 0x41: return MBlaze::SRL;
case 0x21: return MBlaze::SRC;
case 0x01: return MBlaze::SRA;
+ case 0xE0: return MBlaze::CLZ;
}
}
@@ -176,6 +177,13 @@ static unsigned decodeBR(uint32_t insn) {
}
static unsigned decodeBRI(uint32_t insn) {
+ switch (insn&0x3FFFFFF) {
+ default: break;
+ case 0x0020004: return MBlaze::IDMEMBAR;
+ case 0x0220004: return MBlaze::DMEMBAR;
+ case 0x0420004: return MBlaze::IMEMBAR;
+ }
+
switch ((insn>>16)&0x1F) {
default: return UNSUPPORTED;
case 0x00: return MBlaze::BRI;
@@ -531,6 +539,9 @@ MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr,
default:
return Fail;
+ case MBlazeII::FC:
+ break;
+
case MBlazeII::FRRRR:
if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
return Fail;
@@ -547,6 +558,13 @@ MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr,
instr.addOperand(MCOperand::CreateReg(RB));
break;
+ case MBlazeII::FRR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
case MBlazeII::FRI:
switch (opcode) {
default:
diff --git a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
index aff0b3d..586e2d3 100644
--- a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
+++ b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
@@ -5,9 +5,4 @@ add_llvm_library(LLVMMBlazeAsmPrinter
MBlazeInstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeAsmPrinter MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt b/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt
index 7a21f1e..3a21a05 100644
--- a/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MBlazeAsmPrinter
parent = MBlaze
required_libraries = MC Support
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
index 570ab08..5297563 100644
--- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- MBLazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===//
+//===-- MBlazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/LLVMBuild.txt b/lib/Target/MBlaze/LLVMBuild.txt
index f1a3f5d..0b29007 100644
--- a/lib/Target/MBlaze/LLVMBuild.txt
+++ b/lib/Target/MBlaze/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = MBlaze
@@ -29,4 +32,3 @@ name = MBlazeCodeGen
parent = MBlaze
required_libraries = AsmPrinter CodeGen Core MBlazeAsmPrinter MBlazeDesc MBlazeInfo MC SelectionDAG Support Target
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index ff051e3..c751dd8 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -310,9 +310,9 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
// Check if the last terminator is an unconditional branch.
MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ while (I != Pred->begin() && !(--I)->isTerminator())
; // Noop
- return I == Pred->end() || !I->getDesc().isBarrier();
+ return I == Pred->end() || !I->isBarrier();
}
// Force static initialization.
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index c07570a..19e787d 100644
--- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -29,13 +29,11 @@ using namespace llvm;
STATISTIC(FilledSlots, "Number of delay slots filled");
-namespace llvm {
-cl::opt<bool> DisableDelaySlotFiller(
+static cl::opt<bool> MBDisableDelaySlotFiller(
"disable-mblaze-delay-filler",
cl::init(false),
cl::desc("Disable the MBlaze delay slot filter."),
cl::Hidden);
-}
namespace {
struct Filler : public MachineFunctionPass {
@@ -109,7 +107,6 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
// Hazard check
MachineBasicBlock::iterator a = candidate;
MachineBasicBlock::iterator b = slot;
- MCInstrDesc desc = candidate->getDesc();
// MBB layout:-
// candidate := a0 = operation(a1, a2)
@@ -123,7 +120,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
// 4. b0 is one or more of {a1, a2}
// 5. a accesses memory, and the middle bit
// contains a store operation.
- bool a_is_memory = desc.mayLoad() || desc.mayStore();
+ bool a_is_memory = candidate->mayLoad() || candidate->mayStore();
// Determine the number of operands in the slot instruction and in the
// candidate instruction.
@@ -156,7 +153,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
}
// Check hazard type 5
- if (a_is_memory && m->getDesc().mayStore())
+ if (a_is_memory && m->mayStore())
return true;
}
@@ -183,8 +180,8 @@ static bool isDelayFiller(MachineBasicBlock &MBB,
if (candidate == MBB.begin())
return false;
- MCInstrDesc brdesc = (--candidate)->getDesc();
- return (brdesc.hasDelaySlot());
+ --candidate;
+ return (candidate->hasDelaySlot());
}
static bool hasUnknownSideEffects(MachineBasicBlock::iterator &I) {
@@ -211,9 +208,8 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
break;
--I;
- MCInstrDesc desc = I->getDesc();
- if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) ||
- desc.isCall() || desc.isReturn() || desc.isBarrier() ||
+ if (I->hasDelaySlot() || I->isBranch() || isDelayFiller(MBB,I) ||
+ I->isCall() || I->isReturn() || I->isBarrier() ||
hasUnknownSideEffects(I))
break;
@@ -232,11 +228,11 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- if (I->getDesc().hasDelaySlot()) {
+ if (I->hasDelaySlot()) {
MachineBasicBlock::iterator D = MBB.end();
MachineBasicBlock::iterator J = I;
- if (!DisableDelaySlotFiller)
+ if (!MBDisableDelaySlotFiller)
D = findDelayInstr(MBB,I);
++FilledSlots;
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
index f28d5a7..37919bc 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -32,13 +32,11 @@
using namespace llvm;
-namespace llvm {
- cl::opt<bool> DisableStackAdjust(
- "disable-mblaze-stack-adjust",
- cl::init(false),
- cl::desc("Disable MBlaze stack layout adjustment."),
- cl::Hidden);
-}
+static cl::opt<bool> MBDisableStackAdjust(
+ "disable-mblaze-stack-adjust",
+ cl::init(false),
+ cl::desc("Disable MBlaze stack layout adjustment."),
+ cl::Hidden);
static void replaceFrameIndexes(MachineFunction &MF,
SmallVector<std::pair<int,int64_t>, 16> &FR) {
@@ -85,7 +83,7 @@ static void replaceFrameIndexes(MachineFunction &MF,
//===----------------------------------------------------------------------===//
static void analyzeFrameIndexes(MachineFunction &MF) {
- if (DisableStackAdjust) return;
+ if (MBDisableStackAdjust) return;
MachineFrameInfo *MFI = MF.getFrameInfo();
MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
@@ -336,7 +334,8 @@ int MBlazeFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI)
// if frame pointer elimination is disabled.
bool MBlazeFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects();
}
void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const {
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 148d906..0002174 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -167,7 +167,9 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td
index 54f605f..4c6034d 100644
--- a/lib/Target/MBlaze/MBlazeInstrFormats.td
+++ b/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -35,6 +35,7 @@ def FRIR : Format<17>; // RSUBI
def FRRRR : Format<18>; // RSUB, FRSUB
def FRI : Format<19>; // RSUB, FRSUB
def FC : Format<20>; // NOP
+def FRR : Format<21>; // CLZ
//===----------------------------------------------------------------------===//
// Describe MBlaze instructions format
@@ -202,3 +203,26 @@ class MSR<bits<6> op, bits<6> flags, dag outs, dag ins, string asmstr,
let Inst{11-16} = flags;
let Inst{17-31} = imm15;
}
+
+//===----------------------------------------------------------------------===//
+// TCLZ instruction class in MBlaze : <|opcode|rd|imm15|>
+//===----------------------------------------------------------------------===//
+class TCLZ<bits<6> op, bits<16> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRR, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<5> ra;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = flags;
+}
+
+//===----------------------------------------------------------------------===//
+// MBAR instruction class in MBlaze : <|opcode|rd|imm15|>
+//===----------------------------------------------------------------------===//
+class MBAR<bits<6> op, bits<26> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FC, outs, ins, asmstr, pattern, itin> {
+ let Inst{6-31} = flags;
+}
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index 1d8c987..9fe2a49 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -594,9 +594,18 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
//===----------------------------------------------------------------------===//
let neverHasSideEffects = 1 in {
- def NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>;
+ def NOP : MBlazeInst<0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>;
}
+let Predicates=[HasPatCmp] in {
+ def CLZ : TCLZ<0x24, 0x00E0, (outs GPR:$dst), (ins GPR:$src),
+ "clz $dst, $src", [], IIC_ALU>;
+}
+
+def IMEMBAR : MBAR<0x2E, 0x0420004, (outs), (ins), "mbar 2", [], IIC_ALU>;
+def DMEMBAR : MBAR<0x2E, 0x0220004, (outs), (ins), "mbar 1", [], IIC_ALU>;
+def IDMEMBAR : MBAR<0x2E, 0x0020004, (outs), (ins), "mbar 0", [], IIC_ALU>;
+
let usesCustomInserter = 1 in {
def Select_CC : MBlazePseudo<(outs GPR:$dst),
(ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC), // F T reversed
@@ -751,6 +760,56 @@ def : Pat<(sra GPR:$L, GPR:$R), (ShiftRA GPR:$L, GPR:$R)>;
def : Pat<(srl GPR:$L, GPR:$R), (ShiftRL GPR:$L, GPR:$R)>;
// SET_CC operations
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 1)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 2)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 6)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 6)>;
+
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 1)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 2)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 3)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 4)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 5)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 6)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 3)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 4)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 5)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 6)>;
+
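The patterns above (and the selectcc/brcond blocks that follow) encode the comparison kind as the last operand of Select_CC. Read off the patterns, the mapping appears to be the following; this enum is purely illustrative and does not exist in the backend:

// Condition-code operand of Select_CC, as inferred from this patch:
// 1=eq, 2=ne, 3=gt, 4=lt, 5=ge, 6=le. Unsigned comparisons reuse the
// signed codes after a CMPU normalizes the operands.
enum MBlazeSelectCC {
  CC_EQ = 1, CC_NE = 2, CC_GT = 3,
  CC_LT = 4, CC_GE = 5, CC_LE = 6
};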
def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ),
(Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
(CMP GPR:$R, GPR:$L), 1)>;
@@ -787,6 +846,68 @@ def : Pat<(select (i32 GPR:$C), (i32 GPR:$T), (i32 GPR:$F)),
(Select_CC GPR:$T, GPR:$F, GPR:$C, 2)>;
// SELECT_CC
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 1)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 2)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 6)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 6)>;
+
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 1)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 2)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 3)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 4)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 5)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 6)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 3)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 4)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 5)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 6)>;
+
def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
(i32 GPR:$T), (i32 GPR:$F), SETEQ),
(Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 1)>;
@@ -827,6 +948,48 @@ def : Pat<(br bb:$T), (BRID bb:$T)>;
def : Pat<(brind GPR:$T), (BRAD GPR:$T)>;
// BRCOND instructions
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETEQ), bb:$T),
+ (BEQID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETNE), bb:$T),
+ (BNEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETGT), bb:$T),
+ (BGTID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETLT), bb:$T),
+ (BLTID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETGE), bb:$T),
+ (BGEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETLE), bb:$T),
+ (BLEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETUGT), bb:$T),
+ (BGTID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETULT), bb:$T),
+ (BLTID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETUGE), bb:$T),
+ (BGEID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETULE), bb:$T),
+ (BLEID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETEQ), bb:$T),
+ (BEQID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETNE), bb:$T),
+ (BNEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETGT), bb:$T),
+ (BGTID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETLT), bb:$T),
+ (BLTID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETGE), bb:$T),
+ (BGEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETLE), bb:$T),
+ (BLEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETUGT), bb:$T),
+ (BGTID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETULT), bb:$T),
+ (BLTID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETUGE), bb:$T),
+ (BGEID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETULE), bb:$T),
+ (BLEID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+
def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), bb:$T),
(BEQID (CMP GPR:$R, GPR:$L), bb:$T)>;
def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETNE), bb:$T),
@@ -869,11 +1032,11 @@ def : Pat<(store (i32 GPR:$dst), xaddr:$addr), (SW GPR:$dst, xaddr:$addr)>;
def : Pat<(load xaddr:$addr), (i32 (LW xaddr:$addr))>;
// 16-bit load and store
-def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$addr), (SH GPR:$dst, xaddr:$addr)>;
+def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$ad), (SH GPR:$dst, xaddr:$ad)>;
def : Pat<(zextloadi16 xaddr:$addr), (i32 (LHU xaddr:$addr))>;
// 8-bit load and store
-def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$addr), (SB GPR:$dst, xaddr:$addr)>;
+def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$ad), (SB GPR:$dst, xaddr:$ad)>;
def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>;
// Peepholes
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
index a7e400b..7e5598f 100644
--- a/lib/Target/MBlaze/MBlazeMCInstLower.cpp
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
@@ -1,4 +1,4 @@
-//===-- MBLazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===//
+//===-- MBlazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 4ad7bd6..5ed81dd 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -33,16 +33,16 @@ extern "C" void LLVMInitializeMBlazeTarget() {
// an easier handling.
MBlazeTargetMachine::
MBlazeTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL):
- LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
- Subtarget(TT, CPU, FS),
- DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
- InstrInfo(*this),
- FrameLowering(Subtarget),
- TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
}
// Install an instruction selector pass using
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index 1c1aa53..036f1b6 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -43,6 +43,7 @@ namespace llvm {
public:
MBlazeTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
index 37871b6..6fa7f43 100644
--- a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
@@ -5,11 +5,4 @@ add_llvm_library(LLVMMBlazeDesc
MBlazeMCTargetDesc.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeDesc
- LLVMMBlazeAsmPrinter
- LLVMMBlazeInfo
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeDesc MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt b/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt
index e89811b..4982f0f 100644
--- a/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MBlazeDesc
parent = MBlaze
required_libraries = MBlazeAsmPrinter MBlazeInfo MC Support
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
index 08f7d46..d5acbe9 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
@@ -58,6 +58,11 @@ public:
bool MayNeedRelaxation(const MCInst &Inst) const;
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
+
void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
@@ -87,6 +92,18 @@ bool MBlazeAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
return hasExprOrImm;
}
+bool MBlazeAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME: Is this right? It's what the "generic" code was doing before,
+ // but is X86 specific. Is it actually true for MBlaze also, or was it
+ // just close enough to not be a big deal?
+ //
+ // Relax if the value is too big for a (signed) i8.
+ return int64_t(Value) != int64_t(int8_t(Value));
+}
+
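The predicate above relaxes an instruction once the fixup value no longer fits a sign-extended 8-bit immediate. A standalone sketch of the same range check, outside LLVM:

#include <cstdint>

// True when Value cannot round-trip through a signed 8-bit integer,
// mirroring the fixupNeedsRelaxation body above.
bool needsRelaxation(uint64_t Value) {
  return int64_t(Value) != int64_t(int8_t(Value));
}

// needsRelaxation(0x7F) is false; needsRelaxation(0x80) is true, because
// int8_t(0x80) sign-extends back to -128 rather than 128.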
void MBlazeAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
Res = Inst;
Res.setOpcode(getRelaxedOpcode(Inst.getOpcode()));
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
index 776dbc4..c8bdd6f 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
@@ -51,6 +51,7 @@ namespace MBlazeII {
FRRRR,
FRI,
FC,
+ FRR,
FormMask = 63
//===------------------------------------------------------------------===//
diff --git a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
index 93fce58..b554d9b 100644
--- a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
+++ b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
@@ -5,10 +5,4 @@ add_llvm_library(LLVMMBlazeInfo
MBlazeTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMBlazeInfo MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt
index 938a1d9..ba7ee5d 100644
--- a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MBlazeInfo
parent = MBlaze
required_libraries = MC Support Target
add_to_library_groups = MBlaze
-
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
index 55c2d7d..7daa7a2 100644
--- a/lib/Target/MSP430/CMakeLists.txt
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -22,19 +22,6 @@ add_llvm_target(MSP430CodeGen
MSP430MCInstLower.cpp
)
-add_llvm_library_dependencies(LLVMMSP430CodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMMSP430AsmPrinter
- LLVMMSP430Desc
- LLVMMSP430Info
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
index ce39d95..64ac994 100644
--- a/lib/Target/MSP430/InstPrinter/CMakeLists.txt
+++ b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMMSP430AsmPrinter
MSP430InstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMMSP430AsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMSP430AsmPrinter MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/InstPrinter/LLVMBuild.txt b/lib/Target/MSP430/InstPrinter/LLVMBuild.txt
index aeb863a..37b8c25 100644
--- a/lib/Target/MSP430/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/MSP430/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MSP430AsmPrinter
parent = MSP430
required_libraries = MC Support
add_to_library_groups = MSP430
-
diff --git a/lib/Target/MSP430/LLVMBuild.txt b/lib/Target/MSP430/LLVMBuild.txt
index 024312b..51d9702 100644
--- a/lib/Target/MSP430/LLVMBuild.txt
+++ b/lib/Target/MSP430/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = MSP430
@@ -27,4 +30,3 @@ name = MSP430CodeGen
parent = MSP430
required_libraries = AsmPrinter CodeGen Core MC MSP430AsmPrinter MSP430Desc MSP430Info SelectionDAG Support Target
add_to_library_groups = MSP430
-
diff --git a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
index c2dd448..adc95c5 100644
--- a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
@@ -3,12 +3,4 @@ add_llvm_library(LLVMMSP430Desc
MSP430MCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMMSP430Desc
- LLVMMC
- LLVMMSP430AsmPrinter
- LLVMMSP430Info
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMSP430Desc MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
index 1890e9d..3319d93 100644
--- a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MSP430Desc
parent = MSP430
required_libraries = MC MSP430AsmPrinter MSP430Info Support Target
add_to_library_groups = MSP430
-
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index c99f4ab..e406ff2 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return (DisableFramePointerElim(MF) ||
+ return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
MF.getFrameInfo()->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
}
@@ -140,7 +140,7 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
while (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
- if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator())
+ if (Opc != MSP430::POP16r && !PI->isTerminator())
break;
--MBBI;
}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 5c94137..884d69b 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -122,8 +122,12 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::CTTZ, MVT::i8, Expand);
setOperationAction(ISD::CTTZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
setOperationAction(ISD::CTLZ, MVT::i8, Expand);
setOperationAction(ISD::CTLZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
setOperationAction(ISD::CTPOP, MVT::i8, Expand);
setOperationAction(ISD::CTPOP, MVT::i16, Expand);
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 81f766e..9d3c7e9 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -158,13 +158,12 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
}
bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
+ if (!MI->isTerminator()) return false;
// Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
+ if (MI->isBranch() && !MI->isBarrier())
return true;
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return true;
return !isPredicated(MI);
}
@@ -189,7 +188,7 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// A terminator that isn't a branch can't easily be handled
// by this analysis.
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
// Cannot handle indirect branches.
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index fe185fb..a0fc3da 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -28,9 +28,10 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
StringRef TT,
StringRef CPU,
StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
// FIXME: Check TargetData string.
DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index 4fb060f..28d482a 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -39,7 +39,7 @@ class MSP430TargetMachine : public LLVMTargetMachine {
public:
MSP430TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/lib/Target/MSP430/TargetInfo/CMakeLists.txt b/lib/Target/MSP430/TargetInfo/CMakeLists.txt
index 1526946..f6b40ea 100644
--- a/lib/Target/MSP430/TargetInfo/CMakeLists.txt
+++ b/lib/Target/MSP430/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMMSP430Info
MSP430TargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMMSP430Info
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMSP430Info MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt
index a745ea8..deafc2d 100644
--- a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MSP430Info
parent = MSP430
required_libraries = MC Support Target
add_to_library_groups = MSP430
-
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index ac9cfc0..a13c0e8 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -29,19 +29,6 @@ add_llvm_target(MipsCodeGen
MipsSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMMipsCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMMipsAsmPrinter
- LLVMMipsDesc
- LLVMMipsInfo
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Mips/InstPrinter/CMakeLists.txt b/lib/Target/Mips/InstPrinter/CMakeLists.txt
index c45b35d..3e9fbf1 100644
--- a/lib/Target/Mips/InstPrinter/CMakeLists.txt
+++ b/lib/Target/Mips/InstPrinter/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMMipsAsmPrinter
MipsInstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMMipsAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMipsAsmPrinter MipsCommonTableGen)
diff --git a/lib/Target/Mips/InstPrinter/LLVMBuild.txt b/lib/Target/Mips/InstPrinter/LLVMBuild.txt
index d953a61..317057b 100644
--- a/lib/Target/Mips/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/Mips/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MipsAsmPrinter
parent = Mips
required_libraries = MC Support
add_to_library_groups = Mips
-
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index f544d39..3e9c46a 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -96,10 +96,14 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
case MCSymbolRefExpr::VK_None: break;
case MCSymbolRefExpr::VK_Mips_GPREL: OS << "%gp_rel("; break;
case MCSymbolRefExpr::VK_Mips_GOT_CALL: OS << "%call16("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT16: OS << "%got("; break;
case MCSymbolRefExpr::VK_Mips_GOT: OS << "%got("; break;
case MCSymbolRefExpr::VK_Mips_ABS_HI: OS << "%hi("; break;
case MCSymbolRefExpr::VK_Mips_ABS_LO: OS << "%lo("; break;
case MCSymbolRefExpr::VK_Mips_TLSGD: OS << "%tlsgd("; break;
+ case MCSymbolRefExpr::VK_Mips_TLSLDM: OS << "%tlsldm("; break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_HI:OS << "%dtprel_hi("; break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_LO:OS << "%dtprel_lo("; break;
case MCSymbolRefExpr::VK_Mips_GOTTPREL: OS << "%gottprel("; break;
case MCSymbolRefExpr::VK_Mips_TPREL_HI: OS << "%tprel_hi("; break;
case MCSymbolRefExpr::VK_Mips_TPREL_LO: OS << "%tprel_lo("; break;
diff --git a/lib/Target/Mips/LLVMBuild.txt b/lib/Target/Mips/LLVMBuild.txt
index e733b52..bcd32bc 100644
--- a/lib/Target/Mips/LLVMBuild.txt
+++ b/lib/Target/Mips/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = Mips
@@ -28,4 +31,3 @@ name = MipsCodeGen
parent = Mips
required_libraries = AsmPrinter CodeGen Core MC MipsAsmPrinter MipsDesc MipsInfo SelectionDAG Support Target
add_to_library_groups = Mips
-
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
index 2ceb5c9..0eb0a55 100644
--- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -5,11 +5,4 @@ add_llvm_library(LLVMMipsDesc
MipsMCTargetDesc.cpp
)
-add_llvm_library_dependencies(LLVMMipsDesc
- LLVMMC
- LLVMMipsAsmPrinter
- LLVMMipsInfo
- LLVMSupport
- )
-
add_dependencies(LLVMMipsDesc MipsCommonTableGen)
diff --git a/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt b/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt
index d6f5dd2..29f5da6 100644
--- a/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MipsDesc
parent = Mips
required_libraries = MC MipsAsmPrinter MipsInfo Support
add_to_library_groups = Mips
-
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 7bc5fe4..60ff4fe 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -29,13 +29,19 @@
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
+// Prepare the fixup value for placement in the target address space.
static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
// Add/subtract and shift
switch (Kind) {
default:
+ return 0;
+ case FK_GPRel_4:
+ case FK_Data_4:
+ case Mips::fixup_Mips_LO16:
break;
case Mips::fixup_Mips_PC16:
// So far we are only using this type for branches.
@@ -52,25 +58,10 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
// address range.
Value >>= 2;
break;
- }
-
- // Mask off value for placement as an operand
- switch (Kind) {
- default:
- break;
- case FK_GPRel_4:
- case FK_Data_4:
- Value &= 0xffffffff;
- break;
- case Mips::fixup_Mips_26:
- Value &= 0x03ffffff;
- break;
- case Mips::fixup_Mips_LO16:
- case Mips::fixup_Mips_PC16:
- Value &= 0x0000ffff;
- break;
case Mips::fixup_Mips_HI16:
- Value >>= 16;
+ case Mips::fixup_Mips_GOT_Local:
+ // Get the upper 16 bits; also add 1 if bit 15 is set.
+ Value = (Value >> 16) + ((Value & 0x8000) != 0);
break;
}
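The HI16/GOT_Local case above rounds the high half up by one when bit 15 is set, because the matching low-half relocation is sign-extended when the two halves are recombined. A small self-contained illustration of why that works (helper names are hypothetical):

#include <cstdint>
#include <cassert>

uint32_t hi16(uint32_t V) { return (V >> 16) + ((V & 0x8000) != 0); }
int32_t  lo16(uint32_t V) { return int16_t(V & 0xFFFF); } // sign-extends

// (hi16 << 16) plus the sign-extended lo16 reassembles V (mod 2^32).
void check(uint32_t V) { assert((hi16(V) << 16) + lo16(V) == V); }

// check(0x00018000): hi16 is 2 rather than 1, compensating for
// lo16 == -0x8000.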
@@ -96,42 +87,40 @@ public:
/// fixup kind as appropriate.
void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const {
- unsigned Kind = (unsigned)Fixup.getKind();
- Value = adjustFixupValue(Kind, Value);
+ MCFixupKind Kind = Fixup.getKind();
+ Value = adjustFixupValue((unsigned)Kind, Value);
if (!Value)
- return; // Doesn't change encoding.
+ return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
- switch (Kind) {
- default:
- llvm_unreachable("Unknown fixup kind!");
- case Mips::fixup_Mips_GOT16: // This will be fixed up at link time
- break;
- case FK_GPRel_4:
- case FK_Data_4:
- case Mips::fixup_Mips_26:
- case Mips::fixup_Mips_LO16:
- case Mips::fixup_Mips_PC16:
- case Mips::fixup_Mips_HI16:
- // For each byte of the fragment that the fixup touches, mask in
- // the fixup value. The Value has been "split up" into the appropriate
- // bitfields above.
- for (unsigned i = 0; i != 4; ++i) // FIXME - Need to support 2 and 8 bytes
- Data[Offset + i] += uint8_t((Value >> (i * 8)) & 0xff);
- break;
+ // FIXME: The code below will not work across endian models.
+ // How many bytes/bits are we fixing up?
+ unsigned NumBytes = ((getFixupKindInfo(Kind).TargetSize-1)/8)+1;
+ uint64_t Mask = ((uint64_t)1 << getFixupKindInfo(Kind).TargetSize) - 1;
+
+ // Grab current value, if any, from bits.
+ uint64_t CurVal = 0;
+ for (unsigned i = 0; i != NumBytes; ++i)
+ CurVal |= ((uint8_t)Data[Offset + i]) << (i * 8);
+
+ CurVal = (CurVal & ~Mask) | ((CurVal + Value) & Mask);
+
+ // Write out the bytes back to the code/data bits.
+ // First the unaffected bits and then the fixup.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ Data[Offset + i] = uint8_t((CurVal >> (i * 8)) & 0xff);
}
- }
+}
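The rewritten ApplyFixup patches bytes generically instead of switching over fixup kinds: it sizes the write from the fixup's TargetSize, reads back the current little-endian bytes, adds the value under a mask, and stores the bytes again. A sketch of that read-modify-write with the size passed in directly (assumes TargetSizeBits < 64 and little-endian data, as the FIXME above notes):

#include <cstdint>

void applyFixup(uint8_t *Data, unsigned TargetSizeBits, uint64_t Value) {
  unsigned NumBytes = (TargetSizeBits - 1) / 8 + 1;
  uint64_t Mask = ((uint64_t)1 << TargetSizeBits) - 1;

  // Grab the current value, if any, from the bits.
  uint64_t CurVal = 0;
  for (unsigned i = 0; i != NumBytes; ++i)
    CurVal |= (uint64_t)Data[i] << (i * 8);

  // Add in the fixup under the mask, leaving the other bits untouched.
  CurVal = (CurVal & ~Mask) | ((CurVal + Value) & Mask);

  // Write the bytes back: unaffected bits first, then the fixup.
  for (unsigned i = 0; i != NumBytes; ++i)
    Data[i] = uint8_t((CurVal >> (i * 8)) & 0xff);
}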
unsigned getNumFixupKinds() const { return Mips::NumTargetFixupKinds; }
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] = {
- // This table *must* be in the order that the fixup_* kinds are defined in
+ // This table *must* be in the same order as the fixup_* kinds in
// MipsFixupKinds.h.
//
// name offset bits flags
- { "fixup_Mips_NONE", 0, 0, 0 },
{ "fixup_Mips_16", 0, 16, 0 },
{ "fixup_Mips_32", 0, 32, 0 },
{ "fixup_Mips_REL32", 0, 32, 0 },
@@ -140,7 +129,8 @@ public:
{ "fixup_Mips_LO16", 0, 16, 0 },
{ "fixup_Mips_GPREL16", 0, 16, 0 },
{ "fixup_Mips_LITERAL", 0, 16, 0 },
- { "fixup_Mips_GOT16", 0, 16, 0 },
+ { "fixup_Mips_GOT_Global", 0, 16, 0 },
+ { "fixup_Mips_GOT_Local", 0, 16, 0 },
{ "fixup_Mips_PC16", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_Mips_CALL16", 0, 16, 0 },
{ "fixup_Mips_GPREL32", 0, 32, 0 },
@@ -173,6 +163,17 @@ public:
return false;
}
+ /// fixupNeedsRelaxation - Target specific predicate for whether a given
+ /// fixup requires the associated instruction to be relaxed.
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME.
+ assert(0 && "RelaxInstruction() unimplemented");
+ return false;
+ }
+
/// RelaxInstruction - Relax the instruction in the given fragment
/// to the next wider instruction.
///
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index cebfde0..00fc5df 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -31,8 +31,9 @@ namespace MipsII {
MO_NO_FLAG,
- /// MO_GOT - Represents the offset into the global offset table at which
+ /// MO_GOT16 - Represents the offset into the global offset table at which
/// the address the relocation entry symbol resides during execution.
+ MO_GOT16,
MO_GOT,
/// MO_GOT_CALL - Represents the offset into the global offset table at
@@ -55,6 +56,13 @@ namespace MipsII {
// Dynamic TLS).
MO_TLSGD,
+ /// MO_TLSLDM - Represents the offset into the global offset table at which
+ // the module ID and TLS block offset reside during execution (Local
+ // Dynamic TLS).
+ MO_TLSLDM,
+ MO_DTPREL_HI,
+ MO_DTPREL_LO,
+
/// MO_GOTTPREL - Represents the offset from the thread pointer (Initial
// Exec TLS).
MO_GOTTPREL,
@@ -180,6 +188,7 @@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum)
case Mips::D14:
return 28;
case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
+ case Mips::HWR29:
return 29;
case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
case Mips::D15:
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 20890ed..a56c002 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -14,74 +14,82 @@
namespace llvm {
namespace Mips {
- enum Fixups {
- // fixup_Mips_xxx - R_MIPS_NONE
- fixup_Mips_NONE = FirstTargetFixupKind,
+ // Although most of the current fixup types reflect a unique relocation,
+ // one can have multiple fixup types for a given relocation and they thus
+ // need to be uniquely named.
+ //
+ // This table *must* be in the same order as the
+ // MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] table
+ // in MipsAsmBackend.cpp.
+ //
+ enum Fixups {
+ // Branch fixups resulting in R_MIPS_16.
+ fixup_Mips_16 = FirstTargetFixupKind,
- // fixup_Mips_xxx - R_MIPS_16.
- fixup_Mips_16,
+ // Pure 32 bit data fixup resulting in - R_MIPS_32.
+ fixup_Mips_32,
- // fixup_Mips_xxx - R_MIPS_32.
- fixup_Mips_32,
+ // Full 32 bit data relative data fixup resulting in - R_MIPS_REL32.
+ fixup_Mips_REL32,
- // fixup_Mips_xxx - R_MIPS_REL32.
- fixup_Mips_REL32,
+ // Jump 26 bit fixup resulting in - R_MIPS_26.
+ fixup_Mips_26,
- // fixup_Mips_xxx - R_MIPS_26.
- fixup_Mips_26,
+ // Pure upper 16 bit fixup resulting in - R_MIPS_HI16.
+ fixup_Mips_HI16,
- // fixup_Mips_xxx - R_MIPS_HI16.
- fixup_Mips_HI16,
+ // Pure lower 16 bit fixup resulting in - R_MIPS_LO16.
+ fixup_Mips_LO16,
- // fixup_Mips_xxx - R_MIPS_LO16.
- fixup_Mips_LO16,
+ // 16 bit fixup for GP offset resulting in - R_MIPS_GPREL16.
+ fixup_Mips_GPREL16,
- // fixup_Mips_xxx - R_MIPS_GPREL16.
- fixup_Mips_GPREL16,
+ // 16 bit literal fixup resulting in - R_MIPS_LITERAL.
+ fixup_Mips_LITERAL,
- // fixup_Mips_xxx - R_MIPS_LITERAL.
- fixup_Mips_LITERAL,
+ // Global symbol fixup resulting in - R_MIPS_GOT16.
+ fixup_Mips_GOT_Global,
- // fixup_Mips_xxx - R_MIPS_GOT16.
- fixup_Mips_GOT16,
+ // Local symbol fixup resulting in - R_MIPS_GOT16.
+ fixup_Mips_GOT_Local,
- // fixup_Mips_xxx - R_MIPS_PC16.
- fixup_Mips_PC16,
+ // PC relative branch fixup resulting in - R_MIPS_PC16.
+ fixup_Mips_PC16,
- // fixup_Mips_xxx - R_MIPS_CALL16.
- fixup_Mips_CALL16,
+ // resulting in - R_MIPS_CALL16.
+ fixup_Mips_CALL16,
- // fixup_Mips_xxx - R_MIPS_GPREL32.
- fixup_Mips_GPREL32,
+ // resulting in - R_MIPS_GPREL32.
+ fixup_Mips_GPREL32,
- // fixup_Mips_xxx - R_MIPS_SHIFT5.
- fixup_Mips_SHIFT5,
+ // resulting in - R_MIPS_SHIFT5.
+ fixup_Mips_SHIFT5,
- // fixup_Mips_xxx - R_MIPS_SHIFT6.
- fixup_Mips_SHIFT6,
+ // resulting in - R_MIPS_SHIFT6.
+ fixup_Mips_SHIFT6,
- // fixup_Mips_xxx - R_MIPS_64.
- fixup_Mips_64,
+ // Pure 64 bit data fixup resulting in - R_MIPS_64.
+ fixup_Mips_64,
- // fixup_Mips_xxx - R_MIPS_TLS_GD.
- fixup_Mips_TLSGD,
+ // resulting in - R_MIPS_TLS_GD.
+ fixup_Mips_TLSGD,
- // fixup_Mips_xxx - R_MIPS_TLS_GOTTPREL.
- fixup_Mips_GOTTPREL,
+ // resulting in - R_MIPS_TLS_GOTTPREL.
+ fixup_Mips_GOTTPREL,
- // fixup_Mips_xxx - R_MIPS_TLS_TPREL_HI16.
- fixup_Mips_TPREL_HI,
+ // resulting in - R_MIPS_TLS_TPREL_HI16.
+ fixup_Mips_TPREL_HI,
- // fixup_Mips_xxx - R_MIPS_TLS_TPREL_LO16.
- fixup_Mips_TPREL_LO,
+ // resulting in - R_MIPS_TLS_TPREL_LO16.
+ fixup_Mips_TPREL_LO,
- // fixup_Mips_xxx - yyy. // This should become R_MIPS_PC16
- fixup_Mips_Branch_PCRel,
+ // PC relative branch fixup resulting in - R_MIPS_PC16
+ fixup_Mips_Branch_PCRel,
- // Marker
- LastTargetFixupKind,
- NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
- };
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+ };
} // namespace Mips
} // namespace llvm
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 0c3cbb3..463dcfe 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -194,8 +194,11 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
case MCSymbolRefExpr::VK_Mips_GOT_CALL:
FixupKind = Mips::fixup_Mips_CALL16;
break;
+ case MCSymbolRefExpr::VK_Mips_GOT16:
+ FixupKind = Mips::fixup_Mips_GOT_Global;
+ break;
case MCSymbolRefExpr::VK_Mips_GOT:
- FixupKind = Mips::fixup_Mips_GOT16;
+ FixupKind = Mips::fixup_Mips_GOT_Local;
break;
case MCSymbolRefExpr::VK_Mips_ABS_HI:
FixupKind = Mips::fixup_Mips_HI16;
@@ -245,8 +248,8 @@ unsigned
MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
assert(MI.getOperand(OpNo).isImm());
- unsigned szEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
- return szEncoding - 1;
+ unsigned SizeEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
+ return SizeEncoding - 1;
}
// FIXME: should be called getMSBEncoding
@@ -256,10 +259,10 @@ MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
assert(MI.getOperand(OpNo-1).isImm());
assert(MI.getOperand(OpNo).isImm());
- unsigned pos = getMachineOpValue(MI, MI.getOperand(OpNo-1), Fixups);
- unsigned sz = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
+ unsigned Position = getMachineOpValue(MI, MI.getOperand(OpNo-1), Fixups);
+ unsigned Size = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
- return pos + sz - 1;
+ return Position + Size - 1;
}
#include "MipsGenMCCodeEmitter.inc"
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 39c2c16..e9e0f60 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -79,9 +79,9 @@ def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion",
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, MipsGenericItineraries, Features>;
-def : Proc<"mips32r1", [FeatureMips32]>;
-def : Proc<"4ke", [FeatureMips32r2]>;
-def : Proc<"mips64r1", [FeatureMips64]>;
+def : Proc<"mips32", [FeatureMips32]>;
+def : Proc<"mips32r2", [FeatureMips32r2]>;
+def : Proc<"mips64", [FeatureMips64]>;
def : Proc<"mips64r2", [FeatureMips64r2]>;
def MipsAsmWriter : AsmWriter {
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index b0fb4fa..2996986 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -25,7 +25,7 @@ def uimm16_64 : Operand<i64> {
// Transformation Function - get Imm - 32.
def Subtract32 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() - 32);
+ return getImm(N, (unsigned)N->getZExtValue() - 32);
}]>;
// shamt field must fit in 5 bits.
@@ -36,6 +36,19 @@ def imm32_63 : ImmLeaf<i32,
[{return (int32_t)Imm >= 32 && (int32_t)Imm < 64;}],
Subtract32>;
+// Is a 32-bit int.
+def immSExt32 : ImmLeaf<i64, [{return isInt<32>(Imm);}]>;
+
+// Transformation Function - get the higher 16 bits.
+def HIGHER : SDNodeXForm<imm, [{
+ return getImm(N, (N->getZExtValue() >> 32) & 0xFFFF);
+}]>;
+
+// Transformation Function - get the highest 16 bits.
+def HIGHEST : SDNodeXForm<imm, [{
+ return getImm(N, (N->getZExtValue() >> 48) & 0xFFFF);
+}]>;
+
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
@@ -206,6 +219,17 @@ let Uses = [SP_64] in
def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>,
Requires<[IsN64]>;
+def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>;
+
+def DEXT : ExtBase<3, "dext", CPU64Regs>;
+def DINS : InsBase<7, "dins", CPU64Regs>;
+
+def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "dsll32\t$rd, $rt, 0", [], IIAlu>;
+
+def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "sll\t$rd, $rt, 0", [], IIAlu>;
+
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
@@ -216,9 +240,15 @@ def : Pat<(i64 immSExt16:$in),
def : Pat<(i64 immZExt16:$in),
(ORi64 ZERO_64, imm:$in)>;
+// 32-bit immediates
+def : Pat<(i64 immSExt32:$imm),
+ (ORi64 (LUi64 (HI16 imm:$imm)), (LO16 imm:$imm))>;
+
// Arbitrary immediates
def : Pat<(i64 imm:$imm),
- (ORi64 (LUi64 (HI16 imm:$imm)), (LO16 imm:$imm))>;
+ (ORi64 (DSLL (ORi64 (DSLL (ORi64 (LUi64 (HIGHEST imm:$imm)),
+ (HIGHER imm:$imm)), 16), (HI16 imm:$imm)), 16),
+ (LO16 imm:$imm))>;
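The arbitrary-immediate pattern above builds a 64-bit constant from four 16-bit quarters: LUi64 loads the top quarter into bits 16-31, and each ORi64/DSLL pair merges the next quarter and shifts left to make room for the one after. The same computation as plain arithmetic (helper names mirror the transforms but are illustrative only):

#include <cstdint>

uint64_t highest(uint64_t I) { return (I >> 48) & 0xFFFF; }
uint64_t higher (uint64_t I) { return (I >> 32) & 0xFFFF; }
uint64_t hi16   (uint64_t I) { return (I >> 16) & 0xFFFF; }
uint64_t lo16   (uint64_t I) { return I & 0xFFFF; }

uint64_t materialize(uint64_t I) {
  uint64_t R = highest(I) << 16;  // LUi64 (HIGHEST imm)
  R = (R | higher(I)) << 16;      // ORi64 (HIGHER imm), DSLL 16
  R = (R | hi16(I)) << 16;        // ORi64 (HI16 imm), DSLL 16
  return R | lo16(I);             // ORi64 (LO16 imm)
}

// materialize(0x123456789ABCDEF0ULL) == 0x123456789ABCDEF0ULL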
// extended loads
let Predicates = [NotN64] in {
@@ -236,11 +266,13 @@ def : Pat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
def : Pat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
def : Pat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
def : Pat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
+def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
def : Pat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
def : Pat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>;
def : Pat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
def : Pat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
+def : Pat<(MipsLo tglobaltlsaddr:$in), (DADDiu ZERO_64, tglobaltlsaddr:$in)>;
def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)),
(DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>;
@@ -250,6 +282,15 @@ def : Pat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)),
(DADDiu CPU64Regs:$hi, tjumptable:$lo)>;
def : Pat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)),
(DADDiu CPU64Regs:$hi, tconstpool:$lo)>;
+def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>;
+
+def : WrapperPat<tglobaladdr, DADDiu, GP_64>;
+def : WrapperPat<tconstpool, DADDiu, GP_64>;
+def : WrapperPat<texternalsym, DADDiu, GP_64>;
+def : WrapperPat<tblockaddress, DADDiu, GP_64>;
+def : WrapperPat<tjumptable, DADDiu, GP_64>;
+def : WrapperPat<tglobaltlsaddr, DADDiu, GP_64>;
defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64,
ZERO_64>;
@@ -268,3 +309,6 @@ def : Pat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>, Requires<[IsN64]>;
def : Pat<(i32 (trunc CPU64Regs:$src)),
(SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>, Requires<[IsN64]>;
+// 32-to-64-bit extension
+def : Pat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+def : Pat<(i64 (zext CPURegs:$src)), (DSRL32 (DSLL64_32 CPURegs:$src), 0)>;
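The zext pattern above clears the upper 32 bits with a shift pair rather than a 64-bit AND, whose mask would itself take several instructions to materialize: dsll32 moves the value to the top of the register and dsrl32 shifts it back down with zeroes filling in. In plain arithmetic:

#include <cstdint>

// Zero-extension as performed by (DSRL32 (DSLL64_32 $src), 0) above.
uint64_t zext32(uint64_t Src) {
  uint64_t Hi = Src << 32; // dsll32: value now in bits 32-63
  return Hi >> 32;         // dsrl32: logical shift brings zeroes down
}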
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index d27e3ab..a5505d3 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -96,19 +96,17 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (!OutStreamer.hasRawTextSupport()) {
// Lower CPLOAD and CPRESTORE
- if (Opc == Mips::CPLOAD) {
+ if (Opc == Mips::CPLOAD)
MCInstLowering.LowerCPLOAD(MI, MCInsts);
- for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I
- != MCInsts.end(); ++I)
+ else if (Opc == Mips::CPRESTORE)
+ MCInstLowering.LowerCPRESTORE(MI, MCInsts);
+
+ if (!MCInsts.empty()) {
+ for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
+ I != MCInsts.end(); ++I)
OutStreamer.EmitInstruction(*I);
return;
}
-
- if (Opc == Mips::CPRESTORE) {
- MCInstLowering.LowerCPRESTORE(MI, TmpInst0);
- OutStreamer.EmitInstruction(TmpInst0);
- return;
- }
}
OutStreamer.EmitInstruction(TmpInst0);
@@ -317,9 +315,9 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
// Otherwise, check the last instruction.
// Check if the last terminator is an unconditional branch.
MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) ;
+ while (I != Pred->begin() && !(--I)->isTerminator()) ;
- return !I->getDesc().isBarrier();
+ return !I->isBarrier();
}
// Print out an operand for an inline asm expression.
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index a8f29ae..6b26e24 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -144,7 +144,7 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB){
MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I)
emitInstruction(*I);
}
@@ -161,7 +161,7 @@ unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI,
if (Form == MipsII::FrmJ)
return Mips::reloc_mips_26;
if ((Form == MipsII::FrmI || Form == MipsII::FrmFI)
- && MI.getDesc().isBranch())
+ && MI.isBranch())
return Mips::reloc_mips_branch;
if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi)
return Mips::reloc_mips_hi;
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index be3b7a0..1d9e9b0 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -96,7 +96,7 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) {
LastFiller = MBB.end();
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- if (I->getDesc().hasDelaySlot()) {
+ if (I->hasDelaySlot()) {
++FilledSlots;
Changed = true;
@@ -146,7 +146,7 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB,
|| I->isInlineAsm()
|| I->isLabel()
|| FI == LastFiller
- || I->getDesc().isPseudo()
+ || I->isPseudo()
//
// Should not allow:
// ERET, DERET or WAIT, PAUSE. Need to add these to instruction
@@ -174,16 +174,15 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
if (candidate->isImplicitDef() || candidate->isKill())
return true;
- MCInstrDesc MCID = candidate->getDesc();
// Loads or stores cannot be moved past a store to the delay slot
// and stores cannot be moved past a load.
- if (MCID.mayLoad()) {
+ if (candidate->mayLoad()) {
if (sawStore)
return true;
sawLoad = true;
}
- if (MCID.mayStore()) {
+ if (candidate->mayStore()) {
if (sawStore)
return true;
sawStore = true;
@@ -191,7 +190,7 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
return true;
}
- assert((!MCID.isCall() && !MCID.isReturn()) &&
+ assert((!candidate->isCall() && !candidate->isReturn()) &&
"Cannot put calls or returns in delay slot.");
for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
@@ -221,11 +220,11 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
SmallSet<unsigned, 32>& RegUses) {
// If MI is a call or return, just examine the explicit non-variadic operands.
MCInstrDesc MCID = MI->getDesc();
- unsigned e = MCID.isCall() || MCID.isReturn() ? MCID.getNumOperands() :
- MI->getNumOperands();
+ unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() :
+ MI->getNumOperands();
// Add RA to RegDefs to prevent users of RA from going into delay slot.
- if (MCID.isCall())
+ if (MI->isCall())
RegDefs.insert(Mips::RA);
for (unsigned i = 0; i != e; ++i) {
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 36aef99..2466545 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -85,8 +85,8 @@ using namespace llvm;
// if frame pointer elimination is disabled.
bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()
- || MFI->isFrameAddressTaken();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
}
bool MipsFrameLowering::targetHandlesStackFrameRounding() const {
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 9c831ed..b17239d 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -86,10 +86,9 @@ private:
// Complex Pattern.
bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset);
- // getI32Imm - Return a target constant with the specified
- // value, of type i32.
- inline SDValue getI32Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ // getImm - Return a target constant with the specified value.
+ inline SDValue getImm(const SDNode *Node, unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
}
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
@@ -122,21 +121,16 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
}
// on PIC code Load GA
- if (TM.getRelocationModel() == Reloc::PIC_) {
- if (Addr.getOpcode() == MipsISD::WrapperPIC) {
- Base = CurDAG->getRegister(GPReg, ValTy);
- Offset = Addr.getOperand(0);
- return true;
- }
- } else {
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = CurDAG->getRegister(GPReg, ValTy);
+ Offset = Addr.getOperand(0);
+ return true;
+ }
+
+ if (TM.getRelocationModel() != Reloc::PIC_) {
if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress))
return false;
- else if (Addr.getOpcode() == ISD::TargetGlobalTLSAddress) {
- Base = CurDAG->getRegister(GPReg, ValTy);
- Offset = Addr;
- return true;
- }
}
// Addresses of the form FI+const or FI|const
@@ -310,13 +304,24 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
}
case MipsISD::ThreadPointer: {
- unsigned SrcReg = Mips::HWR29;
- unsigned DestReg = Mips::V1;
- SDNode *Rdhwr = CurDAG->getMachineNode(Mips::RDHWR, Node->getDebugLoc(),
- Node->getValueType(0), CurDAG->getRegister(SrcReg, MVT::i32));
+ EVT PtrVT = TLI.getPointerTy();
+ unsigned RdhwrOpc, SrcReg, DestReg;
+
+ if (PtrVT == MVT::i32) {
+ RdhwrOpc = Mips::RDHWR;
+ SrcReg = Mips::HWR29;
+ DestReg = Mips::V1;
+ } else {
+ RdhwrOpc = Mips::RDHWR64;
+ SrcReg = Mips::HWR29_64;
+ DestReg = Mips::V1_64;
+ }
+
+ SDNode *Rdhwr = CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
+ Node->getValueType(0), CurDAG->getRegister(SrcReg, PtrVT));
SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg,
SDValue(Rdhwr, 0));
- SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, MVT::i32);
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT);
ReplaceUses(SDValue(Node, 0), ResNode);
return ResNode.getNode();
}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index b5a15cf..c9b657c 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -40,11 +40,11 @@ using namespace llvm;
// mask (Pos), and return true.
// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
- if (!isUInt<32>(I) || !isShiftedMask_32(I))
+ if (!isShiftedMask_64(I))
return false;
- Size = CountPopulation_32(I);
- Pos = CountTrailingZeros_32(I);
+ Size = CountPopulation_64(I);
+ Pos = CountTrailingZeros_64(I);
return true;
}
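IsShiftedMask accepts exactly the masks that are one contiguous run of ones and reports the run's position and length; this hunk widens it from 32-bit to 64-bit values. An equivalent standalone sketch using loops in place of the MathExtras helpers:

#include <cstdint>

bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
  if (I == 0)
    return false;
  uint64_t P = 0, S = 0, V = I;
  while ((V & 1) == 0) { V >>= 1; ++P; } // count trailing zeros
  while (V & 1) { V >>= 1; ++S; }        // count the run of ones
  if (V != 0)
    return false;                        // a second run: not a shifted mask
  Pos = P;
  Size = S;
  return true;
}

// isShiftedMask(0x003ff800, Pos, Size) yields Pos == 11, Size == 11,
// matching the example in the comment above.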
@@ -54,9 +54,6 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::Hi: return "MipsISD::Hi";
case MipsISD::Lo: return "MipsISD::Lo";
case MipsISD::GPRel: return "MipsISD::GPRel";
- case MipsISD::TlsGd: return "MipsISD::TlsGd";
- case MipsISD::TprelHi: return "MipsISD::TprelHi";
- case MipsISD::TprelLo: return "MipsISD::TprelLo";
case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
case MipsISD::Ret: return "MipsISD::Ret";
case MipsISD::FPBrcond: return "MipsISD::FPBrcond";
@@ -72,7 +69,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::DivRemU: return "MipsISD::DivRemU";
case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
- case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC";
+ case MipsISD::Wrapper: return "MipsISD::Wrapper";
case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
case MipsISD::Sync: return "MipsISD::Sync";
case MipsISD::Ext: return "MipsISD::Ext";
@@ -129,7 +126,9 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
@@ -157,6 +156,10 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
@@ -555,20 +558,20 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
return SDValue();
SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1);
-
+ unsigned ShiftRightOpc = ShiftRight.getOpcode();
+
// Op's first operand must be a shift right.
- if (ShiftRight.getOpcode() != ISD::SRA && ShiftRight.getOpcode() != ISD::SRL)
+ if (ShiftRightOpc != ISD::SRA && ShiftRightOpc != ISD::SRL)
return SDValue();
// The second operand of the shift must be an immediate.
- uint64_t Pos;
ConstantSDNode *CN;
if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1))))
return SDValue();
- Pos = CN->getZExtValue();
-
+ uint64_t Pos = CN->getZExtValue();
uint64_t SMPos, SMSize;
+
// Op's second operand must be a shifted mask.
if (!(CN = dyn_cast<ConstantSDNode>(Mask)) ||
!IsShiftedMask(CN->getZExtValue(), SMPos, SMSize))
@@ -576,10 +579,11 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
// Return if the shifted mask does not start at bit 0 or the sum of its size
// and Pos exceeds the word's size.
- if (SMPos != 0 || Pos + SMSize > 32)
+ EVT ValTy = N->getValueType(0);
+ if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits())
return SDValue();
- return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), MVT::i32,
+ return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), ValTy,
ShiftRight.getOperand(0),
DAG.getConstant(Pos, MVT::i32),
DAG.getConstant(SMSize, MVT::i32));
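The combine above rewrites an and-of-shift whose mask starts at bit 0 into a single MipsISD::Ext node, and now types that node at the value's own width so it also fires for i64 (where the backend can select the dext added in Mips64InstrInfo.td). The arithmetic identity it relies on, sketched for 32 bits:

#include <cstdint>

// (X >> Pos) & ((1u << Size) - 1) extracts the Size-bit field starting
// at bit Pos -- what the MIPS ext instruction computes (Size < 32
// assumed here).
uint32_t ext(uint32_t X, unsigned Pos, unsigned Size) {
  return (X >> Pos) & ((1u << Size) - 1);
}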
@@ -630,10 +634,11 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG,
// Return if the shift amount and the first bit position of mask are not the
// same.
- if (Shamt != SMPos0)
+ EVT ValTy = N->getValueType(0);
+ if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits()))
return SDValue();
- return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), MVT::i32,
+ return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), ValTy,
Shl.getOperand(0),
DAG.getConstant(SMPos0, MVT::i32),
DAG.getConstant(SMSize0, MVT::i32),
@@ -1485,9 +1490,9 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
(GV->hasLocalLinkage() && !isa<Function>(GV)));
unsigned GotFlag = IsN64 ?
(HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) :
- MipsII::MO_GOT;
+ (HasGotOfst ? MipsII::MO_GOT : MipsII::MO_GOT16);
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag);
- GA = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, GA);
+ GA = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GA);
SDValue ResNode = DAG.getLoad(ValTy, dl,
DAG.getEntryNode(), GA, MachinePointerInfo(),
false, false, false, 0);
@@ -1523,7 +1528,7 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
SDValue BAGOTOffset = DAG.getBlockAddress(BA, ValTy, true, GOTFlag);
- BAGOTOffset = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, BAGOTOffset);
+ BAGOTOffset = DAG.getNode(MipsISD::Wrapper, dl, ValTy, BAGOTOffset);
SDValue BALOOffset = DAG.getBlockAddress(BA, ValTy, true, OFSTFlag);
SDValue Load = DAG.getLoad(ValTy, dl,
DAG.getEntryNode(), BAGOTOffset,
@@ -1535,9 +1540,9 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
SDValue MipsTargetLowering::
LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
{
- // If the relocation model is PIC, use the General Dynamic TLS Model,
- // otherwise use the Initial Exec or Local Exec TLS Model.
- // TODO: implement Local Dynamic TLS model
+ // If the relocation model is PIC, use the General Dynamic TLS Model or
+ // Local Dynamic TLS model, otherwise use the Initial Exec or
+ // Local Exec TLS Model.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
DebugLoc dl = GA->getDebugLoc();
@@ -1546,45 +1551,59 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
// General Dynamic TLS Model
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32,
- 0, MipsII::MO_TLSGD);
- SDValue Tlsgd = DAG.getNode(MipsISD::TlsGd, dl, MVT::i32, TGA);
- SDValue GP = DAG.getRegister(Mips::GP, MVT::i32);
- SDValue Argument = DAG.getNode(ISD::ADD, dl, MVT::i32, GP, Tlsgd);
+ bool LocalDynamic = GV->hasInternalLinkage();
+ unsigned Flag = LocalDynamic ? MipsII::MO_TLSLDM :MipsII::MO_TLSGD;
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Flag);
+ SDValue Argument = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, TGA);
+ unsigned PtrSize = PtrVT.getSizeInBits();
+ IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize);
+
+ SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT);
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Argument;
- Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
+ Entry.Ty = PtrTy;
Args.push_back(Entry);
+
std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(DAG.getEntryNode(),
- (Type *) Type::getInt32Ty(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false, true,
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG,
- dl);
-
- return CallResult.first;
+ LowerCallTo(DAG.getEntryNode(), PtrTy,
+ false, false, false, false, 0, CallingConv::C, false, true,
+ TlsGetAddr, Args, DAG, dl);
+
+ SDValue Ret = CallResult.first;
+
+ if (!LocalDynamic)
+ return Ret;
+
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ MipsII::MO_DTPREL_HI);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ MipsII::MO_DTPREL_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Ret);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, Add, Lo);
}
SDValue Offset;
if (GV->isDeclaration()) {
// Initial Exec TLS Model
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_GOTTPREL);
- Offset = DAG.getLoad(MVT::i32, dl,
+ TGA = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, TGA);
+ Offset = DAG.getLoad(PtrVT, dl,
DAG.getEntryNode(), TGA, MachinePointerInfo(),
false, false, false, 0);
} else {
// Local Exec TLS Model
- SDVTList VTs = DAG.getVTList(MVT::i32);
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_TPREL_HI);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_TPREL_LO);
- SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1);
- SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo);
- Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo);
+ Offset = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
}
SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
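The dispatch above picks one of four TLS models; as a minimal sketch, assuming only the three inputs the function actually consults (relocation model, linkage, declaration status; the helper name is illustrative, not part of the patch):

enum class TLSModel { GeneralDynamic, LocalDynamic, InitialExec, LocalExec };

// Sketch of the model selection in LowerGlobalTLSAddress. PIC code calls
// __tls_get_addr either way; the Local Dynamic case additionally adds the
// %dtprel_hi/%dtprel_lo pair to the returned base.
TLSModel pickMipsTLSModel(bool IsPIC, bool HasInternalLinkage,
                          bool IsDeclaration) {
  if (IsPIC)
    return HasInternalLinkage ? TLSModel::LocalDynamic
                              : TLSModel::GeneralDynamic;
  return IsDeclaration ? TLSModel::InitialExec   // %gottprel load
                       : TLSModel::LocalExec;    // %tprel_hi/%tprel_lo pair
}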
@@ -1594,34 +1613,29 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
SDValue MipsTargetLowering::
LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
- SDValue ResNode;
- SDValue HiPart;
+ SDValue HiPart, JTI, JTILo;
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
- unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HI;
-
EVT PtrVT = Op.getValueType();
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
-
- if (!IsPIC) {
- SDValue Ops[] = { JTI };
- HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1);
+ if (!IsPIC && !IsN64) {
+ JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_HI);
+ HiPart = DAG.getNode(MipsISD::Hi, dl, PtrVT, JTI);
+ JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_LO);
} else { // Emit Load from Global Pointer
- JTI = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, JTI);
- HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI,
- MachinePointerInfo(),
- false, false, false, 0);
+ unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
+ unsigned OfstFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
+ JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, GOTFlag);
+ JTI = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, JTI);
+ HiPart = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), JTI,
+ MachinePointerInfo(), false, false, false, 0);
+ JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OfstFlag);
}
- SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
- MipsII::MO_ABS_LO);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTILo);
- ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
-
- return ResNode;
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, JTILo);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, HiPart, Lo);
}
SDValue MipsTargetLowering::
@@ -1657,7 +1671,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
SDValue CP = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(),
N->getOffset(), GOTFlag);
- CP = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, CP);
+ CP = DAG.getNode(MipsISD::Wrapper, dl, ValTy, CP);
SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(),
CP, MachinePointerInfo::getConstantPool(),
false, false, false, 0);
@@ -1685,21 +1699,29 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV),
false, false, 0);
}
-
-static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG) {
+
+// Called if the size of integer registers is large enough to hold the whole
+// floating point number.
+static SDValue LowerFCOPYSIGNLargeIntReg(SDValue Op, SelectionDAG &DAG) {
// FIXME: Use ext/ins instructions if target architecture is Mips32r2.
+ EVT ValTy = Op.getValueType();
+ EVT IntValTy = MVT::getIntegerVT(ValTy.getSizeInBits());
+ uint64_t Mask = (uint64_t)1 << (ValTy.getSizeInBits() - 1);
DebugLoc dl = Op.getDebugLoc();
- SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(1));
- SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op0,
- DAG.getConstant(0x7fffffff, MVT::i32));
- SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op1,
- DAG.getConstant(0x80000000, MVT::i32));
- SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Result);
+ SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntValTy, Op.getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntValTy, Op.getOperand(1));
+ SDValue And0 = DAG.getNode(ISD::AND, dl, IntValTy, Op0,
+ DAG.getConstant(Mask - 1, IntValTy));
+ SDValue And1 = DAG.getNode(ISD::AND, dl, IntValTy, Op1,
+ DAG.getConstant(Mask, IntValTy));
+ SDValue Result = DAG.getNode(ISD::OR, dl, IntValTy, And0, And1);
+ return DAG.getNode(ISD::BITCAST, dl, ValTy, Result);
}
-static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool isLittle) {
+// Called if the size of integer registers is not large enough to hold the
+// whole floating point number (e.g. f64 with 32-bit integer registers).
+static SDValue
+LowerFCOPYSIGNSmallIntReg(SDValue Op, SelectionDAG &DAG, bool isLittle) {
// FIXME:
// Use ext/ins instructions if target architecture is Mips32r2.
// Eliminate redundant mfc1 and mtc1 instructions.
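The masking in LowerFCOPYSIGNLargeIntReg above is the ordinary integer copysign trick; a scalar model of the f32 case, assuming IEEE-754 layout (hypothetical helper, not in the patch):

#include <cstdint>
#include <cstring>

// Clear the sign bit of the magnitude operand (Mask - 1), keep only the
// sign bit of the sign operand (Mask), then OR the two halves together.
float copysignF32(float Mag, float Sgn) {
  uint32_t M, S;
  std::memcpy(&M, &Mag, sizeof M);            // models ISD::BITCAST
  std::memcpy(&S, &Sgn, sizeof S);
  const uint32_t Mask = 1u << 31;             // sign bit of an f32
  const uint32_t R = (M & (Mask - 1)) | (S & Mask);
  float Res;
  std::memcpy(&Res, &R, sizeof Res);          // bitcast back to f32
  return Res;
}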
@@ -1734,10 +1756,10 @@ SDValue MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG)
assert(Ty == MVT::f32 || Ty == MVT::f64);
- if (Ty == MVT::f32)
- return LowerFCOPYSIGN32(Op, DAG);
+ if (Ty == MVT::f32 || HasMips64)
+ return LowerFCOPYSIGNLargeIntReg(Op, DAG);
else
- return LowerFCOPYSIGN64(Op, DAG, Subtarget->isLittle());
+ return LowerFCOPYSIGNSmallIntReg(Op, DAG, Subtarget->isLittle());
}
SDValue MipsTargetLowering::
@@ -2328,7 +2350,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
// node so that legalize doesn't hack it.
unsigned char OpFlag;
bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25
- bool LoadSymAddr = false;
+ bool GlobalOrExternal = false;
SDValue CalleeLo;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
@@ -2345,7 +2367,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
getPointerTy(), 0, OpFlag);
}
- LoadSymAddr = true;
+ GlobalOrExternal = true;
}
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
if (IsN64 || (!IsO32 && IsPIC))
@@ -2356,16 +2378,16 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
OpFlag = MipsII::MO_GOT_CALL;
Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
getPointerTy(), OpFlag);
- LoadSymAddr = true;
+ GlobalOrExternal = true;
}
SDValue InFlag;
// Create nodes that load address of callee and copy it to T9
if (IsPICCall) {
- if (LoadSymAddr) {
+ if (GlobalOrExternal) {
// Load callee address
- Callee = DAG.getNode(MipsISD::WrapperPIC, dl, getPointerTy(), Callee);
+ Callee = DAG.getNode(MipsISD::Wrapper, dl, getPointerTy(), Callee);
SDValue LoadValue = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
Callee, MachinePointerInfo::getGOT(),
false, false, false, 0);
@@ -2377,7 +2399,11 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
} else
Callee = LoadValue;
}
+ }
+ // T9 should contain the address of the callee function if
+ // -relocation-model=pic or if it is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
// copy to T9
unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0));
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index f2b64e3..81d093f 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -40,13 +40,6 @@ namespace llvm {
// Handle gp_rel (small data/bss sections) relocation.
GPRel,
- // General Dynamic TLS
- TlsGd,
-
- // Local Exec TLS
- TprelHi,
- TprelLo,
-
// Thread Pointer
ThreadPointer,
@@ -79,7 +72,7 @@ namespace llvm {
BuildPairF64,
ExtractElementF64,
- WrapperPIC,
+ Wrapper,
DynAlloc,
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index e1725fa..21a1862 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -115,7 +115,7 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
let Inst{15-0} = imm16;
}
-class CBranchBase<bits<6> op, dag outs, dag ins, string asmstr,
+class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
MipsInst<outs, ins, asmstr, pattern, itin, FrmI>
{
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 5358dc0..ea101f7 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -29,8 +29,8 @@ using namespace llvm;
MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
: MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()),
- RI(*TM.getSubtargetImpl(), *this) {}
-
+ RI(*TM.getSubtargetImpl(), *this),
+ UncondBrOpc(TM.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J) {}
const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
return RI;
@@ -236,7 +236,8 @@ static unsigned GetAnalyzableBrOpc(unsigned Opc) {
Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
- Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ?
+ Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B ||
+ Opc == Mips::J) ?
Opc : 0;
}
@@ -320,7 +321,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// If there is only one terminator instruction, process it.
if (!SecondLastOpc) {
// Unconditional branch
- if (LastOpc == Mips::J) {
+ if (LastOpc == UncondBrOpc) {
TBB = LastInst->getOperand(0).getMBB();
return false;
}
@@ -337,7 +338,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// If second to last instruction is an unconditional branch,
// analyze it and remove the last instruction.
- if (SecondLastOpc == Mips::J) {
+ if (SecondLastOpc == UncondBrOpc) {
// Return if the last instruction cannot be removed.
if (!AllowModify)
return true;
@@ -349,7 +350,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Conditional branch followed by an unconditional branch.
// The last one must be unconditional.
- if (LastOpc != Mips::J)
+ if (LastOpc != UncondBrOpc)
return true;
AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
@@ -391,14 +392,14 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
// Two-way Conditional branch.
if (FBB) {
BuildCondBr(MBB, TBB, DL, Cond);
- BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(FBB);
return 2;
}
// One way branch.
// Unconditional branch.
if (Cond.empty())
- BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(TBB);
else // Conditional branch.
BuildCondBr(MBB, TBB, DL, Cond);
return 1;
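The new UncondBrOpc field makes the opcode choice explicit; a sketch of the rationale, assuming the usual MIPS encodings (j takes a 26-bit region-absolute target, b is a PC-relative branch):

enum UncondBrKind { AbsoluteJump /* j */, PCRelBranch /* b */ };

// Illustrative restatement of the constructor logic above: absolute jumps
// are only safe for statically linked code, so PIC code must use 'b'.
UncondBrKind pickUncondBr(bool IsPIC) {
  return IsPIC ? PCRelBranch : AbsoluteJump;
}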
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 8fa3052..70cc2cf 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -34,6 +34,7 @@ class MipsInstrInfo : public MipsGenInstrInfo {
MipsTargetMachine &TM;
bool IsN64;
const MipsRegisterInfo RI;
+ unsigned UncondBrOpc;
public:
explicit MipsInstrInfo(MipsTargetMachine &TM);
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 0ae94ab..9fcc5fd 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -107,7 +107,7 @@ def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
// movn %got(d)($gp), %got(c)($gp), $4
// This instruction is illegal since movn can take only register operands.
-def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>;
+def MipsWrapper : SDNode<"MipsISD::Wrapper", SDTIntUnaryOp>;
// Pointer to dynamically allocated stack area.
def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
@@ -132,6 +132,8 @@ def NotMips64 : Predicate<"!Subtarget.hasMips64()">;
def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">;
def IsN64 : Predicate<"Subtarget.isABI_N64()">;
def NotN64 : Predicate<"!Subtarget.isABI_N64()">;
+def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">;
//===----------------------------------------------------------------------===//
// Mips Operand, Complex Patterns and Transformations Definitions.
@@ -194,12 +196,12 @@ def size_ins : Operand<i32> {
// Transformation Function - get the lower 16 bits.
def LO16 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
+ return getImm(N, N->getZExtValue() & 0xFFFF);
}]>;
// Transformation Function - get the higher 16 bits.
def HI16 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() >> 16);
+ return getImm(N, (N->getZExtValue() >> 16) & 0xFFFF);
}]>;
// Node immediate fits as 16-bit sign extended on target immediate.
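A worked check of the two transforms above, assuming the lui/ori materialization they feed (ori zero-extends its immediate, so no carry adjustment is needed here):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Imm = 0x12345678;
  uint64_t Lo = Imm & 0xFFFF;          // LO16 -> 0x5678
  uint64_t Hi = (Imm >> 16) & 0xFFFF;  // HI16 -> 0x1234
  assert(((Hi << 16) | Lo) == Imm);    // lui; ori reassembles the value
  return 0;
}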
@@ -380,21 +382,13 @@ class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
let isPseudo = Pseudo;
}
-// Memory Load/Store
+// Unaligned Memory Load/Store
let canFoldAsLoad = 1 in
-class LoadX<bits<6> op, RegisterClass RC,
- Operand MemOpnd>:
- FMem<op, (outs RC:$rt), (ins MemOpnd:$addr),
- "",
- [], IILoad> {
-}
+class LoadUnAlign<bits<6> op, RegisterClass RC, Operand MemOpnd>:
+ FMem<op, (outs RC:$rt), (ins MemOpnd:$addr), "", [], IILoad> {}
-class StoreX<bits<6> op, RegisterClass RC,
- Operand MemOpnd>:
- FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr),
- "",
- [], IIStore> {
-}
+class StoreUnAlign<bits<6> op, RegisterClass RC, Operand MemOpnd>:
+ FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr), "", [], IIStore> {}
// 32-bit load.
multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
@@ -415,10 +409,10 @@ multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
}
// 32-bit load.
-multiclass LoadX32<bits<6> op> {
- def #NAME# : LoadX<op, CPURegs, mem>,
+multiclass LoadUnAlign32<bits<6> op> {
+ def #NAME# : LoadUnAlign<op, CPURegs, mem>,
Requires<[NotN64]>;
- def _P8 : LoadX<op, CPURegs, mem64>,
+ def _P8 : LoadUnAlign<op, CPURegs, mem64>,
Requires<[IsN64]>;
}
// 32-bit store.
@@ -440,18 +434,18 @@ multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
}
// 32-bit store.
-multiclass StoreX32<bits<6> op> {
- def #NAME# : StoreX<op, CPURegs, mem>,
+multiclass StoreUnAlign32<bits<6> op> {
+ def #NAME# : StoreUnAlign<op, CPURegs, mem>,
Requires<[NotN64]>;
- def _P8 : StoreX<op, CPURegs, mem64>,
+ def _P8 : StoreUnAlign<op, CPURegs, mem64>,
Requires<[IsN64]>;
}
// Conditional Branch
class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
- CBranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $rt, $imm16"),
- [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> {
+ BranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16),
+ !strconcat(instr_asm, "\t$rs, $rt, $imm16"),
+ [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
@@ -459,9 +453,9 @@ class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
RegisterClass RC>:
- CBranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $imm16"),
- [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> {
+ BranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16),
+ !strconcat(instr_asm, "\t$rs, $imm16"),
+ [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> {
let rt = _rt;
let isBranch = 1;
let isTerminator = 1;
@@ -485,11 +479,29 @@ class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od,
[(set CPURegs:$rt, (cond_op RC:$rs, imm_type:$imm16))],
IIAlu>;
-// Unconditional branch
-let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
+// Jump
class JumpFJ<bits<6> op, string instr_asm>:
FJ<op, (outs), (ins jmptarget:$target),
- !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>;
+ !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch> {
+ let isBranch=1;
+ let isTerminator=1;
+ let isBarrier=1;
+ let hasDelaySlot = 1;
+ let Predicates = [RelocStatic];
+}
+
+// Unconditional branch
+class UncondBranch<bits<6> op, string instr_asm>:
+ BranchBase<op, (outs), (ins brtarget:$imm16),
+ !strconcat(instr_asm, "\t$imm16"), [(br bb:$imm16)], IIBranch> {
+ let rs = 0;
+ let rt = 0;
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let hasDelaySlot = 1;
+ let Predicates = [RelocPIC];
+}
let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1,
isIndirectBranch = 1 in
@@ -616,21 +628,37 @@ class ByteSwap<bits<6> func, bits<5> sa, string instr_asm>:
}
// Read Hardware
-class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$rt), (ins HWRegs:$rd),
- "rdhwr\t$rt, $rd", [], IIAlu> {
+class ReadHardware<RegisterClass CPURegClass, RegisterClass HWRegClass>
+ : FR<0x1f, 0x3b, (outs CPURegClass:$rt), (ins HWRegClass:$rd),
+ "rdhwr\t$rt, $rd", [], IIAlu> {
let rs = 0;
let shamt = 0;
}
// Ext and Ins
-class ExtIns<bits<6> _funct, string instr_asm, dag outs, dag ins,
- list<dag> pattern, InstrItinClass itin>:
- FR<0x1f, _funct, outs, ins, !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
- pattern, itin>, Requires<[HasMips32r2]> {
+class ExtBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
+ FR<0x1f, _funct, (outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ext:$sz),
+ !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
+ [(set RC:$rt, (MipsExt RC:$rs, imm:$pos, imm:$sz))], NoItinerary> {
bits<5> pos;
bits<5> sz;
let rd = sz;
let shamt = pos;
+ let Predicates = [HasMips32r2];
+}
+
+class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
+ FR<0x1f, _funct, (outs RC:$rt),
+ (ins RC:$rs, uimm16:$pos, size_ins:$sz, RC:$src),
+ !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
+ [(set RC:$rt, (MipsIns RC:$rs, imm:$pos, imm:$sz, RC:$src))],
+ NoItinerary> {
+ bits<5> pos;
+ bits<5> sz;
+ let rd = sz;
+ let shamt = pos;
+ let Predicates = [HasMips32r2];
+ let Constraints = "$src = $rt";
}
// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
@@ -795,10 +823,10 @@ defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>;
defm USW : StoreM32<0x2b, "usw", store_u, 1>;
/// Primitives for unaligned
-defm LWL : LoadX32<0x22>;
-defm LWR : LoadX32<0x26>;
-defm SWL : StoreX32<0x2A>;
-defm SWR : StoreX32<0x2E>;
+defm LWL : LoadUnAlign32<0x22>;
+defm LWR : LoadUnAlign32<0x26>;
+defm SWL : StoreUnAlign32<0x2A>;
+defm SWR : StoreUnAlign32<0x2E>;
let hasSideEffects = 1 in
def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype",
@@ -822,6 +850,7 @@ def J : JumpFJ<0x02, "j">;
def JR : JumpFR<0x00, 0x08, "jr", CPURegs>;
def JAL : JumpLink<0x03, "jal">;
def JALR : JumpLinkReg<0x00, 0x09, "jalr">;
+def B : UncondBranch<0x04, "b">;
def BEQ : CBranch<0x04, "beq", seteq, CPURegs>;
def BNE : CBranch<0x05, "bne", setne, CPURegs>;
def BGEZ : CBranchZero<0x01, 1, "bgez", setge, CPURegs>;
@@ -888,21 +917,10 @@ def MSUBU : MArithR<5, "msubu", MipsMSubu>;
def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>,
Requires<[HasMips32]>;
-def RDHWR : ReadHardware;
-
-def EXT : ExtIns<0, "ext", (outs CPURegs:$rt),
- (ins CPURegs:$rs, uimm16:$pos, size_ext:$sz),
- [(set CPURegs:$rt,
- (MipsExt CPURegs:$rs, immZExt5:$pos, immZExt5:$sz))],
- NoItinerary>;
+def RDHWR : ReadHardware<CPURegs, HWRegs>;
-let Constraints = "$src = $rt" in
-def INS : ExtIns<4, "ins", (outs CPURegs:$rt),
- (ins CPURegs:$rs, uimm16:$pos, size_ins:$sz, CPURegs:$src),
- [(set CPURegs:$rt,
- (MipsIns CPURegs:$rs, immZExt5:$pos, immZExt5:$sz,
- CPURegs:$src))],
- NoItinerary>;
+def EXT : ExtBase<0, "ext", CPURegs>;
+def INS : InsBase<4, "ins", CPURegs>;
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
@@ -939,11 +957,13 @@ def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
+def : Pat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
(ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
@@ -953,6 +973,8 @@ def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
(ADDiu CPURegs:$hi, tjumptable:$lo)>;
def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
(ADDiu CPURegs:$hi, tconstpool:$lo)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
// gp_rel relocs
def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
@@ -960,26 +982,17 @@ def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
(ADDiu CPURegs:$gp, tconstpool:$in)>;
-// tlsgd
-def : Pat<(add CPURegs:$gp, (MipsTlsGd tglobaltlsaddr:$in)),
- (ADDiu CPURegs:$gp, tglobaltlsaddr:$in)>;
-
-// tprel hi/lo
-def : Pat<(MipsTprelHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
-def : Pat<(MipsTprelLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
-def : Pat<(add CPURegs:$hi, (MipsTprelLo tglobaltlsaddr:$lo)),
- (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
-
// wrapper_pic
-class WrapperPICPat<SDNode node>:
- Pat<(MipsWrapperPIC node:$in),
- (ADDiu GP, node:$in)>;
-
-def : WrapperPICPat<tglobaladdr>;
-def : WrapperPICPat<tconstpool>;
-def : WrapperPICPat<texternalsym>;
-def : WrapperPICPat<tblockaddress>;
-def : WrapperPICPat<tjumptable>;
+class WrapperPat<SDNode node, Instruction ADDiuOp, Register GPReg>:
+ Pat<(MipsWrapper node:$in),
+ (ADDiuOp GPReg, node:$in)>;
+
+def : WrapperPat<tglobaladdr, ADDiu, GP>;
+def : WrapperPat<tconstpool, ADDiu, GP>;
+def : WrapperPat<texternalsym, ADDiu, GP>;
+def : WrapperPat<tblockaddress, ADDiu, GP>;
+def : WrapperPat<tjumptable, ADDiu, GP>;
+def : WrapperPat<tglobaltlsaddr, ADDiu, GP>;
// Mips does not have "not", so we expand our way
def : Pat<(not CPURegs:$in),
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 6fc2af1..23486d3 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -41,10 +41,14 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case MipsII::MO_NO_FLAG: Kind = MCSymbolRefExpr::VK_None; break;
case MipsII::MO_GPREL: Kind = MCSymbolRefExpr::VK_Mips_GPREL; break;
case MipsII::MO_GOT_CALL: Kind = MCSymbolRefExpr::VK_Mips_GOT_CALL; break;
+ case MipsII::MO_GOT16: Kind = MCSymbolRefExpr::VK_Mips_GOT16; break;
case MipsII::MO_GOT: Kind = MCSymbolRefExpr::VK_Mips_GOT; break;
case MipsII::MO_ABS_HI: Kind = MCSymbolRefExpr::VK_Mips_ABS_HI; break;
case MipsII::MO_ABS_LO: Kind = MCSymbolRefExpr::VK_Mips_ABS_LO; break;
case MipsII::MO_TLSGD: Kind = MCSymbolRefExpr::VK_Mips_TLSGD; break;
+ case MipsII::MO_TLSLDM: Kind = MCSymbolRefExpr::VK_Mips_TLSLDM; break;
+ case MipsII::MO_DTPREL_HI:Kind = MCSymbolRefExpr::VK_Mips_DTPREL_HI; break;
+ case MipsII::MO_DTPREL_LO:Kind = MCSymbolRefExpr::VK_Mips_DTPREL_LO; break;
case MipsII::MO_GOTTPREL: Kind = MCSymbolRefExpr::VK_Mips_GOTTPREL; break;
case MipsII::MO_TPREL_HI: Kind = MCSymbolRefExpr::VK_Mips_TPREL_HI; break;
case MipsII::MO_TPREL_LO: Kind = MCSymbolRefExpr::VK_Mips_TPREL_LO; break;
@@ -136,14 +140,35 @@ void MipsMCInstLower::LowerCPLOAD(const MachineInstr *MI,
}
// Lower ".cprestore offset" to "sw $gp, offset($sp)".
-void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI) {
- OutMI.clear();
- OutMI.setOpcode(Mips::SW);
- OutMI.addOperand(MCOperand::CreateReg(Mips::GP));
- OutMI.addOperand(MCOperand::CreateReg(Mips::SP));
+void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI,
+ SmallVector<MCInst, 4>& MCInsts) {
const MachineOperand &MO = MI->getOperand(0);
assert(MO.isImm() && "CPRESTORE's operand must be an immediate.");
- OutMI.addOperand(MCOperand::CreateImm(MO.getImm()));
+ unsigned Offset = MO.getImm(), Reg = Mips::SP;
+ MCInst Sw;
+
+ if (Offset >= 0x8000) {
+ unsigned Hi = (Offset >> 16) + ((Offset & 0x8000) != 0);
+ Offset &= 0xffff;
+ Reg = Mips::AT;
+
+ // lui at,hi
+ // addu at,at,sp
+ MCInsts.resize(2);
+ MCInsts[0].setOpcode(Mips::LUi);
+ MCInsts[0].addOperand(MCOperand::CreateReg(Mips::AT));
+ MCInsts[0].addOperand(MCOperand::CreateImm(Hi));
+ MCInsts[1].setOpcode(Mips::ADDu);
+ MCInsts[1].addOperand(MCOperand::CreateReg(Mips::AT));
+ MCInsts[1].addOperand(MCOperand::CreateReg(Mips::AT));
+ MCInsts[1].addOperand(MCOperand::CreateReg(Mips::SP));
+ }
+
+ Sw.setOpcode(Mips::SW);
+ Sw.addOperand(MCOperand::CreateReg(Mips::GP));
+ Sw.addOperand(MCOperand::CreateReg(Reg));
+ Sw.addOperand(MCOperand::CreateImm(Offset));
+ MCInsts.push_back(Sw);
}
MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO,
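A worked check of the large-offset path in LowerCPRESTORE above, with an illustrative Offset of 0x9000: sw sign-extends its 16-bit immediate, so the lui half must absorb the carry out of bit 15, and $at holds the adjusted base:

#include <cassert>
#include <cstdint>

// Models: lui $at, Hi ; addu $at, $at, $sp ; sw $gp, Lo($at)
int main() {
  uint32_t Offset = 0x9000;                                 // >= 0x8000
  uint32_t Hi = (Offset >> 16) + ((Offset & 0x8000) != 0);  // 1
  int32_t Lo = (int16_t)(Offset & 0xffff);                  // -0x7000
  assert((Hi << 16) + Lo == Offset);   // $at + Lo == $sp + Offset
  return 0;
}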
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index 98e37e4..1490c14 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -36,7 +36,7 @@ public:
MipsAsmPrinter &asmprinter);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
void LowerCPLOAD(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
- void LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI);
+ void LowerCPRESTORE(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
void LowerUnalignedLoadStore(const MachineInstr *MI,
SmallVector<MCInst, 4>& MCInsts);
private:
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 06c4a66..e5a0f08 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -125,6 +125,7 @@ getRegisterNumbering(unsigned RegEnum)
case Mips::D14:
return 28;
case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
+ case Mips::HWR29:
return 29;
case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
case Mips::D15:
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 925ad9e..76ee2e6 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -239,6 +239,7 @@ let Namespace = "Mips" in {
// Hardware register $29
def HWR29 : Register<"29">;
+ def HWR29_64 : Register<"29">;
}
//===----------------------------------------------------------------------===//
@@ -301,3 +302,5 @@ def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)> {
// Hardware registers
def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
+def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>;
+
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 016d449..dc299f2 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -31,7 +31,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
{
std::string CPUName = CPU;
if (CPUName.empty())
- CPUName = "mips32r1";
+ CPUName = "mips32";
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 5d6b24f..02887fa 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -34,51 +34,51 @@ extern "C" void LLVMInitializeMipsTarget() {
// Using CodeModel::Large enables different CALL behavior.
MipsTargetMachine::
MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
- bool isLittle):
- LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
- Subtarget(TT, CPU, FS, isLittle),
- DataLayout(isLittle ?
- (Subtarget.isABI_N64() ?
- "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
- "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
- (Subtarget.isABI_N64() ?
- "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
- "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
- InstrInfo(*this),
- FrameLowering(Subtarget),
- TLInfo(*this), TSInfo(*this), JITInfo() {
+ bool isLittle)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, isLittle),
+ DataLayout(isLittle ?
+ (Subtarget.isABI_N64() ?
+ "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
+ "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
+ (Subtarget.isABI_N64() ?
+ "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
+ "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this), JITInfo() {
}
MipsebTargetMachine::
MipsebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {}
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
MipselTargetMachine::
MipselTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {}
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
Mips64ebTargetMachine::
Mips64ebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {}
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
Mips64elTargetMachine::
Mips64elTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {}
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
@@ -120,4 +120,3 @@ bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM,
PM.add(createMipsJITCodeEmitterPass(*this, JCE));
return false;
}
-
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index e40d9e2..6842373 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -38,7 +38,7 @@ namespace llvm {
public:
MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool isLittle);
@@ -82,7 +82,7 @@ namespace llvm {
class MipsebTargetMachine : public MipsTargetMachine {
public:
MipsebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -92,7 +92,7 @@ public:
class MipselTargetMachine : public MipsTargetMachine {
public:
MipselTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -103,6 +103,7 @@ class Mips64ebTargetMachine : public MipsTargetMachine {
public:
Mips64ebTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -113,6 +114,7 @@ class Mips64elTargetMachine : public MipsTargetMachine {
public:
Mips64elTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
diff --git a/lib/Target/Mips/TargetInfo/CMakeLists.txt b/lib/Target/Mips/TargetInfo/CMakeLists.txt
index 5692604..4172d00 100644
--- a/lib/Target/Mips/TargetInfo/CMakeLists.txt
+++ b/lib/Target/Mips/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMMipsInfo
MipsTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMMipsInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMipsInfo MipsCommonTableGen)
diff --git a/lib/Target/Mips/TargetInfo/LLVMBuild.txt b/lib/Target/Mips/TargetInfo/LLVMBuild.txt
index 90ae260..2d42568 100644
--- a/lib/Target/Mips/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/Mips/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = MipsInfo
parent = Mips
required_libraries = MC Support Target
add_to_library_groups = Mips
-
diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt
index 6709c1b..a9f4330 100644
--- a/lib/Target/PTX/CMakeLists.txt
+++ b/lib/Target/PTX/CMakeLists.txt
@@ -25,20 +25,6 @@ add_llvm_target(PTXCodeGen
PTXTargetMachine.cpp
)
-add_llvm_library_dependencies(LLVMPTXCodeGen
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMPTXDesc
- LLVMPTXInfo
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PTX/InstPrinter/CMakeLists.txt b/lib/Target/PTX/InstPrinter/CMakeLists.txt
index 029d060..b252893 100644
--- a/lib/Target/PTX/InstPrinter/CMakeLists.txt
+++ b/lib/Target/PTX/InstPrinter/CMakeLists.txt
@@ -6,8 +6,3 @@ add_llvm_library(LLVMPTXAsmPrinter
add_dependencies(LLVMPTXAsmPrinter PTXCommonTableGen)
-add_llvm_library_dependencies(LLVMPTXAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
diff --git a/lib/Target/PTX/InstPrinter/LLVMBuild.txt b/lib/Target/PTX/InstPrinter/LLVMBuild.txt
index be89c10..af5d200 100644
--- a/lib/Target/PTX/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/PTX/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PTXAsmPrinter
parent = PTX
required_libraries = MC Support
add_to_library_groups = PTX
-
diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
index 2f6c92d..5fecb85 100644
--- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
+++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
@@ -38,7 +38,50 @@ StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const {
}
void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << getRegisterName(RegNo);
+ // Decode the register number into type and offset
+ unsigned RegSpace = RegNo & 0x7;
+ unsigned RegType = (RegNo >> 3) & 0x7;
+ unsigned RegOffset = RegNo >> 6;
+
+ // Print the register
+ OS << "%";
+
+ switch (RegSpace) {
+ default:
+ llvm_unreachable("Unknown register space!");
+ case PTXRegisterSpace::Reg:
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type!");
+ case PTXRegisterType::Pred:
+ OS << "p";
+ break;
+ case PTXRegisterType::B16:
+ OS << "rh";
+ break;
+ case PTXRegisterType::B32:
+ OS << "r";
+ break;
+ case PTXRegisterType::B64:
+ OS << "rd";
+ break;
+ case PTXRegisterType::F32:
+ OS << "f";
+ break;
+ case PTXRegisterType::F64:
+ OS << "fd";
+ break;
+ }
+ break;
+ case PTXRegisterSpace::Return:
+ OS << "ret";
+ break;
+ case PTXRegisterSpace::Argument:
+ OS << "arg";
+ break;
+ }
+
+ OS << RegOffset;
}
void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
@@ -139,6 +182,8 @@ void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else {
O << "0000000000000000";
}
+ } else if (Op.isReg()) {
+ printRegName(O, Op.getReg());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
const MCExpr *Expr = Op.getExpr();
diff --git a/lib/Target/PTX/LLVMBuild.txt b/lib/Target/PTX/LLVMBuild.txt
index 22c70de..15a1eb5 100644
--- a/lib/Target/PTX/LLVMBuild.txt
+++ b/lib/Target/PTX/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = PTX
@@ -27,4 +30,3 @@ name = PTXCodeGen
parent = PTX
required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo SelectionDAG Support Target TransformUtils
add_to_library_groups = PTX
-
diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
index 94dbcee..d1fd74c 100644
--- a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
@@ -3,11 +3,4 @@ add_llvm_library(LLVMPTXDesc
PTXMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMPTXDesc
- LLVMMC
- LLVMPTXAsmPrinter
- LLVMPTXInfo
- LLVMSupport
- )
-
add_dependencies(LLVMPTXDesc PTXCommonTableGen)
diff --git a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt
index fff21c1..19b80c5 100644
--- a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PTXDesc
parent = PTX
required_libraries = MC PTXAsmPrinter PTXInfo Support
add_to_library_groups = PTX
-
diff --git a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
index c6094be..77a298d 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
+++ b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
@@ -17,6 +17,8 @@
#ifndef PTXBASEINFO_H
#define PTXBASEINFO_H
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "PTXMCTargetDesc.h"
namespace llvm {
@@ -57,6 +59,75 @@ namespace llvm {
RndPosInfInt = 10 // .rpi
};
} // namespace PTXII
+
+ namespace PTXRegisterType {
+ // Register type encoded in MCOperands
+ enum {
+ Pred = 0,
+ B16,
+ B32,
+ B64,
+ F32,
+ F64
+ };
+ } // namespace PTXRegisterType
+
+ namespace PTXRegisterSpace {
+ // Register space encoded in MCOperands
+ enum {
+ Reg = 0,
+ Local,
+ Param,
+ Argument,
+ Return
+ };
+ }
+
+ inline static void decodeRegisterName(raw_ostream &OS,
+ unsigned EncodedReg) {
+ OS << "%";
+
+ unsigned RegSpace = EncodedReg & 0x7;
+ unsigned RegType = (EncodedReg >> 3) & 0x7;
+ unsigned RegOffset = EncodedReg >> 6;
+
+ switch (RegSpace) {
+ default:
+ llvm_unreachable("Unknown register space!");
+ case PTXRegisterSpace::Reg:
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type!");
+ case PTXRegisterType::Pred:
+ OS << "p";
+ break;
+ case PTXRegisterType::B16:
+ OS << "rh";
+ break;
+ case PTXRegisterType::B32:
+ OS << "r";
+ break;
+ case PTXRegisterType::B64:
+ OS << "rd";
+ break;
+ case PTXRegisterType::F32:
+ OS << "f";
+ break;
+ case PTXRegisterType::F64:
+ OS << "fd";
+ break;
+ }
+ break;
+ case PTXRegisterSpace::Return:
+ OS << "ret";
+ break;
+ case PTXRegisterSpace::Argument:
+ OS << "arg";
+ break;
+ }
+
+ OS << RegOffset;
+ }
} // namespace llvm
#endif
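decodeRegisterName above unpacks the encoding that PTXMachineFunctionInfo builds further below (Encoded = (Offset << 6) | (Type << 3) | Space); a round-trip check of the packing:

#include <cassert>

int main() {
  unsigned Space = 0 /* Reg */, Type = 2 /* B32 */, Offset = 5;
  unsigned Enc = (Offset << 6) | (Type << 3) | Space;
  assert((Enc & 0x7) == Space);        // bits 0-2: register space
  assert(((Enc >> 3) & 0x7) == Type);  // bits 3-5: register type
  assert((Enc >> 6) == Offset);        // remaining bits: per-type offset
  // decodeRegisterName would print "%r5" for this value.
  return 0;
}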
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index bdf238b..77ed71d 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -51,23 +51,23 @@ using namespace llvm;
static const char PARAM_PREFIX[] = "__param_";
static const char RETURN_PREFIX[] = "__ret_";
-static const char *getRegisterTypeName(unsigned RegNo,
- const MachineRegisterInfo& MRI) {
- const TargetRegisterClass *TRC = MRI.getRegClass(RegNo);
-
-#define TEST_REGCLS(cls, clsstr) \
- if (PTX::cls ## RegisterClass == TRC) return # clsstr;
-
- TEST_REGCLS(RegPred, pred);
- TEST_REGCLS(RegI16, b16);
- TEST_REGCLS(RegI32, b32);
- TEST_REGCLS(RegI64, b64);
- TEST_REGCLS(RegF32, b32);
- TEST_REGCLS(RegF64, b64);
-#undef TEST_REGCLS
-
- llvm_unreachable("Not in any register class!");
- return NULL;
+static const char *getRegisterTypeName(unsigned RegType) {
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type");
+ case PTXRegisterType::Pred:
+ return ".pred";
+ case PTXRegisterType::B16:
+ return ".b16";
+ case PTXRegisterType::B32:
+ return ".b32";
+ case PTXRegisterType::B64:
+ return ".b64";
+ case PTXRegisterType::F32:
+ return ".f32";
+ case PTXRegisterType::F64:
+ return ".f64";
+ }
}
static const char *getStateSpaceName(unsigned addressSpace) {
@@ -188,32 +188,32 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
unsigned numRegs;
// pred
- numRegs = MFI->getNumRegistersForClass(PTX::RegPredRegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::Pred, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .pred %p<" << numRegs << ">;\n";
// i16
- numRegs = MFI->getNumRegistersForClass(PTX::RegI16RegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::B16, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .b16 %rh<" << numRegs << ">;\n";
// i32
- numRegs = MFI->getNumRegistersForClass(PTX::RegI32RegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::B32, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .b32 %r<" << numRegs << ">;\n";
// i64
- numRegs = MFI->getNumRegistersForClass(PTX::RegI64RegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::B64, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .b64 %rd<" << numRegs << ">;\n";
// f32
- numRegs = MFI->getNumRegistersForClass(PTX::RegF32RegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::F32, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .f32 %f<" << numRegs << ">;\n";
// f64
- numRegs = MFI->getNumRegistersForClass(PTX::RegF64RegisterClass);
+ numRegs = MFI->countRegisters(PTXRegisterType::F64, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .f64 %fd<" << numRegs << ">;\n";
@@ -368,7 +368,6 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() {
const PTXParamManager &PM = MFI->getParamManager();
const bool isKernel = MFI->isKernel();
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
- const MachineRegisterInfo& MRI = MF->getRegInfo();
SmallString<128> decl;
raw_svector_ostream os(decl);
@@ -391,7 +390,7 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() {
if (i != b)
os << ", ";
- os << ".reg ." << getRegisterTypeName(*i, MRI) << ' '
+ os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
<< MFI->getRegisterName(*i);
}
}
@@ -450,7 +449,7 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() {
if (i != b)
os << ", ";
- os << ".reg ." << getRegisterTypeName(*i, MRI) << ' '
+ os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
<< MFI->getRegisterName(*i);
}
}
@@ -521,20 +520,18 @@ MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
MCOperand MCOp;
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
- const MCExpr *Expr;
- const char *RegSymbolName;
+ unsigned EncodedReg;
switch (MO.getType()) {
default:
llvm_unreachable("Unknown operand type");
case MachineOperand::MO_Register:
- // We create register operands as symbols, since the PTXInstPrinter class
- // has no way to map virtual registers back to a name without some ugly
- // hacks.
- // FIXME: Figure out a better way to handle virtual register naming.
- RegSymbolName = MFI->getRegisterName(MO.getReg());
- Expr = MCSymbolRefExpr::Create(RegSymbolName, MCSymbolRefExpr::VK_None,
- OutContext);
- MCOp = MCOperand::CreateExpr(Expr);
+ if (MO.getReg() > 0) {
+ // Encode the register
+ EncodedReg = MFI->getEncodedRegister(MO.getReg());
+ } else {
+ EncodedReg = 0;
+ }
+ MCOp = MCOperand::CreateReg(EncodedReg);
break;
case MachineOperand::MO_Immediate:
MCOp = MCOperand::CreateImm(MO.getImm());
diff --git a/lib/Target/PTX/PTXFPRoundingModePass.cpp b/lib/Target/PTX/PTXFPRoundingModePass.cpp
index 0b653e0..a21d172 100644
--- a/lib/Target/PTX/PTXFPRoundingModePass.cpp
+++ b/lib/Target/PTX/PTXFPRoundingModePass.cpp
@@ -23,9 +23,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
// NOTE: PTXFPRoundingModePass should be executed just before emission.
-namespace llvm {
+namespace {
/// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to
/// all FP instructions. Essentially, this pass just looks for all FP
/// instructions that have a rounding mode set to RndDefault, and sets an
@@ -58,7 +60,7 @@ namespace llvm {
void initializeMap();
void processInstruction(MachineInstr &MI);
}; // class PTXFPRoundingModePass
-} // namespace llvm
+} // end anonymous namespace
using namespace llvm;
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
index 17191fb..a012297 100644
--- a/lib/Target/PTX/PTXISelLowering.cpp
+++ b/lib/Target/PTX/PTXISelLowering.cpp
@@ -243,6 +243,30 @@ SDValue PTXTargetLowering::
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
EVT RegVT = Ins[i].VT;
TargetRegisterClass* TRC = getRegClassFor(RegVT);
+ unsigned RegType;
+
+ // Determine which register class we need
+ if (RegVT == MVT::i1) {
+ RegType = PTXRegisterType::Pred;
+ }
+ else if (RegVT == MVT::i16) {
+ RegType = PTXRegisterType::B16;
+ }
+ else if (RegVT == MVT::i32) {
+ RegType = PTXRegisterType::B32;
+ }
+ else if (RegVT == MVT::i64) {
+ RegType = PTXRegisterType::B64;
+ }
+ else if (RegVT == MVT::f32) {
+ RegType = PTXRegisterType::F32;
+ }
+ else if (RegVT == MVT::f64) {
+ RegType = PTXRegisterType::F64;
+ }
+ else {
+ llvm_unreachable("Unknown parameter type");
+ }
// Use a unique index in the instruction to prevent instruction folding.
// Yes, this is a hack.
@@ -253,7 +277,7 @@ SDValue PTXTargetLowering::
InVals.push_back(ArgValue);
- MFI->addArgReg(Reg);
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Argument);
}
}
@@ -304,25 +328,32 @@ SDValue PTXTargetLowering::
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
EVT RegVT = Outs[i].VT;
TargetRegisterClass* TRC = 0;
+ unsigned RegType;
// Determine which register class we need
if (RegVT == MVT::i1) {
TRC = PTX::RegPredRegisterClass;
+ RegType = PTXRegisterType::Pred;
}
else if (RegVT == MVT::i16) {
TRC = PTX::RegI16RegisterClass;
+ RegType = PTXRegisterType::B16;
}
else if (RegVT == MVT::i32) {
TRC = PTX::RegI32RegisterClass;
+ RegType = PTXRegisterType::B32;
}
else if (RegVT == MVT::i64) {
TRC = PTX::RegI64RegisterClass;
+ RegType = PTXRegisterType::B64;
}
else if (RegVT == MVT::f32) {
TRC = PTX::RegF32RegisterClass;
+ RegType = PTXRegisterType::F32;
}
else if (RegVT == MVT::f64) {
TRC = PTX::RegF64RegisterClass;
+ RegType = PTXRegisterType::F64;
}
else {
llvm_unreachable("Unknown parameter type");
@@ -335,7 +366,7 @@ SDValue PTXTargetLowering::
Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg);
- MFI->addRetReg(Reg);
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Return);
}
}
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index 1b947a5..871b3a7 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -116,7 +116,7 @@ bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const {
}
bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- return !isPredicated(MI) && get(MI->getOpcode()).isTerminator();
+ return !isPredicated(MI) && MI->isTerminator();
}
bool PTXInstrInfo::
@@ -184,15 +184,13 @@ AnalyzeBranch(MachineBasicBlock &MBB,
if (MBB.empty())
return true;
- MachineBasicBlock::const_iterator iter = MBB.end();
+ MachineBasicBlock::iterator iter = MBB.end();
const MachineInstr& instLast1 = *--iter;
- const MCInstrDesc &desc1 = instLast1.getDesc();
// for the special case where MBB has only one instruction
const bool IsSizeOne = MBB.size() == 1;
// if IsSizeOne is true, *--iter and instLast2 are invalid
// we put a dummy value in instLast2 and desc2 since they are used
const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
- const MCInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc();
DEBUG(dbgs() << "\n");
DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
@@ -207,7 +205,7 @@ AnalyzeBranch(MachineBasicBlock &MBB,
}
// this block ends with only an unconditional branch
- if (desc1.isUnconditionalBranch() &&
+ if (instLast1.isUnconditionalBranch() &&
// when IsSizeOne is true, it "absorbs" the evaluation of instLast2
(IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
@@ -217,7 +215,7 @@ AnalyzeBranch(MachineBasicBlock &MBB,
// this block ends with a conditional branch and
// it falls through to a successor block
- if (desc1.isConditionalBranch() &&
+ if (instLast1.isConditionalBranch() &&
IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
TBB = GetBranchTarget(instLast1);
@@ -233,8 +231,8 @@ AnalyzeBranch(MachineBasicBlock &MBB,
// this block ends with a conditional branch
// followed by an unconditional branch
- if (desc2.isConditionalBranch() &&
- desc1.isUnconditionalBranch()) {
+ if (instLast2.isConditionalBranch() &&
+ instLast1.isUnconditionalBranch()) {
DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
TBB = GetBranchTarget(instLast2);
FBB = GetBranchTarget(instLast1);
@@ -341,8 +339,7 @@ void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
}
bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
- const MCInstrDesc &desc = inst.getDesc();
- return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch();
+ return inst.isTerminator() || inst.isBranch() || inst.isIndirectBranch();
}
bool PTXInstrInfo::
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index bcd5bcf..19a862f 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -825,17 +825,17 @@ let hasSideEffects = 1 in {
///===- Parameter Passing Pseudo-Instructions -----------------------------===//
def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b),
- "mov.pred\t$a, %param$b", []>;
+ "mov.pred\t$a, %arg$b", []>;
def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b),
- "mov.b16\t$a, %param$b", []>;
+ "mov.b16\t$a, %arg$b", []>;
def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b),
- "mov.b32\t$a, %param$b", []>;
+ "mov.b32\t$a, %arg$b", []>;
def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b),
- "mov.b64\t$a, %param$b", []>;
+ "mov.b64\t$a, %arg$b", []>;
def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b),
- "mov.f32\t$a, %param$b", []>;
+ "mov.f32\t$a, %arg$b", []>;
def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b),
- "mov.f64\t$a, %param$b", []>;
+ "mov.f64\t$a, %arg$b", []>;
def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>;
def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>;
diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp
index b33a273..26ec623 100644
--- a/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ b/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -22,9 +22,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
// NOTE: PTXMFInfoExtract must run after register allocation!
-namespace llvm {
+namespace {
/// PTXMFInfoExtract - PTX specific code to extract of PTX machine
/// function information for PTXAsmPrinter
///
@@ -42,7 +44,7 @@ namespace llvm {
return "PTX Machine Function Info Extractor";
}
}; // class PTXMFInfoExtract
-} // namespace llvm
+} // end anonymous namespace
using namespace llvm;
@@ -56,7 +58,20 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
- MFI->addVirtualRegister(TRC, Reg);
+ unsigned RegType;
+ if (TRC == PTX::RegPredRegisterClass)
+ RegType = PTXRegisterType::Pred;
+ else if (TRC == PTX::RegI16RegisterClass)
+ RegType = PTXRegisterType::B16;
+ else if (TRC == PTX::RegI32RegisterClass)
+ RegType = PTXRegisterType::B32;
+ else if (TRC == PTX::RegI64RegisterClass)
+ RegType = PTXRegisterType::B64;
+ else if (TRC == PTX::RegF32RegisterClass)
+ RegType = PTXRegisterType::F32;
+ else if (TRC == PTX::RegF64RegisterClass)
+ RegType = PTXRegisterType::F64;
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Reg);
}
return false;
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h
index 3b985f7..1a2878c 100644
--- a/lib/Target/PTX/PTXMachineFunctionInfo.h
+++ b/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -35,15 +35,22 @@ private:
DenseSet<unsigned> RegArgs;
DenseSet<unsigned> RegRets;
- typedef std::vector<unsigned> RegisterList;
- typedef DenseMap<const TargetRegisterClass*, RegisterList> RegisterMap;
- typedef DenseMap<unsigned, std::string> RegisterNameMap;
typedef DenseMap<int, std::string> FrameMap;
- RegisterMap UsedRegs;
- RegisterNameMap RegNames;
FrameMap FrameSymbols;
+ struct RegisterInfo {
+ unsigned Reg;
+ unsigned Type;
+ unsigned Space;
+ unsigned Offset;
+ unsigned Encoded;
+ };
+
+ typedef DenseMap<unsigned, RegisterInfo> RegisterInfoMap;
+
+ RegisterInfoMap RegInfo;
+
PTXParamManager ParamManager;
public:
@@ -51,13 +58,7 @@ public:
PTXMachineFunctionInfo(MachineFunction &MF)
: IsKernel(false) {
- UsedRegs[PTX::RegPredRegisterClass] = RegisterList();
- UsedRegs[PTX::RegI16RegisterClass] = RegisterList();
- UsedRegs[PTX::RegI32RegisterClass] = RegisterList();
- UsedRegs[PTX::RegI64RegisterClass] = RegisterList();
- UsedRegs[PTX::RegF32RegisterClass] = RegisterList();
- UsedRegs[PTX::RegF64RegisterClass] = RegisterList();
- }
+ }
/// getParamManager - Returns the PTXParamManager instance for this function.
PTXParamManager& getParamManager() { return ParamManager; }
@@ -78,69 +79,106 @@ public:
reg_iterator retreg_begin() const { return RegRets.begin(); }
reg_iterator retreg_end() const { return RegRets.end(); }
+ /// addRegister - Adds a virtual register to the set of all used registers
+ void addRegister(unsigned Reg, unsigned RegType, unsigned RegSpace) {
+ if (!RegInfo.count(Reg)) {
+ RegisterInfo Info;
+ Info.Reg = Reg;
+ Info.Type = RegType;
+ Info.Space = RegSpace;
+
+ // Determine register offset
+ Info.Offset = 0;
+ for(RegisterInfoMap::const_iterator i = RegInfo.begin(),
+ e = RegInfo.end(); i != e; ++i) {
+ const RegisterInfo& RI = i->second;
+ if (RI.Space == RegSpace)
+ if (RI.Space != PTXRegisterSpace::Reg || RI.Type == Info.Type)
+ Info.Offset++;
+ }
+
+ // Encode the register data into a single register number
+ Info.Encoded = (Info.Offset << 6) | (Info.Type << 3) | Info.Space;
+
+ RegInfo[Reg] = Info;
+
+ if (RegSpace == PTXRegisterSpace::Argument)
+ RegArgs.insert(Reg);
+ else if (RegSpace == PTXRegisterSpace::Return)
+ RegRets.insert(Reg);
+ }
+ }
+
+ /// countRegisters - Returns the number of registers of the given type and
+ /// space.
+ unsigned countRegisters(unsigned RegType, unsigned RegSpace) const {
+ unsigned Count = 0;
+ for(RegisterInfoMap::const_iterator i = RegInfo.begin(), e = RegInfo.end();
+ i != e; ++i) {
+ const RegisterInfo& RI = i->second;
+ if (RI.Type == RegType && RI.Space == RegSpace)
+ Count++;
+ }
+ return Count;
+ }
+
+ /// getEncodedRegister - Returns the encoded value of the register.
+ unsigned getEncodedRegister(unsigned Reg) const {
+ return RegInfo.lookup(Reg).Encoded;
+ }
+
/// addRetReg - Adds a register to the set of return-value registers.
void addRetReg(unsigned Reg) {
if (!RegRets.count(Reg)) {
RegRets.insert(Reg);
- std::string name;
- name = "%ret";
- name += utostr(RegRets.size() - 1);
- RegNames[Reg] = name;
}
}
/// addArgReg - Adds a register to the set of function argument registers.
void addArgReg(unsigned Reg) {
RegArgs.insert(Reg);
- std::string name;
- name = "%param";
- name += utostr(RegArgs.size() - 1);
- RegNames[Reg] = name;
- }
-
- /// addVirtualRegister - Adds a virtual register to the set of all used
- /// registers in the function.
- void addVirtualRegister(const TargetRegisterClass *TRC, unsigned Reg) {
- std::string name;
-
- // Do not count registers that are argument/return registers.
- if (!RegRets.count(Reg) && !RegArgs.count(Reg)) {
- UsedRegs[TRC].push_back(Reg);
- if (TRC == PTX::RegPredRegisterClass)
- name = "%p";
- else if (TRC == PTX::RegI16RegisterClass)
- name = "%rh";
- else if (TRC == PTX::RegI32RegisterClass)
- name = "%r";
- else if (TRC == PTX::RegI64RegisterClass)
- name = "%rd";
- else if (TRC == PTX::RegF32RegisterClass)
- name = "%f";
- else if (TRC == PTX::RegF64RegisterClass)
- name = "%fd";
- else
- llvm_unreachable("Invalid register class");
-
- name += utostr(UsedRegs[TRC].size() - 1);
- RegNames[Reg] = name;
- }
}
/// getRegisterName - Returns the name of the specified virtual register. This
/// name is used during PTX emission.
- const char *getRegisterName(unsigned Reg) const {
- if (RegNames.count(Reg))
- return RegNames.find(Reg)->second.c_str();
+ std::string getRegisterName(unsigned Reg) const {
+ if (RegInfo.count(Reg)) {
+ const RegisterInfo& RI = RegInfo.lookup(Reg);
+ std::string Name;
+ raw_string_ostream NameStr(Name);
+ decodeRegisterName(NameStr, RI.Encoded);
+ NameStr.flush();
+ return Name;
+ }
else if (Reg == PTX::NoRegister)
return "%noreg";
else
llvm_unreachable("Register not in register name map");
}
- /// getNumRegistersForClass - Returns the number of virtual registers that are
- /// used for the specified register class.
- unsigned getNumRegistersForClass(const TargetRegisterClass *TRC) const {
- return UsedRegs.lookup(TRC).size();
+ /// getEncodedRegisterName - Returns the name of the encoded register.
+ std::string getEncodedRegisterName(unsigned EncodedReg) const {
+ std::string Name;
+ raw_string_ostream NameStr(Name);
+ decodeRegisterName(NameStr, EncodedReg);
+ NameStr.flush();
+ return Name;
+ }
+
+ /// getRegisterType - Returns the type of the specified virtual register.
+ unsigned getRegisterType(unsigned Reg) const {
+ if (RegInfo.count(Reg))
+ return RegInfo.lookup(Reg).Type;
+ else
+ llvm_unreachable("Unknown register");
+ }
+
+ /// getOffsetForRegister - Returns the offset of the virtual register.
+ unsigned getOffsetForRegister(unsigned Reg) const {
+ if (RegInfo.count(Reg))
+ return RegInfo.lookup(Reg).Offset;
+ else
+ return 0;
}
/// getFrameSymbol - Returns the symbol name for the given FrameIndex.
@@ -148,13 +186,13 @@ public:
if (FrameSymbols.count(FrameIndex)) {
return FrameSymbols.lookup(FrameIndex).c_str();
} else {
- std::string Name = "__local";
- Name += utostr(FrameIndex);
+ std::string Name = "__local";
+ Name += utostr(FrameIndex);
// The whole point of caching this name is to ensure the pointer we pass
// to any getExternalSymbol() calls will remain valid for the lifetime of
// the back-end instance. This is to work around an issue in SelectionDAG
// where symbol names are expected to be life-long strings.
- FrameSymbols[FrameIndex] = Name;
+ FrameSymbols[FrameIndex] = Name;
return FrameSymbols[FrameIndex].c_str();
}
}
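The addRegister/getEncodedRegister hunks above pack three fields into a single "encoded" register number: the register space in the low three bits, the type in the next three, and the per-(space, type) offset above that. A minimal standalone sketch of that layout follows; it assumes Space and Type each fit in three bits (which the shift amounts imply), and encodeReg/decodeReg are hypothetical helper names — the in-tree inverse is the decodeRegisterName routine the patch calls.

#include <cstdio>

// Hypothetical helpers mirroring "(Offset << 6) | (Type << 3) | Space".
unsigned encodeReg(unsigned Offset, unsigned Type, unsigned Space) {
  return (Offset << 6) | (Type << 3) | Space;
}

void decodeReg(unsigned Encoded,
               unsigned &Offset, unsigned &Type, unsigned &Space) {
  Space  = Encoded & 0x7;        // bits [2:0]: register space
  Type   = (Encoded >> 3) & 0x7; // bits [5:3]: register type
  Offset = Encoded >> 6;         // bits [6+]:  index within (space, type)
}

int main() {
  unsigned Off, Ty, Sp;
  decodeReg(encodeReg(5, 2, 1), Off, Ty, Sp);
  std::printf("offset=%u type=%u space=%u\n", Off, Ty, Sp); // 5 2 1
  return 0;
}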
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
index 292ea5e..4efdc27 100644
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -67,30 +67,16 @@ namespace {
"e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
const char* DataLayout64 =
"e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
-
- // Copied from LLVMTargetMachine.cpp
- void printNoVerify(PassManagerBase &PM, const char *Banner) {
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
- }
-
- void printAndVerify(PassManagerBase &PM,
- const char *Banner) {
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
-
- //if (VerifyMachineCode)
- // PM.add(createMachineVerifierPass(Banner));
- }
}
// DataLayout and FrameLowering are filled with dummy data
PTXTargetMachine::PTXTargetMachine(const Target &T,
StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
DataLayout(is64Bit ? DataLayout64 : DataLayout32),
Subtarget(TT, CPU, FS, is64Bit),
FrameLowering(Subtarget),
@@ -101,16 +87,18 @@ PTXTargetMachine::PTXTargetMachine(const Target &T,
PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PTXTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {
+ : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PTXTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {
+ : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
bool PTXTargetMachine::addInstSelector(PassManagerBase &PM) {
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
index 19f6c0f..22911f7 100644
--- a/lib/Target/PTX/PTXTargetMachine.h
+++ b/lib/Target/PTX/PTXTargetMachine.h
@@ -35,7 +35,7 @@ class PTXTargetMachine : public LLVMTargetMachine {
public:
PTXTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit);
@@ -94,7 +94,7 @@ class PTX32TargetMachine : public PTXTargetMachine {
public:
PTX32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
}; // class PTX32TargetMachine
@@ -103,7 +103,7 @@ class PTX64TargetMachine : public PTXTargetMachine {
public:
PTX64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
}; // class PTX64TargetMachine
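The PTX constructor changes here (and the PowerPC ones further down) all follow one pattern: a const TargetOptions & parameter is threaded through every TargetMachine subclass into LLVMTargetMachine, so flags such as GuaranteedTailCallOpt and DisableJumpTables travel with the target machine instead of being read from global cl::opts. A hedged sketch of the shape of that change, using a hypothetical Foo target rather than the real class hierarchy:

struct TargetOptions {
  TargetOptions() : GuaranteedTailCallOpt(false), DisableJumpTables(false) {}
  bool GuaranteedTailCallOpt;
  bool DisableJumpTables;
};

// The base layer keeps its own copy; in the real tree it is later
// queried as MF.getTarget().Options.<flag>.
class LLVMTargetMachineSketch {
public:
  explicit LLVMTargetMachineSketch(const TargetOptions &O) : Options(O) {}
  TargetOptions Options;
};

// Each concrete target machine just forwards the reference upward.
class FooTargetMachine : public LLVMTargetMachineSketch {
public:
  FooTargetMachine(const TargetOptions &O, bool Is64Bit)
      : LLVMTargetMachineSketch(O), Is64Bit(Is64Bit) {}
  bool Is64Bit;
};

int main() {
  TargetOptions Opts;
  Opts.GuaranteedTailCallOpt = true;
  FooTargetMachine TM(Opts, /*Is64Bit=*/true);
  return TM.Options.GuaranteedTailCallOpt ? 0 : 1;
}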
diff --git a/lib/Target/PTX/TargetInfo/CMakeLists.txt b/lib/Target/PTX/TargetInfo/CMakeLists.txt
index 2366e45..d9a5da3 100644
--- a/lib/Target/PTX/TargetInfo/CMakeLists.txt
+++ b/lib/Target/PTX/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMPTXInfo
PTXTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMPTXInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMPTXInfo PTXCommonTableGen)
diff --git a/lib/Target/PTX/TargetInfo/LLVMBuild.txt b/lib/Target/PTX/TargetInfo/LLVMBuild.txt
index 8e5285a..2cc30c4 100644
--- a/lib/Target/PTX/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/PTX/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PTXInfo
parent = PTX
required_libraries = MC Support Target
add_to_library_groups = PTX
-
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 05c1ffd..1b85495 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -27,20 +27,6 @@ add_llvm_target(PowerPCCodeGen
PPCSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCCodeGen
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMPowerPCAsmPrinter
- LLVMPowerPCDesc
- LLVMPowerPCInfo
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
index 1d857e2..a605cc4 100644
--- a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
+++ b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMPowerPCAsmPrinter
PPCInstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMPowerPCAsmPrinter PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
index afbb2b1..7c691de 100644
--- a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PowerPCAsmPrinter
parent = PowerPC
required_libraries = MC Support
add_to_library_groups = PowerPC
-
diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt
index 5baa988..95fac54 100644
--- a/lib/Target/PowerPC/LLVMBuild.txt
+++ b/lib/Target/PowerPC/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = PowerPC
@@ -28,4 +31,3 @@ name = PowerPCCodeGen
parent = PowerPC
required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target
add_to_library_groups = PowerPC
-
diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
index c4041db..febf438 100644
--- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
@@ -6,11 +6,4 @@ add_llvm_library(LLVMPowerPCDesc
PPCPredicates.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCDesc
- LLVMMC
- LLVMPowerPCAsmPrinter
- LLVMPowerPCInfo
- LLVMSupport
- )
-
add_dependencies(LLVMPowerPCDesc PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
index fc2da83..d3a567d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PowerPCDesc
parent = PowerPC
required_libraries = MC PowerPCAsmPrinter PowerPCInfo Support
add_to_library_groups = PowerPC
-
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 9f2fd6d..34a5774 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -93,6 +93,16 @@ public:
// FIXME.
return false;
}
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME.
+ assert(0 && "fixupNeedsRelaxation() unimplemented");
+ return false;
+ }
+
void RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
// FIXME.
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 56f622e..5dc2d3d 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -365,11 +365,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::MFCRpseud:
+ case PPC::MFCR8pseud:
// Transform: %R3 = MFCRpseud %CR7
// Into: %R3 = MFCR ;; cr7
OutStreamer.AddComment(PPCInstPrinter::
getRegisterName(MI->getOperand(1).getReg()));
- TmpInst.setOpcode(PPC::MFCR);
+ TmpInst.setOpcode(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR);
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
OutStreamer.EmitInstruction(TmpInst);
return;
@@ -441,7 +442,7 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
Directive = PPC::DIR_970;
if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
Directive = PPC::DIR_7400;
- if (Subtarget.isPPC64() && Directive < PPC::DIR_970)
+ if (Subtarget.isPPC64() && Directive < PPC::DIR_64)
Directive = PPC::DIR_64;
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 4a1f182..9d2f4d0 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -138,7 +138,8 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
unsigned OpNo) const {
const MachineOperand &MO = MI.getOperand(OpNo);
- assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+ assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 ||
+ MI.getOpcode() == PPC::MFOCRF) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
return 0x80 >> getPPCRegisterNumbering(MO.getReg());
}
@@ -248,7 +249,8 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
if (MO.isReg()) {
// MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
// The GPR operand should come through here though.
- assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+ assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 &&
+ MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
return getPPCRegisterNumbering(MO.getReg());
}
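get_crbitm_encoding above encodes the CR-field operand of mtcrf/mfocrf as a one-hot 8-bit FXM mask with CR0 at the most significant bit, hence the 0x80 >> n. A small sketch of just that mask computation (crFieldMask is a hypothetical name standing in for the getPPCRegisterNumbering-based expression):

#include <cassert>

static unsigned crFieldMask(unsigned CRNum) { // CRNum in [0, 7]
  return 0x80u >> CRNum;
}

int main() {
  assert(crFieldMask(0) == 0x80); // CR0 -> MSB of the FXM byte
  assert(crFieldMask(7) == 0x01); // CR7 -> LSB
  return 0;
}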
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 0b85fea..5c45018 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -64,7 +64,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
// epilog blocks.
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().getDesc().isReturn()) {
+ if (!I->empty() && I->back().isReturn()) {
bool FoundIt = false;
for (MBBI = I->end(); MBBI != I->begin(); ) {
--MBBI;
@@ -244,8 +244,10 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
if (MF.getFunction()->hasFnAttr(Attribute::Naked))
return false;
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() ||
- (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects() ||
+ (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
@@ -655,7 +657,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
- if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
+ if (MF.getTarget().Options.GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
MF.getFunction()->getCallingConv() == CallingConv::Fast) {
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned CallerAllocatedAmt = FI->getMinReservedArea();
@@ -758,7 +760,8 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Reserve stack space to move the linkage area to in case of a tail call.
int TCSPDelta = 0;
- if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
@@ -769,7 +772,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// FIXME: doesn't detect whether or not we need to spill vXX, which requires
// r0 for now.
- if (RegInfo->requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable.
+ if (RegInfo->requiresRegisterScavenging(MF))
if (needsFP(MF) || spillsCR(MF)) {
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
@@ -863,7 +866,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// Take into account stack space reserved for tail calls.
int TCSPDelta = 0;
- if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
LowerBound = TCSPDelta;
}
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 3197fc8..ae317af 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -27,7 +27,6 @@ void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) {
const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
if (!MCID) {
// This is a PPC pseudo-instruction.
- // FIXME: Should something else be done?
return;
}
@@ -62,6 +61,7 @@ void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) {
PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
: TII(tii) {
+ LastWasBL8_ELF = false;
EndDispatchGroup();
}
@@ -80,12 +80,6 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
bool &isFirst, bool &isSingle,
bool &isCracked,
bool &isLoad, bool &isStore) {
- if ((int)Opcode >= 0) {
- isFirst = isSingle = isCracked = isLoad = isStore = false;
- return PPCII::PPC970_Pseudo;
- }
- Opcode = ~Opcode;
-
const MCInstrDesc &MCID = TII.get(Opcode);
isLoad = MCID.mayLoad();
@@ -102,29 +96,23 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
bool PPCHazardRecognizer970::
-isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const {
+isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
+ const Value *LoadValue) const {
for (unsigned i = 0, e = NumStores; i != e; ++i) {
// Handle exact and commuted addresses.
- if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i])
- return true;
- if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i])
+ if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i])
return true;
// Okay, we don't have an exact match, if this is an indexed offset, see if
// we have overlap (which happens during fp->int conversion for example).
- if (StorePtr2[i] == Ptr2) {
- if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i]))
- if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) {
- // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
- // to see if the load and store actually overlap.
- int StoreOffs = StoreOffset->getZExtValue();
- int LoadOffs = LoadOffset->getZExtValue();
- if (StoreOffs < LoadOffs) {
- if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true;
- } else {
- if (int(LoadOffs+LoadSize) > StoreOffs) return true;
- }
- }
+ if (StoreValue[i] == LoadValue) {
+ // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
+ // to see if the load and store actually overlap.
+ if (StoreOffset[i] < LoadOffset) {
+ if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true;
+ } else {
+ if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true;
+ }
}
}
return false;
@@ -138,13 +126,26 @@ ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
getHazardType(SUnit *SU, int Stalls) {
assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
- const SDNode *Node = SU->getNode()->getGluedMachineNode();
+ MachineInstr *MI = SU->getInstr();
+
+ if (MI->isDebugValue())
+ return NoHazard;
+
+ unsigned Opcode = MI->getOpcode();
+
+ // If the last instruction was a BL8_ELF, then the NOP must follow it
+ // directly (this is a strong requirement from the linker due to the ELF ABI).
+ // We return only Hazard (and not NoopHazard) because if the NOP is necessary
+ // then it will already be in the instruction stream (it is not always
+ // necessary; tail calls, for example, do not need it).
+ if (LastWasBL8_ELF && Opcode != PPC::NOP)
+ return Hazard;
+
bool isFirst, isSingle, isCracked, isLoad, isStore;
PPCII::PPC970_Unit InstrType =
- GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ GetInstrType(Opcode, isFirst, isSingle, isCracked,
isLoad, isStore);
if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
- unsigned Opcode = Node->getMachineOpcode();
// We can only issue a PPC970_First/PPC970_Single instruction (such as
// crand/mtspr/etc) if this is the first cycle of the dispatch group.
@@ -181,55 +182,10 @@ getHazardType(SUnit *SU, int Stalls) {
// If this is a load following a store, make sure it's not to the same or
// overlapping address.
- if (isLoad && NumStores) {
- unsigned LoadSize;
- switch (Opcode) {
- default: llvm_unreachable("Unknown load!");
- case PPC::LBZ: case PPC::LBZU:
- case PPC::LBZX:
- case PPC::LBZ8: case PPC::LBZU8:
- case PPC::LBZX8:
- case PPC::LVEBX:
- LoadSize = 1;
- break;
- case PPC::LHA: case PPC::LHAU:
- case PPC::LHAX:
- case PPC::LHZ: case PPC::LHZU:
- case PPC::LHZX:
- case PPC::LVEHX:
- case PPC::LHBRX:
- case PPC::LHA8: case PPC::LHAU8:
- case PPC::LHAX8:
- case PPC::LHZ8: case PPC::LHZU8:
- case PPC::LHZX8:
- LoadSize = 2;
- break;
- case PPC::LFS: case PPC::LFSU:
- case PPC::LFSX:
- case PPC::LWZ: case PPC::LWZU:
- case PPC::LWZX:
- case PPC::LWA:
- case PPC::LWAX:
- case PPC::LVEWX:
- case PPC::LWBRX:
- case PPC::LWZ8:
- case PPC::LWZX8:
- LoadSize = 4;
- break;
- case PPC::LFD: case PPC::LFDU:
- case PPC::LFDX:
- case PPC::LD: case PPC::LDU:
- case PPC::LDX:
- LoadSize = 8;
- break;
- case PPC::LVX:
- case PPC::LVXL:
- LoadSize = 16;
- break;
- }
-
- if (isLoadOfStoredAddress(LoadSize,
- Node->getOperand(0), Node->getOperand(1)))
+ if (isLoad && NumStores && !MI->memoperands_empty()) {
+ MachineMemOperand *MO = *MI->memoperands_begin();
+ if (isLoadOfStoredAddress(MO->getSize(),
+ MO->getOffset(), MO->getValue()))
return NoopHazard;
}
@@ -237,66 +193,29 @@ getHazardType(SUnit *SU, int Stalls) {
}
void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
- const SDNode *Node = SU->getNode()->getGluedMachineNode();
+ MachineInstr *MI = SU->getInstr();
+
+ if (MI->isDebugValue())
+ return;
+
+ unsigned Opcode = MI->getOpcode();
+ LastWasBL8_ELF = (Opcode == PPC::BL8_ELF);
+
bool isFirst, isSingle, isCracked, isLoad, isStore;
PPCII::PPC970_Unit InstrType =
- GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ GetInstrType(Opcode, isFirst, isSingle, isCracked,
isLoad, isStore);
if (InstrType == PPCII::PPC970_Pseudo) return;
- unsigned Opcode = Node->getMachineOpcode();
// Update structural hazard information.
if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
// Track the address stored to.
- if (isStore) {
- unsigned ThisStoreSize;
- switch (Opcode) {
- default: llvm_unreachable("Unknown store instruction!");
- case PPC::STB: case PPC::STB8:
- case PPC::STBU: case PPC::STBU8:
- case PPC::STBX: case PPC::STBX8:
- case PPC::STVEBX:
- ThisStoreSize = 1;
- break;
- case PPC::STH: case PPC::STH8:
- case PPC::STHU: case PPC::STHU8:
- case PPC::STHX: case PPC::STHX8:
- case PPC::STVEHX:
- case PPC::STHBRX:
- ThisStoreSize = 2;
- break;
- case PPC::STFS:
- case PPC::STFSU:
- case PPC::STFSX:
- case PPC::STWX: case PPC::STWX8:
- case PPC::STWUX:
- case PPC::STW: case PPC::STW8:
- case PPC::STWU:
- case PPC::STVEWX:
- case PPC::STFIWX:
- case PPC::STWBRX:
- ThisStoreSize = 4;
- break;
- case PPC::STD_32:
- case PPC::STDX_32:
- case PPC::STD:
- case PPC::STDU:
- case PPC::STFD:
- case PPC::STFDX:
- case PPC::STDX:
- case PPC::STDUX:
- ThisStoreSize = 8;
- break;
- case PPC::STVX:
- case PPC::STVXL:
- ThisStoreSize = 16;
- break;
- }
-
- StoreSize[NumStores] = ThisStoreSize;
- StorePtr1[NumStores] = Node->getOperand(1);
- StorePtr2[NumStores] = Node->getOperand(2);
+ if (isStore && NumStores < 4 && !MI->memoperands_empty()) {
+ MachineMemOperand *MO = *MI->memoperands_begin();
+ StoreSize[NumStores] = MO->getSize();
+ StoreOffset[NumStores] = MO->getOffset();
+ StoreValue[NumStores] = MO->getValue();
++NumStores;
}
@@ -319,3 +238,9 @@ void PPCHazardRecognizer970::AdvanceCycle() {
if (NumIssued == 5)
EndDispatchGroup();
}
+
+void PPCHazardRecognizer970::Reset() {
+ LastWasBL8_ELF = false;
+ EndDispatchGroup();
+}
+
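The rewritten isLoadOfStoredAddress reduces to a byte-range overlap test once the underlying IR Values match: the branch on StoreOffset[i] < LoadOffset is equivalent to checking that each range starts before the other ends. A standalone sketch of that equivalence, under the assumption that both offsets are relative to the same base (rangesOverlap is a hypothetical helper):

#include <cassert>
#include <cstdint>

// Two byte ranges [Off, Off+Size) with a common base overlap iff each
// starts before the other ends -- the symmetric form of the test above.
static bool rangesOverlap(int64_t StoreOff, uint64_t StoreSize,
                          int64_t LoadOff, uint64_t LoadSize) {
  return StoreOff < int64_t(LoadOff + LoadSize) &&
         LoadOff  < int64_t(StoreOff + StoreSize);
}

int main() {
  assert(rangesOverlap(0, 8, 4, 4));  // store [0,8) vs load [4,8): overlap
  assert(!rangesOverlap(0, 4, 4, 4)); // adjacent ranges: no overlap
  assert(rangesOverlap(4, 4, 0, 8));  // commuted case still overlaps
  return 0;
}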
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 32fac91..95d0d64 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -49,14 +49,18 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
// HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
bool HasCTRSet;
+ // Was the last instruction issued a BL8_ELF?
+ bool LastWasBL8_ELF;
+
// StoredPtr - Keep track of the address of any store. If we see a load from
// the same address (or one that aliases it), disallow the load. We can have
// up to four stores in one dispatch group, hence we track up to 4.
//
// This is null if we haven't seen a store yet. We keep track of both
// operands of the store here, since we support [r+r] and [r+i] addressing.
- SDValue StorePtr1[4], StorePtr2[4];
- unsigned StoreSize[4];
+ const Value *StoreValue[4];
+ int64_t StoreOffset[4];
+ uint64_t StoreSize[4];
unsigned NumStores;
public:
@@ -64,6 +68,7 @@ public:
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void EmitInstruction(SUnit *SU);
virtual void AdvanceCycle();
+ virtual void Reset();
private:
/// EndDispatchGroup - Called when we are finishing a new dispatch group.
@@ -76,8 +81,8 @@ private:
bool &isFirst, bool &isSingle,bool &isCracked,
bool &isLoad, bool &isStore);
- bool isLoadOfStoredAddress(unsigned LoadSize,
- SDValue Ptr1, SDValue Ptr2) const;
+ bool isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
+ const Value *LoadValue) const;
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 3dee406..4a509a3 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -210,13 +210,13 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
// Find all return blocks, outputting a restore in each epilog.
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ if (!BB->empty() && BB->back().isReturn()) {
IP = BB->end(); --IP;
// Skip over all terminator instructions, which are part of the return
// sequence.
MachineBasicBlock::iterator I2 = IP;
- while (I2 != BB->begin() && (--I2)->getDesc().isTerminator())
+ while (I2 != BB->begin() && (--I2)->isTerminator())
IP = I2;
// Emit: MTVRSAVE InVRSave
@@ -1066,7 +1066,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Target = N->getOperand(1);
unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
- Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Target,
+ Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
Chain), 0);
return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 36d5c41..f3a3d17 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -103,6 +103,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// from FP_ROUND: that rounds to nearest, this rounds to zero.
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
+ // We do not currently implement these libm ops for PowerPC.
+ setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
+
// PowerPC has no SREM/UREM instructions
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
@@ -146,9 +153,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
// PowerPC does not have ROTR
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
@@ -332,7 +343,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
}
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
@@ -1667,7 +1680,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Potential tail calls could cause overwriting of argument stack slots.
- bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
unsigned PtrByteSize = 4;
// Assign locations to all of the incoming arguments.
@@ -1857,7 +1871,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
// Potential tail calls could cause overwriting of argument stack slots.
- bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
@@ -2263,9 +2278,9 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
// Tail call needs the stack to be aligned.
- if (CC==CallingConv::Fast && GuaranteedTailCallOpt) {
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
+ if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().
+ getFrameLowering()->getStackAlignment();
unsigned AlignMask = TargetAlign-1;
NumBytes = (NumBytes + AlignMask) & ~AlignMask;
}
@@ -2299,7 +2314,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
- if (!GuaranteedTailCallOpt)
+ if (!getTargetMachine().Options.GuaranteedTailCallOpt)
return false;
// Variable argument functions are not supported.
@@ -2752,7 +2767,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// the stack. Account for this here so these bytes can be pushed back on in
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
int BytesCalleePops =
- (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0;
+ (CallConv == CallingConv::Fast &&
+ getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -2868,7 +2884,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
// and restoring the callers stack pointer in this functions epilog. This is
// done because by tail calling the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
- if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
// Count how many bytes are to be pushed on the stack, including the linkage
@@ -3075,7 +3092,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// and restoring the callers stack pointer in this functions epilog. This is
// done because by tail calling the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
- if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
unsigned nAltivecParamsAtEnd = 0;
@@ -5754,7 +5772,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) &&
+ bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects()) &&
MFI->getStackSize() &&
!MF.getFunction()->hasFnAttr(Attribute::Naked);
unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index e88ad37..cdbc264 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -223,6 +223,18 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
(TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
+// 64-bit CR instructions
+def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
+ "mtcrf $FXM, $rS", BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM),
+ "", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
+ "mfcr $rT", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
@@ -469,6 +481,12 @@ def RLDICR : MDForm_1<30, 1,
(outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME),
"rldicr $rA, $rS, $SH, $ME", IntRotateD,
[]>, isPPC64;
+
+def RLWINM8 : MForm_2<21,
+ (outs G8RC:$rA), (ins G8RC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>;
+
} // End FXU Operations.
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index b9a6297..6d16f1d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -33,8 +33,8 @@
#include "PPCGenInstrInfo.inc"
namespace llvm {
-extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
-extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
+extern cl::opt<bool> DisablePPC32RS;
+extern cl::opt<bool> DisablePPC64RS;
}
using namespace llvm;
@@ -48,25 +48,32 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
const TargetMachine *TM,
const ScheduleDAG *DAG) const {
- // Should use subtarget info to pick the right hazard recognizer. For
- // now, always return a PPC970 recognizer.
- const TargetInstrInfo *TII = TM->getInstrInfo();
- (void)TII;
- assert(TII && "No InstrInfo?");
-
unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
if (Directive == PPC::DIR_440) {
const InstrItineraryData *II = TM->getInstrItineraryData();
return new PPCHazardRecognizer440(II, DAG);
}
- else {
- // Disable the hazard recognizer for now, as it doesn't support
- // bottom-up scheduling.
- //return new PPCHazardRecognizer970(*TII);
- return new ScheduleHazardRecognizer();
- }
+
+ return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
}
+/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
+/// to use for this target when scheduling the DAG.
+ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
+ const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+
+ // Most subtargets use a PPC970 recognizer.
+ if (Directive != PPC::DIR_440) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ assert(TII && "No InstrInfo?");
+
+ return new PPCHazardRecognizer970(*TII);
+ }
+
+ return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG);
+}
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
@@ -338,6 +345,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
+// This function returns true if a CR spill is necessary and false otherwise.
bool
PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill,
@@ -369,7 +377,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
FrameIdx));
} else {
// FIXME: this spills LR immediately to memory in one step. To do this,
- // we use R11, which we know cannot be used in the prolog/epilog. This is
+ // we use X11, which we know cannot be used in the prolog/epilog. This is
// a hack.
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11));
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
@@ -388,9 +396,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
} else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
- if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- // FIXME (64-bit): Enable
+ if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
+ (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
.addReg(SrcReg,
getKillRegState(isKill)),
@@ -403,11 +410,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
// We hack this on Darwin by reserving R2. It's probably broken on Linux
// at the moment.
+ bool is64Bit = TM.getSubtargetImpl()->isPPC64();
// We need to store the CR in the low 4-bits of the saved value. First,
// issue a MFCR to save all of the CRBits.
unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg)
+ (is64Bit ? PPC::X2 : PPC::R2) :
+ (is64Bit ? PPC::X0 : PPC::R0);
+ NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud :
+ PPC::MFCRpseud), ScratchReg)
.addReg(SrcReg, getKillRegState(isKill)));
// If the saved register wasn't CR0, shift the bits left so that they are
@@ -415,12 +425,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
if (SrcReg != PPC::CR0) {
unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4;
// rlwinm scratch, scratch, ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 :
+ PPC::RLWINM), ScratchReg)
.addReg(ScratchReg).addImm(ShiftBits)
.addImm(0).addImm(31));
}
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ?
+ PPC::STW8 : PPC::STW))
.addReg(ScratchReg,
getKillRegState(isKill)),
FrameIdx));
@@ -504,7 +516,7 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
NewMIs.back()->addMemOperand(MF, MMO);
}
-void
+bool
PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
@@ -524,8 +536,8 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
FrameIdx));
} else {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
- PPC::R11), FrameIdx));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11));
+ PPC::X11), FrameIdx));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11));
}
} else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
@@ -534,28 +546,37 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
- // FIXME: We need a scatch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- ScratchReg), FrameIdx));
-
- // If the reloaded register isn't CR0, shift the bits right so that they are
- // in the right CR's slot.
- if (DestReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
- // rlwinm r11, r11, 32-ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
- .addImm(31));
+ if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
+ (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CR), DestReg)
+ , FrameIdx));
+ return true;
+ } else {
+ // FIXME: We need a scratch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ ScratchReg), FrameIdx));
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
+ // rlwinm r11, r11, 32-ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
+ .addImm(31));
+ }
+
+ NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ?
+ PPC::MTCRF8 : PPC::MTCRF), DestReg)
+ .addReg(ScratchReg));
}
-
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg)
- .addReg(ScratchReg));
} else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) {
unsigned Reg = 0;
@@ -600,6 +621,8 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
} else {
llvm_unreachable("Unknown regclass!");
}
+
+ return false;
}
void
@@ -612,7 +635,10 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
- LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
+ if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) {
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setSpillsCR();
+ }
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
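The CR spill/reload sequences above rely on rlwinm rotates: each CR field is a 4-bit nibble of the 32-bit CR with CR0 in the most significant position, so rotating left by 4*n moves CRn's bits into CR0's slot before the store, and rotating left by 32 - 4*n on reload undoes it. A sketch of that round trip, with rotl32 as a hypothetical stand-in for rlwinm and the nibble layout as the stated assumption:

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}

int main() {
  unsigned CRNum = 7;                     // say we are spilling CR7
  uint32_t CR = 0x0000000Au;              // CR7 holds the bits 0b1010
  uint32_t Saved = rotl32(CR, CRNum * 4); // spill: CR7 -> CR0's slot
  assert((Saved >> 28) == 0xA);
  uint32_t Restored = rotl32(Saved, 32 - CRNum * 4); // reload: undo it
  assert(Restored == CR);
  return 0;
}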
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 90bacc9..e90f8cb 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -72,7 +72,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
- void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
@@ -88,6 +88,9 @@ public:
ScheduleHazardRecognizer *
CreateTargetHazardRecognizer(const TargetMachine *TM,
const ScheduleDAG *DAG) const;
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 17f63e0..d4c9d10 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -349,7 +349,7 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
-def FPContractions : Predicate<"!NoExcessFPPrecision">;
+def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
@@ -399,7 +399,14 @@ let usesCustomInserter = 1, // Expanded after instruction selection.
// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
// scavenge a register for it.
-def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F),
+let mayStore = 1 in
+def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F),
+ "", []>;
+
+// RESTORE_CR - Indicate that we're restoring the CR register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in
+def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F),
"", []>;
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
@@ -1091,7 +1098,7 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
"mfspr $rT, 256", IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
-def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS),
+def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 3ba9260..27f7f4a 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -46,15 +46,14 @@
#define GET_REGINFO_TARGET_DESC
#include "PPCGenRegisterInfo.inc"
-// FIXME (64-bit): Eventually enable by default.
namespace llvm {
-cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger",
+cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger",
cl::init(false),
- cl::desc("Enable PPC32 register scavenger"),
+ cl::desc("Disable PPC32 register scavenger"),
cl::Hidden);
-cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger",
+cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger",
cl::init(false),
- cl::desc("Enable PPC64 register scavenger"),
+ cl::desc("Disable PPC64 register scavenger"),
cl::Hidden);
}
@@ -63,8 +62,8 @@ using namespace llvm;
// FIXME (64-bit): Should be inlined.
bool
PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
- return ((EnablePPC32RS && !Subtarget.isPPC64()) ||
- (EnablePPC64RS && Subtarget.isPPC64()));
+ return ((!DisablePPC32RS && !Subtarget.isPPC64()) ||
+ (!DisablePPC64RS && Subtarget.isPPC64()));
}
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
@@ -120,10 +119,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
PPC::LR, 0
};
@@ -149,10 +144,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
0
};
// 64-bit Darwin calling convention.
@@ -174,10 +165,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
PPC::LR8, 0
};
@@ -203,10 +190,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
PPC::V24, PPC::V25, PPC::V26, PPC::V27,
PPC::V28, PPC::V29, PPC::V30, PPC::V31,
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
0
};
@@ -247,9 +230,6 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R13);
Reserved.set(PPC::R31);
- if (!requiresRegisterScavenging(MF))
- Reserved.set(PPC::R0); // FIXME (64-bit): Remove
-
Reserved.set(PPC::X0);
Reserved.set(PPC::X1);
Reserved.set(PPC::X13);
@@ -259,7 +239,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (Subtarget.isSVR4ABI()) {
Reserved.set(PPC::X2);
}
- // Reserve R2 on Darwin to hack around the problem of save/restore of CR
+ // Reserve X2 on Darwin to hack around the problem of save/restore of CR
// when the stack frame is too big to address directly; we need two regs.
// This is a hack.
if (Subtarget.isDarwinABI()) {
@@ -291,6 +271,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case PPC::F4RCRegClassID:
case PPC::VRRCRegClassID:
return 32 - DefaultSafety;
+ case PPC::CRRCRegClassID:
+ return 8 - DefaultSafety;
}
}
@@ -301,7 +283,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
void PPCRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- if (GuaranteedTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ I->getOpcode() == PPC::ADJCALLSTACKUP) {
// Add (actually subtract) back the amount the callee popped on return.
if (int CalleeAmt = I->getOperand(1).getImm()) {
bool is64Bit = Subtarget.isPPC64();
@@ -476,28 +459,32 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex, int SPAdj,
RegScavenger *RS) const {
// Get the instruction.
- MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>, <FI>
+ MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc dl = MI.getDebugLoc();
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
- unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
- unsigned SrcReg = MI.getOperand(0).getReg();
+ // FIXME: Once LLVM supports creating virtual registers here, or the register
+ // scavenger can return multiple registers, stop using reserved registers
+ // here.
+ (void) SPAdj;
+ (void) RS;
+
bool LP64 = Subtarget.isPPC64();
+ unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
+ (LP64 ? PPC::X0 : PPC::R0);
+ unsigned SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
// an MFCRpseud to save all of the CRBits and, if needed, kill the SrcReg.
- BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg)
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFCR8pseud : PPC::MFCRpseud), Reg)
.addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
// If the saved register wasn't CR0, shift the bits left so that they are in
// CR0's slot.
if (SrcReg != PPC::CR0)
// rlwinm rA, rA, ShiftBits, 0, 31.
- BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
.addReg(Reg, RegState::Kill)
.addImm(getPPCRegisterNumbering(SrcReg) * 4)
.addImm(0)
@@ -511,6 +498,48 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
MBB.erase(II);
}
+void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex, int SPAdj,
+ RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // FIXME: Once LLVM supports creating virtual registers here, or the register
+ // scavenger can return multiple registers, stop using reserved registers
+ // here.
+ (void) SPAdj;
+ (void) RS;
+
+ bool LP64 = Subtarget.isPPC64();
+ unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
+ (LP64 ? PPC::X0 : PPC::R0);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_CR does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ),
+ Reg), FrameIndex);
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
+ // rlwinm r11, r11, 32-ShiftBits, 0, 31.
+ BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
+ .addReg(Reg).addImm(32-ShiftBits).addImm(0)
+ .addImm(31);
+ }
+
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg)
+ .addReg(Reg);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
void
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS) const {
@@ -556,16 +585,23 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}
- // Special case for pseudo-op SPILL_CR.
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable by default.
+ // Special case for pseudo-ops SPILL_CR and RESTORE_CR.
+ if (requiresRegisterScavenging(MF)) {
if (OpC == PPC::SPILL_CR) {
lowerCRSpilling(II, FrameIndex, SPAdj, RS);
return;
+ } else if (OpC == PPC::RESTORE_CR) {
+ lowerCRRestore(II, FrameIndex, SPAdj, RS);
+ return;
}
+ }
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
+
+ bool is64Bit = Subtarget.isPPC64();
MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ?
- PPC::R31 : PPC::R1,
+ (is64Bit ? PPC::X31 : PPC::R31) :
+ (is64Bit ? PPC::X1 : PPC::R1),
false);
// Figure out if the offset in the instruction is shifted right two bits. This
@@ -611,19 +647,19 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
- // FIXME: figure out what SPAdj is doing here.
- // FIXME (64-bit): Use "findScratchRegister".
unsigned SReg;
- if (requiresRegisterScavenging(MF))
- SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj);
- else
- SReg = PPC::R0;
+ if (requiresRegisterScavenging(MF)) {
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj);
+ } else
+ SReg = is64Bit ? PPC::X0 : PPC::R0;
// Insert a set of rA with the full offset value before the ld, st, or add
- BuildMI(MBB, II, dl, TII.get(PPC::LIS), SReg)
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg)
.addImm(Offset >> 16);
- BuildMI(MBB, II, dl, TII.get(PPC::ORI), SReg)
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
.addReg(SReg, RegState::Kill)
.addImm(Offset);
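The LIS/ORI pair above is the standard two-instruction materialization of a 32-bit offset: lis loads the high 16 bits shifted into place, and ori ORs in the low 16. Because ori zero-extends its immediate, the high half is simply Offset >> 16, with no carry adjustment as the lis/addi form would need; the encoder is assumed to keep only the low 16 bits of the ori immediate. A quick sketch of the arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  int32_t Offset = 0x12348000;              // low half has its top bit set
  uint32_t Hi = uint32_t(Offset) >> 16;     // what lis shifts into place
  uint32_t Lo = uint32_t(Offset) & 0xFFFF;  // what ori ORs in (zero-extended)
  uint32_t Reg = (Hi << 16) | Lo;
  assert(Reg == uint32_t(Offset));          // no +1 fix-up needed, unlike addi
  return 0;
}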
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index f70a594..faf690f 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -57,6 +57,8 @@ public:
int SPAdj, RegScavenger *RS) const;
void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
int SPAdj, RegScavenger *RS) const;
+ void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
+ int SPAdj, RegScavenger *RS) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 8acf75c..baa0eb5 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "PPCSubtarget.h"
+#include "PPCRegisterInfo.h"
#include "PPC.h"
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
@@ -140,3 +141,22 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
GV->hasCommonLinkage() || isDecl;
}
+
+bool PPCSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ if (DarwinDirective == PPC::DIR_440)
+ return false;
+
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+
+ if (isPPC64())
+ CriticalPathRCs.push_back(&PPC::G8RCRegClass);
+ else
+ CriticalPathRCs.push_back(&PPC::GPRCRegClass);
+
+ return OptLevel >= CodeGenOpt::Default;
+}
+
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index d2b853d..62b2424 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -148,6 +148,10 @@ public:
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
+ /// enablePostRAScheduler - True at 'More' optimization.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
};
} // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index de8fca0..8e71c46 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -28,10 +28,11 @@ extern "C" void LLVMInitializePowerPCTarget() {
PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64Bit),
DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
@@ -45,17 +46,19 @@ bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : PPCTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
@@ -81,7 +84,7 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
if (Subtarget.isPPC64())
// Temporary workaround for the inability of PPC64 JIT to handle jump
// tables.
- DisableJumpTables = true;
+ Options.DisableJumpTables = true;
// Inform the subtarget that we are in JIT mode. FIXME: does this break macho
// writing?
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 03b27c6..0427876 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -41,7 +41,7 @@ class PPCTargetMachine : public LLVMTargetMachine {
public:
PPCTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64Bit);
@@ -79,7 +79,7 @@ public:
class PPC32TargetMachine : public PPCTargetMachine {
public:
PPC32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -89,7 +89,7 @@ public:
class PPC64TargetMachine : public PPCTargetMachine {
public:
PPC64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
index f63111f..fdb8a62 100644
--- a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
+++ b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMPowerPCInfo
PowerPCTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMPowerPCInfo PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
index f51b417..f77d85b 100644
--- a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = PowerPCInfo
parent = PowerPC
required_libraries = MC Support Target
add_to_library_groups = PowerPC
-
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index 9687951..56ee7c2 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -22,17 +22,5 @@ add_llvm_target(SparcCodeGen
SparcSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMSparcCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSparcDesc
- LLVMSparcInfo
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index dab35e5..9295408 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -100,7 +100,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- if (I->getDesc().hasDelaySlot()) {
+ if (I->hasDelaySlot()) {
MachineBasicBlock::iterator D = MBB.end();
MachineBasicBlock::iterator J = I;
@@ -149,7 +149,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
}
//Call's delay filler can def some of call's uses.
- if (slot->getDesc().isCall())
+ if (slot->isCall())
insertCallUses(slot, RegUses);
else
insertDefsUses(slot, RegDefs, RegUses);
@@ -170,7 +170,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
if (I->hasUnmodeledSideEffects()
|| I->isInlineAsm()
|| I->isLabel()
- || I->getDesc().hasDelaySlot()
+ || I->hasDelaySlot()
|| isDelayFiller(MBB, I))
break;
@@ -194,13 +194,13 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
if (candidate->isImplicitDef() || candidate->isKill())
return true;
- if (candidate->getDesc().mayLoad()) {
+ if (candidate->mayLoad()) {
sawLoad = true;
if (sawStore)
return true;
}
- if (candidate->getDesc().mayStore()) {
+ if (candidate->mayStore()) {
if (sawStore)
return true;
sawStore = true;
@@ -298,13 +298,13 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB,
return false;
if (candidate->getOpcode() == SP::UNIMP)
return true;
- const MCInstrDesc &prevdesc = (--candidate)->getDesc();
- return prevdesc.hasDelaySlot();
+ --candidate;
+ return candidate->hasDelaySlot();
}
bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
{
- if (!I->getDesc().isCall())
+ if (!I->isCall())
return false;
unsigned structSizeOpNum = 0;
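
[Editor's note: the Sparc hunks above replace getDesc().isCall(), getDesc().mayLoad(), getDesc().hasDelaySlot() and friends with the new MachineInstr convenience accessors. A toy analogue of that wrapper pattern, not LLVM code, just to show the two spellings agree at this revision.]

#include <cassert>

// Toy analogue: a Desc holds the flags, and the instruction type grows
// convenience wrappers so callers need not spell out getDesc().
struct Desc { bool IsCall; bool HasDelaySlot; };

struct Instr {
  Desc D;
  const Desc &getDesc() const { return D; }
  bool isCall() const { return getDesc().IsCall; }             // wrapper
  bool hasDelaySlot() const { return getDesc().HasDelaySlot; } // wrapper
};

int main() {
  Instr I{{true, false}};
  assert(I.isCall() == I.getDesc().IsCall);
  return 0;
}
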
diff --git a/lib/Target/Sparc/LLVMBuild.txt b/lib/Target/Sparc/LLVMBuild.txt
index 38c797f..fe20d2f 100644
--- a/lib/Target/Sparc/LLVMBuild.txt
+++ b/lib/Target/Sparc/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = Sparc
@@ -27,4 +30,3 @@ name = SparcCodeGen
parent = Sparc
required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SparcDesc SparcInfo Support Target
add_to_library_groups = Sparc
-
diff --git a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
index d3bdf0b..9d4db4d 100644
--- a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
@@ -3,10 +3,4 @@ add_llvm_library(LLVMSparcDesc
SparcMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMSparcDesc
- LLVMMC
- LLVMSparcInfo
- LLVMSupport
- )
-
add_dependencies(LLVMSparcDesc SparcCommonTableGen)
diff --git a/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt b/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt
index a339cec..97f8f16 100644
--- a/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = SparcDesc
parent = Sparc
required_libraries = MC SparcInfo Support
add_to_library_groups = Sparc
-
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index deb39d9..7548bbf 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -236,9 +236,9 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
// Check if the last terminator is an unconditional branch.
MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ while (I != Pred->begin() && !(--I)->isTerminator())
; // Noop
- return I == Pred->end() || !I->getDesc().isBarrier();
+ return I == Pred->end() || !I->isBarrier();
}
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 25104d1..3608d3b 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -763,7 +763,9 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 7a6bf50..5290d42 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -133,7 +133,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
break;
//Terminator is not a branch
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
//Handle Unconditional branches
@@ -195,7 +195,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
.addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode);
BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA))
.addMBB(TargetBB);
- MBB.addSuccessor(TargetBB);
+
OldInst->eraseFromParent();
UnCondBrIter->eraseFromParent();
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 7dff799..8e16fd7 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -26,10 +26,11 @@ extern "C" void LLVMInitializeSparcTarget() {
///
SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64bit),
DataLayout(Subtarget.getDataLayout()),
TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
@@ -52,16 +53,20 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM){
SparcV8TargetMachine::SparcV8TargetMachine(const Target &T,
StringRef TT, StringRef CPU,
- StringRef FS, Reloc::Model RM,
+ StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL)
- : SparcTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {
+ : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
SparcV9TargetMachine::SparcV9TargetMachine(const Target &T,
StringRef TT, StringRef CPU,
- StringRef FS, Reloc::Model RM,
+ StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL)
- : SparcTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {
+ : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 63bfa5d..cedc1e3 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -34,9 +34,9 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcFrameLowering FrameLowering;
public:
SparcTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool is64bit);
+ CodeGenOpt::Level OL, bool is64bit);
virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameLowering *getFrameLowering() const {
@@ -65,6 +65,7 @@ class SparcV8TargetMachine : public SparcTargetMachine {
public:
SparcV8TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -75,6 +76,7 @@ class SparcV9TargetMachine : public SparcTargetMachine {
public:
SparcV9TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
diff --git a/lib/Target/Sparc/TargetInfo/CMakeLists.txt b/lib/Target/Sparc/TargetInfo/CMakeLists.txt
index a076023..b0d031e 100644
--- a/lib/Target/Sparc/TargetInfo/CMakeLists.txt
+++ b/lib/Target/Sparc/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMSparcInfo
SparcTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMSparcInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMSparcInfo SparcCommonTableGen)
diff --git a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt
index 81c9032..b5c320f 100644
--- a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = SparcInfo
parent = Sparc
required_libraries = MC Support Target
add_to_library_groups = Sparc
-
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index d52ecb3..440f9ad 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -13,7 +13,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/ErrorHandling.h"
@@ -73,23 +72,6 @@ TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}
-int
-TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- SDNode *DefNode, unsigned DefIdx,
- SDNode *UseNode, unsigned UseIdx) const {
- if (!ItinData || ItinData->isEmpty())
- return -1;
-
- if (!DefNode->isMachineOpcode())
- return -1;
-
- unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
- if (!UseNode->isMachineOpcode())
- return ItinData->getOperandCycle(DefClass, DefIdx);
- unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
- return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
-}
-
int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -99,17 +81,6 @@ int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return ItinData->getStageLatency(MI->getDesc().getSchedClass());
}
-int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
- SDNode *N) const {
- if (!ItinData || ItinData->isEmpty())
- return 1;
-
- if (!N->isMachineOpcode())
- return 1;
-
- return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
-}
-
bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI,
unsigned DefIdx) const {
@@ -129,19 +100,6 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
}
-bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
-
- // Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
- return true;
- if (!MCID.isPredicable())
- return true;
- return !isPredicated(MI);
-}
-
-
/// Measure the specified inline asm to determine an approximation of its
/// length.
/// Comments (which run till the next SeparatorString or newline) do not
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index aa2e014..768facb 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -22,15 +22,96 @@ char TargetLibraryInfo::ID = 0;
const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
{
- "memset",
+ "acos",
+ "acosl",
+ "acosf",
+ "asin",
+ "asinl",
+ "asinf",
+ "atan",
+ "atanl",
+ "atanf",
+ "atan2",
+ "atan2l",
+ "atan2f",
+ "ceil",
+ "ceill",
+ "ceilf",
+ "copysign",
+ "copysignf",
+ "copysignl",
+ "cos",
+ "cosl",
+ "cosf",
+ "cosh",
+ "coshl",
+ "coshf",
+ "exp",
+ "expl",
+ "expf",
+ "exp2",
+ "exp2l",
+ "exp2f",
+ "expm1",
+ "expm1l",
+ "expl1f",
+ "fabs",
+ "fabsl",
+ "fabsf",
+ "floor",
+ "floorl",
+ "floorf",
+ "fiprintf",
+ "fmod",
+ "fmodl",
+ "fmodf",
+ "fputs",
+ "fwrite",
+ "iprintf",
+ "log",
+ "logl",
+ "logf",
+ "log2",
+ "log2l",
+ "log2f",
+ "log10",
+ "log10l",
+ "log10f",
+ "log1p",
+ "log1pl",
+ "log1pf",
"memcpy",
"memmove",
+ "memset",
"memset_pattern16",
- "iprintf",
+ "nearbyint",
+ "nearbyintf",
+ "nearbyintl",
+ "pow",
+ "powf",
+ "powl",
+ "rint",
+ "rintf",
+ "rintl",
+ "sin",
+ "sinl",
+ "sinf",
+ "sinh",
+ "sinhl",
+ "sinhf",
"siprintf",
- "fiprintf",
- "fwrite",
- "fputs"
+ "sqrt",
+ "sqrtl",
+ "sqrtf",
+ "tan",
+ "tanl",
+ "tanf",
+ "tanh",
+ "tanhl",
+ "tanhf",
+ "trunc",
+ "truncf",
+ "truncl"
};
/// initialize - Initialize the set of available library functions based on the
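
[Editor's note: StandardNames is a parallel array indexed by the LibFunc enumeration, so the entries above must keep one string per enumerator in enum order; the grouping is by function family, not strict alphabetical order. A self-contained toy of the indexing scheme, with stand-in names and enumerators.]

#include <cassert>
#include <cstring>

// Toy stand-in: three enumerators and the matching slice of the table.
enum LibFuncId { lf_memcpy, lf_memmove, lf_memset, NumFuncs };
static const char *Names[NumFuncs] = { "memcpy", "memmove", "memset" };

int main() {
  // The lookup TargetLibraryInfo performs: index the table by the enum.
  assert(std::strcmp(Names[lf_memmove], "memmove") == 0);
  return 0;
}
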
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 56b7b69..fc8b67b 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -48,7 +48,7 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx,
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
}
-static bool isSuitableForBSS(const GlobalVariable *GV) {
+static bool isSuitableForBSS(const GlobalVariable *GV, bool NoZerosInBSS) {
const Constant *C = GV->getInitializer();
// Must have zero initializer.
@@ -133,7 +133,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// Handle thread-local data first.
if (GVar->isThreadLocal()) {
- if (isSuitableForBSS(GVar))
+ if (isSuitableForBSS(GVar, TM.Options.NoZerosInBSS))
return SectionKind::getThreadBSS();
return SectionKind::getThreadData();
}
@@ -143,7 +143,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
return SectionKind::getCommon();
// Variable can be easily put to BSS section.
- if (isSuitableForBSS(GVar)) {
+ if (isSuitableForBSS(GVar, TM.Options.NoZerosInBSS)) {
if (GVar->hasLocalLinkage())
return SectionKind::getBSSLocal();
else if (GVar->hasExternalLinkage())
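
[Editor's note: the hunks above thread NoZerosInBSS into isSuitableForBSS as a parameter instead of reading a global. A minimal sketch of the decision the flag feeds, assuming only the two facts visible here: a zero initializer is required, and the option can veto BSS placement. The real function checks more than this.]

// Hypothetical reduction of the BSS test; not the real implementation.
bool suitableForBSS(bool InitializerIsZero, bool NoZerosInBSS) {
  if (!InitializerIsZero)
    return false;        // must have a zero initializer
  return !NoZerosInBSS;  // -nozero-initialized-in-bss forces it out of .bss
}
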
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 805e16e..fb7bbbb 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -24,153 +22,11 @@ using namespace llvm;
//
namespace llvm {
- bool LessPreciseFPMADOption;
- bool PrintMachineCode;
- bool NoFramePointerElim;
- bool NoFramePointerElimNonLeaf;
- bool NoExcessFPPrecision;
- bool UnsafeFPMath;
- bool NoInfsFPMath;
- bool NoNaNsFPMath;
- bool HonorSignDependentRoundingFPMathOption;
- bool UseSoftFloat;
- FloatABI::ABIType FloatABIType;
- bool NoImplicitFloat;
- bool NoZerosInBSS;
- bool JITExceptionHandling;
- bool JITEmitDebugInfo;
- bool JITEmitDebugInfoToDisk;
- bool GuaranteedTailCallOpt;
- unsigned StackAlignmentOverride;
- bool RealignStack;
- bool DisableJumpTables;
bool StrongPHIElim;
bool HasDivModLibcall;
bool AsmVerbosityDefault(false);
- bool EnableSegmentedStacks;
}
-static cl::opt<bool, true>
-PrintCode("print-machineinstrs",
- cl::desc("Print generated machine code"),
- cl::location(PrintMachineCode), cl::init(false));
-static cl::opt<bool, true>
-DisableFPElim("disable-fp-elim",
- cl::desc("Disable frame pointer elimination optimization"),
- cl::location(NoFramePointerElim),
- cl::init(false));
-static cl::opt<bool, true>
-DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
- cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"),
- cl::location(NoFramePointerElimNonLeaf),
- cl::init(false));
-static cl::opt<bool, true>
-DisableExcessPrecision("disable-excess-fp-precision",
- cl::desc("Disable optimizations that may increase FP precision"),
- cl::location(NoExcessFPPrecision),
- cl::init(false));
-static cl::opt<bool, true>
-EnableFPMAD("enable-fp-mad",
- cl::desc("Enable less precise MAD instructions to be generated"),
- cl::location(LessPreciseFPMADOption),
- cl::init(false));
-static cl::opt<bool, true>
-EnableUnsafeFPMath("enable-unsafe-fp-math",
- cl::desc("Enable optimizations that may decrease FP precision"),
- cl::location(UnsafeFPMath),
- cl::init(false));
-static cl::opt<bool, true>
-EnableNoInfsFPMath("enable-no-infs-fp-math",
- cl::desc("Enable FP math optimizations that assume no +-Infs"),
- cl::location(NoInfsFPMath),
- cl::init(false));
-static cl::opt<bool, true>
-EnableNoNaNsFPMath("enable-no-nans-fp-math",
- cl::desc("Enable FP math optimizations that assume no NaNs"),
- cl::location(NoNaNsFPMath),
- cl::init(false));
-static cl::opt<bool, true>
-EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
- cl::Hidden,
- cl::desc("Force codegen to assume rounding mode can change dynamically"),
- cl::location(HonorSignDependentRoundingFPMathOption),
- cl::init(false));
-static cl::opt<bool, true>
-GenerateSoftFloatCalls("soft-float",
- cl::desc("Generate software floating point library calls"),
- cl::location(UseSoftFloat),
- cl::init(false));
-static cl::opt<llvm::FloatABI::ABIType, true>
-FloatABIForCalls("float-abi",
- cl::desc("Choose float ABI type"),
- cl::location(FloatABIType),
- cl::init(FloatABI::Default),
- cl::values(
- clEnumValN(FloatABI::Default, "default",
- "Target default float ABI type"),
- clEnumValN(FloatABI::Soft, "soft",
- "Soft float ABI (implied by -soft-float)"),
- clEnumValN(FloatABI::Hard, "hard",
- "Hard float ABI (uses FP registers)"),
- clEnumValEnd));
-static cl::opt<bool, true>
-DontPlaceZerosInBSS("nozero-initialized-in-bss",
- cl::desc("Don't place zero-initialized symbols into bss section"),
- cl::location(NoZerosInBSS),
- cl::init(false));
-static cl::opt<bool, true>
-EnableJITExceptionHandling("jit-enable-eh",
- cl::desc("Emit exception handling information"),
- cl::location(JITExceptionHandling),
- cl::init(false));
-// In debug builds, make this default to true.
-#ifdef NDEBUG
-#define EMIT_DEBUG false
-#else
-#define EMIT_DEBUG true
-#endif
-static cl::opt<bool, true>
-EmitJitDebugInfo("jit-emit-debug",
- cl::desc("Emit debug information to debugger"),
- cl::location(JITEmitDebugInfo),
- cl::init(EMIT_DEBUG));
-#undef EMIT_DEBUG
-static cl::opt<bool, true>
-EmitJitDebugInfoToDisk("jit-emit-debug-to-disk",
- cl::Hidden,
- cl::desc("Emit debug info objfiles to disk"),
- cl::location(JITEmitDebugInfoToDisk),
- cl::init(false));
-
-static cl::opt<bool, true>
-EnableGuaranteedTailCallOpt("tailcallopt",
- cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
- cl::location(GuaranteedTailCallOpt),
- cl::init(false));
-static cl::opt<unsigned, true>
-OverrideStackAlignment("stack-alignment",
- cl::desc("Override default stack alignment"),
- cl::location(StackAlignmentOverride),
- cl::init(0));
-static cl::opt<bool, true>
-EnableRealignStack("realign-stack",
- cl::desc("Realign stack if needed"),
- cl::location(RealignStack),
- cl::init(true));
-static cl::opt<bool, true>
-DisableSwitchTables(cl::Hidden, "disable-jump-tables",
- cl::desc("Do not generate jump tables."),
- cl::location(DisableJumpTables),
- cl::init(false));
-static cl::opt<bool, true>
-EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
- cl::desc("Use strong PHI elimination."),
- cl::location(StrongPHIElim),
- cl::init(false));
-static cl::opt<std::string>
-TrapFuncName("trap-func", cl::Hidden,
- cl::desc("Emit a call to trap function rather than a trap instruction"),
- cl::init(""));
static cl::opt<bool>
DataSections("fdata-sections",
cl::desc("Emit data into separate sections"),
@@ -179,18 +35,14 @@ static cl::opt<bool>
FunctionSections("ffunction-sections",
cl::desc("Emit functions into separate sections"),
cl::init(false));
-static cl::opt<bool, true>
-SegmentedStacks("segmented-stacks",
- cl::desc("Use segmented stacks if possible."),
- cl::location(EnableSegmentedStacks),
- cl::init(false));
//---------------------------------------------------------------------------
// TargetMachine Class
//
TargetMachine::TargetMachine(const Target &T,
- StringRef TT, StringRef CPU, StringRef FS)
+ StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options)
: TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS),
CodeGenInfo(0), AsmInfo(0),
MCRelaxAll(false),
@@ -198,11 +50,8 @@ TargetMachine::TargetMachine(const Target &T,
MCSaveTempLabels(false),
MCUseLoc(true),
MCUseCFI(true),
- MCUseDwarfDirectory(false) {
- // Typically it will be subtargets that will adjust FloatABIType from Default
- // to Soft or Hard.
- if (UseSoftFloat)
- FloatABIType = FloatABI::Soft;
+ MCUseDwarfDirectory(false),
+ Options(Options) {
}
TargetMachine::~TargetMachine() {
@@ -258,36 +107,3 @@ void TargetMachine::setDataSections(bool V) {
DataSections = V;
}
-namespace llvm {
- /// DisableFramePointerElim - This returns true if frame pointer elimination
- /// optimization should be disabled for the given machine function.
- bool DisableFramePointerElim(const MachineFunction &MF) {
- // Check to see if we should eliminate non-leaf frame pointers and then
- // check to see if we should eliminate all frame pointers.
- if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->hasCalls();
- }
-
- return NoFramePointerElim;
- }
-
- /// LessPreciseFPMAD - This flag return true when -enable-fp-mad option
- /// is specified on the command line. When this flag is off(default), the
- /// code generator is not allowed to generate mad (multiply add) if the
- /// result is "less precise" than doing those operations individually.
- bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; }
-
- /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
- /// that the rounding mode of the FPU can change from its default.
- bool HonorSignDependentRoundingFPMath() {
- return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
- }
-
- /// getTrapFunctionName - If this returns a non-empty string, this means isel
- /// should lower Intrinsic::trap to a call to the specified function name
- /// instead of an ISD::TRAP node.
- StringRef getTrapFunctionName() {
- return TrapFuncName;
- }
-}
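
[Editor's note: with the cl::opt globals and the free helper functions deleted, codegen flags now live on the TargetOptions member stored by the new TargetMachine constructor. A minimal sketch of what a caller looks like after the change, mirroring the TargetLoweringObjectFile hunk earlier in this patch.]

#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

// Sketch: options are read through TM.Options rather than file-scope globals.
static bool keepOutOfBSS(const llvm::TargetMachine &TM) {
  return TM.Options.NoZerosInBSS;  // was the global bool NoZerosInBSS
}
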
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index 67239b8..2689837 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -13,8 +13,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt
index 94aca7a..47489bb 100644
--- a/lib/Target/X86/AsmParser/CMakeLists.txt
+++ b/lib/Target/X86/AsmParser/CMakeLists.txt
@@ -5,12 +5,4 @@ add_llvm_library(LLVMX86AsmParser
X86AsmParser.cpp
)
-add_llvm_library_dependencies(LLVMX86AsmParser
- LLVMMC
- LLVMMCParser
- LLVMSupport
- LLVMX86Desc
- LLVMX86Info
- )
-
add_dependencies(LLVMX86AsmParser X86CommonTableGen)
diff --git a/lib/Target/X86/AsmParser/LLVMBuild.txt b/lib/Target/X86/AsmParser/LLVMBuild.txt
index 6c2405a..9f94d5d 100644
--- a/lib/Target/X86/AsmParser/LLVMBuild.txt
+++ b/lib/Target/X86/AsmParser/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86AsmParser
parent = X86
required_libraries = MC MCParser Support X86Desc X86Info
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 4542d4b..be15899 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -51,21 +51,6 @@ endif()
add_llvm_target(X86CodeGen ${sources})
-add_llvm_library_dependencies(LLVMX86CodeGen
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- LLVMX86AsmPrinter
- LLVMX86Desc
- LLVMX86Info
- LLVMX86Utils
- )
-
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
index 4f570d5..0cd6db9 100644
--- a/lib/Target/X86/Disassembler/CMakeLists.txt
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -5,12 +5,6 @@ add_llvm_library(LLVMX86Disassembler
X86DisassemblerDecoder.c
)
-add_llvm_library_dependencies(LLVMX86Disassembler
- LLVMMC
- LLVMSupport
- LLVMX86Info
- )
-
# workaround for hanging compilation on MSVC9 and 10
if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
set_property(
diff --git a/lib/Target/X86/Disassembler/LLVMBuild.txt b/lib/Target/X86/Disassembler/LLVMBuild.txt
index cd748cf..cac7adf 100644
--- a/lib/Target/X86/Disassembler/LLVMBuild.txt
+++ b/lib/Target/X86/Disassembler/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86Disassembler
parent = X86
required_libraries = MC Support X86Info
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/InstPrinter/CMakeLists.txt b/lib/Target/X86/InstPrinter/CMakeLists.txt
index 2a2b5db..28e2460 100644
--- a/lib/Target/X86/InstPrinter/CMakeLists.txt
+++ b/lib/Target/X86/InstPrinter/CMakeLists.txt
@@ -6,10 +6,4 @@ add_llvm_library(LLVMX86AsmPrinter
X86InstComments.cpp
)
-add_llvm_library_dependencies(LLVMX86AsmPrinter
- LLVMMC
- LLVMSupport
- LLVMX86Utils
- )
-
add_dependencies(LLVMX86AsmPrinter X86CommonTableGen)
diff --git a/lib/Target/X86/InstPrinter/LLVMBuild.txt b/lib/Target/X86/InstPrinter/LLVMBuild.txt
index fb01323..6868dde 100644
--- a/lib/Target/X86/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/X86/InstPrinter/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86AsmPrinter
parent = X86
required_libraries = MC Support X86Utils
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 6e87efa..6e4b1b9 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -106,28 +106,92 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::PUNPCKHBWrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(16, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKHBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHBWrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKHBWYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHBWYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v32i8, ShuffleMask);
break;
case X86::PUNPCKHWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKHWDrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(8, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKHWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHWDrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKHWDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHWDYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v16i16, ShuffleMask);
break;
case X86::PUNPCKHDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKHDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(4, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKHDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKHDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v8i32, ShuffleMask);
break;
case X86::PUNPCKHQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKHQDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(2, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKHQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHQDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKHQDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHQDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v4i64, ShuffleMask);
break;
case X86::PUNPCKLBWrr:
@@ -135,42 +199,117 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::PUNPCKLBWrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLBWMask(16, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKLBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLBWrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKLBWYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLBWYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v32i8, ShuffleMask);
break;
case X86::PUNPCKLWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLWDrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLWDMask(8, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKLWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLWDrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKLWDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLWDYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v16i16, ShuffleMask);
break;
case X86::PUNPCKLDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLDQMask(4, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKLDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKLDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v8i32, ShuffleMask);
break;
case X86::PUNPCKLQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLQDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLQDQMask(2, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKLQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLQDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKLQDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLQDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v4i64, ShuffleMask);
break;
case X86::SHUFPDrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::SHUFPDrmi:
- DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
+ DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VSHUFPDrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VSHUFPDrmi:
- DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
+ DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VSHUFPDYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPDYrmi:
+ DecodeSHUFPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -179,14 +318,25 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::SHUFPSrmi:
- DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
+ DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VSHUFPSrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VSHUFPSrmi:
- DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
+ DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VSHUFPSYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPSYrmi:
+ DecodeSHUFPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -195,14 +345,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPDrm:
- DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPDrm:
- DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -210,7 +360,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPDYrm:
- DecodeUNPCKLPMask(MVT::v4f64, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -218,14 +368,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPSrm:
- DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPSrm:
- DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -233,7 +383,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPSYrm:
- DecodeUNPCKLPMask(MVT::v8f32, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v8f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -241,14 +391,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKHPDrm:
- DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKHPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKHPDrm:
- DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -256,7 +406,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKHPDYrm:
- DecodeUNPCKLPMask(MVT::v4f64, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v4f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -264,14 +414,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKHPSrm:
- DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKHPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKHPSrm:
- DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -279,34 +429,52 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKHPSYrm:
- DecodeUNPCKHPMask(MVT::v8f32, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v8f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPSri:
- DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPSmi:
+ DecodeVPERMILPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPSYri:
- DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPSYmi:
+ DecodeVPERMILPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPDri:
- DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPDmi:
+ DecodeVPERMILPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPDYri:
- DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPDYmi:
+ DecodeVPERMILPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERM2F128rr:
- DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ case X86::VPERM2I128rr:
Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPERM2F128rm:
+ case X86::VPERM2I128rm:
+ DecodeVPERM2F128Mask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
}
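
[Editor's note: the rewrite above replaces the per-instruction DecodePUNPCK*/DecodeUNPCK*P* helpers with two MVT-driven entry points, DecodeUNPCKLMask and DecodeUNPCKHMask. For a 128-bit type the high-unpack mask interleaves the high halves of the two sources; a sketch of that index pattern, omitting the per-128-bit-lane repetition the AVX types need.]

#include <cassert>
#include <vector>

std::vector<unsigned> unpckhMask(unsigned NumElts) {
  std::vector<unsigned> Mask;
  for (unsigned i = NumElts / 2; i != NumElts; ++i) {
    Mask.push_back(i);           // element from the high half of source 1
    Mask.push_back(i + NumElts); // element from the high half of source 2
  }
  return Mask;
}

int main() {
  // v4i32 high-unpack: {2, 6, 3, 7}, as the PUNPCKHDQ comment would print.
  assert(unpckhMask(4) == (std::vector<unsigned>{2, 6, 3, 7}));
  return 0;
}
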
diff --git a/lib/Target/X86/LLVMBuild.txt b/lib/Target/X86/LLVMBuild.txt
index 514566c..87305e0 100644
--- a/lib/Target/X86/LLVMBuild.txt
+++ b/lib/Target/X86/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Utils
+
[component_0]
type = TargetGroup
name = X86
@@ -30,4 +33,3 @@ name = X86CodeGen
parent = X86
required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target X86AsmPrinter X86Desc X86Info X86Utils
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
index 264e791..ab2ebb4 100644
--- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
@@ -6,13 +6,6 @@ add_llvm_library(LLVMX86Desc
X86MachObjectWriter.cpp
)
-add_llvm_library_dependencies(LLVMX86Desc
- LLVMMC
- LLVMSupport
- LLVMX86AsmPrinter
- LLVMX86Info
- )
-
add_dependencies(LLVMX86Desc X86CommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
diff --git a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
index 3d09301..9e1d29c 100644
--- a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86Desc
parent = X86
required_libraries = MC Support X86AsmPrinter X86Info
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 69ad7d7..87b2b05 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -107,6 +107,11 @@ public:
bool MayNeedRelaxation(const MCInst &Inst) const;
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
+
void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
@@ -244,6 +249,14 @@ bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
return hasExp && !hasRIP;
}
+bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // Relax if the value is too big for a (signed) i8.
+ return int64_t(Value) != int64_t(int8_t(Value));
+}
+
// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
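
[Editor's note: the new fixupNeedsRelaxation hook keeps the short instruction form only while the fixup value survives a round trip through a signed 8-bit immediate. The predicate below is copied from the hunk above; the test cases are illustrative.]

#include <cassert>
#include <cstdint>

static bool needsRelax(uint64_t Value) {
  // Relax if the value is too big for a (signed) i8.
  return int64_t(Value) != int64_t(int8_t(Value));
}

int main() {
  assert(!needsRelax(127));             // fits in i8
  assert(needsRelax(128));              // too big
  assert(!needsRelax(uint64_t(-128)));  // -128 still fits
  assert(needsRelax(uint64_t(-129)));   // -129 does not
  return 0;
}
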
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index c50f785..662ac1d 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -304,6 +304,12 @@ namespace X86II {
// TAXD - Prefix before and after 0x0F. Combination of TA and XD.
TAXD = 19 << Op0Shift,
+ // XOP8 - Prefix selecting the XOP 08h opcode map (instructions with an imm byte).
+ XOP8 = 20 << Op0Shift,
+
+ // XOP9 - Prefix selecting the XOP 09h opcode map (instructions with no imm byte).
+ XOP9 = 21 << Op0Shift,
+
//===------------------------------------------------------------------===//
// REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
// They are used to specify GPRs and SSE registers, 64-bit operand size,
@@ -418,7 +424,16 @@ namespace X86II {
/// storing a classifier in the imm8 field. To simplify our implementation,
/// we handle this by storing the classifier in the opcode field and using
/// this flag to indicate that the encoder should do the wacky 3DNow! thing.
- Has3DNow0F0FOpcode = 1U << 7
+ Has3DNow0F0FOpcode = 1U << 7,
+
+ /// XOP_W - Same bit as VEX_W. Used to indicate swapping of
/// operands 3 and 4 to be encoded in ModRM or I8IMM. This is used
+ /// for FMA4 and XOP instructions.
+ XOP_W = 1U << 8,
+
+ /// XOP - Opcode prefix used by XOP instructions.
+ XOP = 1U << 9
+
};
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -488,9 +503,12 @@ namespace X86II {
return 0;
case X86II::MRMSrcMem: {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W;
unsigned FirstMemOp = 1;
if (HasVEX_4V)
++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
+ if (HasXOP_W)
+ ++FirstMemOp;// Skip the register source (which is encoded in I8IMM).
// FIXME: Maybe lea should have its own form? This is a horrible hack.
//if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 2703100..eb64ad1 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -125,7 +125,19 @@ getNonexecutableStackSection(MCContext &Ctx) const {
0, SectionKind::getMetadata());
}
-X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
+X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
+ if (Triple.getArch() == Triple::x86_64) {
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ }
+
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+}
+
+X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
if (Triple.getArch() == Triple::x86_64) {
GlobalPrefix = "";
PrivateGlobalPrefix = ".L";
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index 2cd4c8e..5d619e8 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -38,8 +38,12 @@ namespace llvm {
virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
};
- struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF {
- explicit X86MCAsmInfoCOFF(const Triple &Triple);
+ struct X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
+ explicit X86MCAsmInfoMicrosoft(const Triple &Triple);
+ };
+
+ struct X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF {
+ explicit X86MCAsmInfoGNUCOFF(const Triple &Triple);
};
} // namespace llvm
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 1ab469c..8e14cb1 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -169,23 +169,36 @@ static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
return false;
}
-/// StartsWithGlobalOffsetTable - Return true for the simple cases where this
-/// expression starts with _GLOBAL_OFFSET_TABLE_. This is a needed to support
-/// PIC on ELF i386 as that symbol is magic. We check only simple case that
+/// StartsWithGlobalOffsetTable - Check if this expression starts with
+/// _GLOBAL_OFFSET_TABLE_ and if it is of the form
+/// _GLOBAL_OFFSET_TABLE_-symbol. This is needed to support PIC on ELF
+/// i386 as _GLOBAL_OFFSET_TABLE_ is magical. We check only the simple cases
/// that are known to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start
/// of a binary expression.
-static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+enum GlobalOffsetTableExprKind {
+ GOT_None,
+ GOT_Normal,
+ GOT_SymDiff
+};
+static GlobalOffsetTableExprKind
+StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+ const MCExpr *RHS = 0;
if (Expr->getKind() == MCExpr::Binary) {
const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
Expr = BE->getLHS();
+ RHS = BE->getRHS();
}
if (Expr->getKind() != MCExpr::SymbolRef)
- return false;
+ return GOT_None;
const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
const MCSymbol &S = Ref->getSymbol();
- return S.getName() == "_GLOBAL_OFFSET_TABLE_";
+ if (S.getName() != "_GLOBAL_OFFSET_TABLE_")
+ return GOT_None;
+ if (RHS && RHS->getKind() == MCExpr::SymbolRef)
+ return GOT_SymDiff;
+ return GOT_Normal;
}
void X86MCCodeEmitter::
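
[Editor's note: with the classifier now returning a kind instead of a bool, the cases it distinguishes look roughly like this, derived only from the logic above.]

// foo                          -> GOT_None    (LHS is not _GLOBAL_OFFSET_TABLE_)
// _GLOBAL_OFFSET_TABLE_        -> GOT_Normal  (bare symbol reference)
// _GLOBAL_OFFSET_TABLE_ + 4    -> GOT_Normal  (binary, RHS is not a symbol ref)
// _GLOBAL_OFFSET_TABLE_ - sym  -> GOT_SymDiff (binary, RHS is a symbol ref)
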
@@ -209,12 +222,15 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
// If we have an immoffset, add it to the expression.
if ((FixupKind == FK_Data_4 ||
- FixupKind == MCFixupKind(X86::reloc_signed_4byte)) &&
- StartsWithGlobalOffsetTable(Expr)) {
- assert(ImmOffset == 0);
-
- FixupKind = MCFixupKind(X86::reloc_global_offset_table);
- ImmOffset = CurByte;
+ FixupKind == MCFixupKind(X86::reloc_signed_4byte))) {
+ GlobalOffsetTableExprKind Kind = StartsWithGlobalOffsetTable(Expr);
+ if (Kind != GOT_None) {
+ assert(ImmOffset == 0);
+
+ FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+ if (Kind == GOT_Normal)
+ ImmOffset = CurByte;
+ }
}
// If the fixup is pc-relative, we need to bias the value to be relative to
@@ -415,6 +431,13 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// opcode extension, or ignored, depending on the opcode byte)
unsigned char VEX_W = 0;
+ // XOP_W: opcode specific, same bit as VEX_W, but used to
+ // swap operands 3 and 4 for FMA4 and XOP instructions
+ unsigned char XOP_W = 0;
+
+ // XOP: Use XOP prefix byte 0x8f instead of VEX.
+ unsigned char XOP = 0;
+
// VEX_5M (VEX m-mmmmm field):
//
// 0b00000: Reserved for future use
@@ -422,7 +445,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// 0b00010: implied 0F 38 leading opcode bytes
// 0b00011: implied 0F 3A leading opcode bytes
// 0b00100-0b11111: Reserved for future use
- //
+ // 0b01000: XOP map select - 08h instructions with imm byte
+ // 0b01001: XOP map select - 09h instructions with no imm byte
unsigned char VEX_5M = 0x1;
// VEX_4V (VEX vvvv field): a register specifier
@@ -453,6 +477,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
VEX_W = 1;
+ if ((TSFlags >> X86II::VEXShift) & X86II::XOP_W)
+ XOP_W = 1;
+
+ if ((TSFlags >> X86II::VEXShift) & X86II::XOP)
+ XOP = 1;
+
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
VEX_L = 1;
@@ -482,6 +512,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::XD: // F2 0F
VEX_PP = 0x3;
break;
+ case X86II::XOP8:
+ VEX_5M = 0x8;
+ break;
+ case X86II::XOP9:
+ VEX_5M = 0x9;
+ break;
case X86II::A6: // Bypass: Not used by VEX
case X86II::A7: // Bypass: Not used by VEX
case X86II::TB: // Bypass: Not used by VEX
@@ -489,6 +525,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
break; // No prefix!
}
+
// Set the vector length to 256-bit if YMM0-YMM15 is used
for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
if (!MI.getOperand(i).isReg())
@@ -529,6 +566,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// src1(ModR/M), MemAddr, imm8
// src1(ModR/M), MemAddr, src2(VEX_I8IMM)
//
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
VEX_R = 0x0;
@@ -620,16 +660,16 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
//
unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
- if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix
+ if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix
EmitByte(0xC5, CurByte, OS);
EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
return;
}
// 3 byte VEX prefix
- EmitByte(0xC4, CurByte, OS);
+ EmitByte(XOP ? 0x8F : 0xC4, CurByte, OS);
EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
- EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
+ EmitByte(LastByte | ((VEX_W | XOP_W) << 7), CurByte, OS);
}
/// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64
@@ -889,6 +929,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// It uses the VEX.VVVV field?
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasXOP_W = (TSFlags >> X86II::VEXShift) & X86II::XOP_W;
+ unsigned XOP_W_I8IMMOperand = 2;
// Determine where the memory operand starts, if present.
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
@@ -961,9 +1003,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
SrcRegNum++;
+ if (HasXOP_W) // Skip 2nd src (which is encoded in I8IMM)
+ SrcRegNum++;
+
EmitRegModRMByte(MI.getOperand(SrcRegNum),
GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
- CurOp = SrcRegNum + 1;
+
+ // 2 operands skipped with HasXOP_W, compensate accordingly
+ CurOp = HasXOP_W ? SrcRegNum : SrcRegNum + 1;
if (HasVEX_4VOp3)
++CurOp;
break;
@@ -975,6 +1022,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
++AddrOperands;
++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
}
+ if (HasXOP_W) // Skip second register source (encoded in I8IMM)
+ ++FirstMemOp;
EmitByte(BaseOpcode, CurByte, OS);
@@ -1062,12 +1111,24 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// according to the right size for the instruction.
if (CurOp != NumOps) {
// The last source register of a 4 operand instruction in AVX is encoded
- // in bits[7:4] of a immediate byte, and bits[3:0] are ignored.
+ // in bits[7:4] of an immediate byte.
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
- const MCOperand &MO = MI.getOperand(CurOp++);
+ const MCOperand &MO = MI.getOperand(HasXOP_W ? XOP_W_I8IMMOperand
+ : CurOp);
+ CurOp++;
bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg());
unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
RegNum |= GetX86RegNum(MO) << 4;
+ // If there is an additional 5th operand, it must be an immediate, which
+ // is encoded in bits[3:0]
+ if (CurOp != NumOps) {
+ const MCOperand &MIMM = MI.getOperand(CurOp++);
+ if (MIMM.isImm()) {
+ unsigned Val = MIMM.getImm();
+ assert(Val < 16 && "Immediate operand value out of range");
+ RegNum |= Val;
+ }
+ }
EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
Fixups);
} else {
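
For the five-operand XOP/FMA4 forms, the trailing byte does double duty: the
fourth register lands in bits [7:4] (bit 7 is the x86-64 extended-register
bit) and a small immediate, when present, lands in bits [3:0]. A standalone
sketch with illustrative names, assuming the 3-bit register number and the
extension bit have already been extracted:

    #include <cassert>
    #include <stdint.h>

    // Build the is4/imm8 byte: register in bits [7:4], immediate in [3:0].
    static uint8_t makeIs4Byte(unsigned RegNum3Bit, bool IsExtReg,
                               unsigned Imm4) {
      assert(RegNum3Bit < 8 && Imm4 < 16 && "field out of range");
      uint8_t Byte = (IsExtReg ? 0x80 : 0x00) | (RegNum3Bit << 4);
      return Byte | (Imm4 & 0xF);
    }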
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index a843515..f2a34ed 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -361,8 +361,10 @@ static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
MAI = new X86_64MCAsmInfoDarwin(TheTriple);
else
MAI = new X86MCAsmInfoDarwin(TheTriple);
- } else if (TheTriple.isOSWindows()) {
- MAI = new X86MCAsmInfoCOFF(TheTriple);
+ } else if (TheTriple.getOS() == Triple::Win32) {
+ MAI = new X86MCAsmInfoMicrosoft(TheTriple);
+ } else if (TheTriple.getOS() == Triple::MinGW32 || TheTriple.getOS() == Triple::Cygwin) {
+ MAI = new X86MCAsmInfoGNUCOFF(TheTriple);
} else {
MAI = new X86ELFMCAsmInfo(TheTriple);
}
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 7d901af..a581993 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -922,16 +922,3 @@ _test2: ## @test2
The insertps's of $0 are pointless complex copies.
//===---------------------------------------------------------------------===//
-
-If SSE4.1 is available we should inline rounding functions instead of emitting
-a libcall.
-
-floor: roundsd $0x01, %xmm, %xmm
-ceil: roundsd $0x02, %xmm, %xmm
-
-and likewise for the single precision versions.
-
-Currently, SelectionDAGBuilder doesn't turn calls to these functions into the
-corresponding nodes and some targets (including X86) aren't ready for them.
-
-//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/TargetInfo/CMakeLists.txt b/lib/Target/X86/TargetInfo/CMakeLists.txt
index 4da00fa..b1d0b9f 100644
--- a/lib/Target/X86/TargetInfo/CMakeLists.txt
+++ b/lib/Target/X86/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMX86Info
X86TargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMX86Info
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMX86Info X86CommonTableGen)
diff --git a/lib/Target/X86/TargetInfo/LLVMBuild.txt b/lib/Target/X86/TargetInfo/LLVMBuild.txt
index ee015bd..3c64a22 100644
--- a/lib/Target/X86/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/X86/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86Info
parent = X86
required_libraries = MC Support Target
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/Utils/CMakeLists.txt b/lib/Target/X86/Utils/CMakeLists.txt
index caffd8b..2e72c34 100644
--- a/lib/Target/X86/Utils/CMakeLists.txt
+++ b/lib/Target/X86/Utils/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMX86Utils
X86ShuffleDecode.cpp
)
-add_llvm_library_dependencies(LLVMX86Utils
- LLVMCore
- LLVMSupport
- )
-
add_dependencies(LLVMX86Utils X86CommonTableGen)
diff --git a/lib/Target/X86/Utils/LLVMBuild.txt b/lib/Target/X86/Utils/LLVMBuild.txt
index 3ee441e..de0a30f 100644
--- a/lib/Target/X86/Utils/LLVMBuild.txt
+++ b/lib/Target/X86/Utils/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = X86Utils
parent = X86
required_libraries = Core Support
add_to_library_groups = X86
-
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index f6c9d7b..e7631b6 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -95,54 +95,31 @@ void DecodePSHUFLWMask(unsigned Imm,
ShuffleMask.push_back(7);
}
-void DecodePUNPCKLBWMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLWDMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLQDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLMask(EVT VT,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(VT, ShuffleMask);
-}
+void DecodeSHUFPMask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
-void DecodePUNPCKHMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i+NElts/2);
- ShuffleMask.push_back(i+NElts+NElts/2);
- }
-}
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
-void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- // Part that reads from dest.
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(Imm % NElts);
- Imm /= NElts;
- }
- // Part that reads from src.
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(Imm % NElts + NElts);
- Imm /= NElts;
+ int NewImm = Imm;
+ for (unsigned l = 0; l < NumLanes; ++l) {
+ unsigned LaneStart = l * NumLaneElts;
+ // Part that reads from dest.
+ for (unsigned i = 0; i != NumLaneElts/2; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + LaneStart);
+ NewImm /= NumLaneElts;
+ }
+ // Part that reads from src.
+ for (unsigned i = 0; i != NumLaneElts/2; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + LaneStart);
+ NewImm /= NumLaneElts;
+ }
+ if (NumLaneElts == 4) NewImm = Imm; // reload imm
}
}
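
A worked example of the lane loop above, re-derived standalone for the single
128-bit lane of v4f32 (NumLaneElts == 4) so the arithmetic can be checked in
isolation:

    #include <cstdio>
    #include <vector>

    // shufps $0x1B: two 2-bit fields pick from dest, two pick from src.
    int main() {
      unsigned Imm = 0x1B, NumLaneElts = 4, NumElts = 4, NewImm = Imm;
      std::vector<unsigned> Mask;
      for (unsigned i = 0; i != NumLaneElts/2; ++i) {  // reads from dest
        Mask.push_back(NewImm % NumLaneElts);
        NewImm /= NumLaneElts;
      }
      for (unsigned i = 0; i != NumLaneElts/2; ++i) {  // reads from src
        Mask.push_back(NewImm % NumLaneElts + NumElts);
        NewImm /= NumLaneElts;
      }
      for (unsigned M : Mask)
        std::printf("%u ", M);                         // prints: 3 2 5 4
      return 0;
    }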
-void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -161,10 +138,10 @@ void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
}
}
-/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// etc. VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
-void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -183,36 +160,23 @@ void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
}
}
-// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes and the mask of the first lane must
-// be the same of the second.
-void DecodeVPERMILPSMask(unsigned NumElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- unsigned NumLanes = (NumElts*32)/128;
- unsigned LaneSize = NumElts/NumLanes;
-
- for (unsigned l = 0; l != NumLanes; ++l) {
- for (unsigned i = 0; i != LaneSize; ++i) {
- unsigned Idx = (Imm >> (i*2)) & 0x3 ;
- ShuffleMask.push_back(Idx+(l*LaneSize));
- }
- }
-}
+// DecodeVPERMILPMask - Decodes VPERMILPS/VPERMILPD permutes for 32-bit or
+// 64-bit elements. A 256-bit vector is treated as two 128-bit lanes. For
+// VPERMILPS, referenced elements can't cross lanes and the mask of the
+// first lane must be the same as that of the second.
+void DecodeVPERMILPMask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
-// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes but the mask of the first lane can
-// be the different of the second (not like VPERMILPS).
-void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- unsigned NumLanes = (NumElts*64)/128;
- unsigned LaneSize = NumElts/NumLanes;
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
- for (unsigned l = 0; l < NumLanes; ++l) {
- for (unsigned i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- unsigned Idx = (Imm >> i) & 0x1;
- ShuffleMask.push_back(Idx+(l*LaneSize));
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ unsigned LaneStart = l*NumLaneElts;
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ unsigned Idx = NumLaneElts == 4 ? (Imm >> (i*2)) & 0x3
+ : (Imm >> (i+LaneStart)) & 0x1;
+ ShuffleMask.push_back(Idx+LaneStart);
}
}
}
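
The same loop traced standalone for the VPERMILPD case (one mask bit per
64-bit element, two 128-bit lanes), which makes the no-lane-crossing property
visible: every emitted index stays inside its own lane.

    #include <cstdio>

    // v4f64, Imm = 0b0101: each bit selects element 0 or 1 within its lane.
    int main() {
      unsigned Imm = 0x5, NumElts = 4, NumLanes = 2;
      unsigned NumLaneElts = NumElts / NumLanes;        // 2
      for (unsigned l = 0; l != NumLanes; ++l) {
        unsigned LaneStart = l * NumLaneElts;
        for (unsigned i = 0; i != NumLaneElts; ++i) {
          unsigned Idx = (Imm >> (i + LaneStart)) & 0x1;
          std::printf("%u ", Idx + LaneStart);          // prints: 1 0 3 2
        }
      }
      return 0;
    }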
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 35f6530..243728f 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -46,50 +46,25 @@ void DecodePSHUFHWMask(unsigned Imm,
void DecodePSHUFLWMask(unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask);
-void DecodePUNPCKLBWMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKLWDMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKLDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKLQDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKLMask(EVT VT,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKHMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeSHUFPMask(EVT VT, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask);
-/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd
+/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
/// etc. VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
-void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
-/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
/// etc. VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
-void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
-
+void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
-// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes and the mask of the first lane must
-// be the same of the second.
-void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
-// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes but the mask of the first lane can
-// be the different of the second (not like VPERMILPS).
-void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
+// DecodeVPERMILPMask - Decodes VPERMILPS/VPERMILPD permutes for 32-bit or
+// 64-bit elements. A 256-bit vector is treated as two 128-bit lanes. For
+// VPERMILPS, referenced elements can't cross lanes and the mask of the
+// first lane must be the same as that of the second.
+void DecodeVPERMILPMask(EVT VT, unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodeVPERM2F128Mask(unsigned Imm,
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 62a7016..8229ca5 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -91,6 +91,8 @@ def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",
"Enable three-operand fused multiple-add">;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
"Enable four-operand fused multiple-add">;
+def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
+ "Enable XOP instructions">;
def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
"HasVectorUAMem", "true",
"Allow unaligned memory operands on vector/SIMD instructions">;
@@ -194,14 +196,16 @@ def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A,
- Feature3DNowA, FeatureCMPXCHG16B,
+ Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
FeatureSlowBTMem]>;
-def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A,
- Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : Proc<"istanbul", [Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSSE4A]>;
-def : Proc<"shanghai", [Feature3DNowA, FeatureCMPXCHG16B, FeatureSSE4A]>;
+// FIXME: Disabling AVX for now since it's not ready.
+def : Proc<"bdver1", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B,
+ FeatureAES, FeatureCLMUL, FeatureFMA4,
+ FeatureXOP, FeatureLZCNT]>;
+def : Proc<"bdver2", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B,
+ FeatureAES, FeatureCLMUL, FeatureFMA4,
+ FeatureXOP, FeatureF16C, FeatureLZCNT,
+ FeatureBMI]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
def : Proc<"winchip2", [Feature3DNow]>;
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 77b9905..aab2a05 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -158,10 +158,15 @@ def CC_X86_64_C : CallingConv<[
CCIfSubtarget<"hasXMM()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
- // The first 8 256-bit vector arguments are passed in YMM registers.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- CCIfSubtarget<"hasAVX()",
- CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>,
+ // The first 8 256-bit vector arguments are passed in YMM registers, unless
+ // this is a vararg function.
+ // FIXME: This isn't precisely correct; the x86-64 ABI document says that
+ // fixed arguments to vararg functions are supposed to be passed in
+ // registers. Actually modeling that would be a lot of work, though.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3,
+ YMM4, YMM5, YMM6, YMM7]>>>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
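
The practical effect of the CCIfNotVarArg guard, sketched as a pair of
illustrative C++ declarations (not from the tree): 256-bit vector arguments
still travel in YMM registers for ordinary calls, but go to the stack once
the callee is variadic.

    #include <immintrin.h>

    __m256 fixed_args(__m256 a, __m256 b);   // a in YMM0, b in YMM1
    __m256 variadic_args(int n, ...);        // 256-bit args: on the stack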
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index ba615a8..ed16e88 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -1004,7 +1004,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
break;
}
- if (!Desc->isVariadic() && CurOp != NumOps) {
+ if (!MI.isVariadic() && CurOp != NumOps) {
#ifndef NDEBUG
dbgs() << "Cannot encode all operands of: " << MI << "\n";
#endif
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 32f1770..1589439 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -728,7 +728,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
- if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
return false;
// Let SDISel handle vararg functions.
@@ -1529,7 +1529,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
- if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
return false;
PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
@@ -1543,7 +1543,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// Fast-isel doesn't know about callee-pop yet.
if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
- GuaranteedTailCallOpt))
+ TM.Options.GuaranteedTailCallOpt))
return false;
// Check whether the function can return without sret-demotion.
@@ -2121,7 +2121,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
default: return false;
case MVT::f32:
if (X86ScalarSSEf32) {
- Opc = Subtarget->hasAVX() ? X86::VFsFLD0SS : X86::FsFLD0SS;
+ Opc = X86::FsFLD0SS;
RC = X86::FR32RegisterClass;
} else {
Opc = X86::LD_Fp032;
@@ -2130,7 +2130,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
break;
case MVT::f64:
if (X86ScalarSSEf64) {
- Opc = Subtarget->hasAVX() ? X86::VFsFLD0SD : X86::FsFLD0SD;
+ Opc = X86::FsFLD0SD;
RC = X86::FR64RegisterClass;
} else {
Opc = X86::LD_Fp064;
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 819d242..6a40cc1 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -47,7 +47,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineModuleInfo &MMI = MF.getMMI();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
- return (DisableFramePointerElim(MF) ||
+ return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
RI->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() ||
@@ -210,7 +210,7 @@ static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, uint64_t *NumBytes = NULL) {
- // FIXME: THIS ISN'T RUN!!!
+ // FIXME: THIS ISN'T RUN!!!
return;
if (MBBI == MBB.end()) return;
@@ -351,20 +351,22 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
/// register. The number corresponds to the enum lists in
/// compact_unwind_encoding.h.
static int getCompactUnwindRegNum(const unsigned *CURegs, unsigned Reg) {
- int Idx = 1;
- for (; *CURegs; ++CURegs, ++Idx)
+ for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
if (*CURegs == Reg)
return Idx;
return -1;
}
+// Number of registers that can be saved in a compact unwind encoding.
+#define CU_NUM_SAVED_REGS 6
+
/// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding
/// used with frameless stacks. It is passed the number of registers to be saved
/// and an array of the registers saved.
-static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
- unsigned RegCount,
- bool Is64Bit) {
+static uint32_t
+encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
+ unsigned RegCount, bool Is64Bit) {
// The saved registers are numbered from 1 to 6. In order to encode the order
// in which they were saved, we re-number them according to their place in the
// register order. The re-numbering is relative to the last re-numbered
@@ -385,14 +387,21 @@ static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
};
const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
- uint32_t RenumRegs[6];
- for (unsigned i = 6 - RegCount; i < 6; ++i) {
+ for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) {
int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]);
if (CUReg == -1) return ~0U;
SavedRegs[i] = CUReg;
+ }
+
+ // Reverse the list.
+ std::swap(SavedRegs[0], SavedRegs[5]);
+ std::swap(SavedRegs[1], SavedRegs[4]);
+ std::swap(SavedRegs[2], SavedRegs[3]);
+ uint32_t RenumRegs[CU_NUM_SAVED_REGS];
+ for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
unsigned Countless = 0;
- for (unsigned j = 6 - RegCount; j < i; ++j)
+ for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
if (SavedRegs[j] < SavedRegs[i])
++Countless;
@@ -435,8 +444,9 @@ static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
/// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a
/// compact encoding with a frame pointer.
-static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6],
- bool Is64Bit) {
+static uint32_t
+encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
+ bool Is64Bit) {
static const unsigned CU32BitRegs[] = {
X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
};
@@ -448,13 +458,16 @@ static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6],
// Encode the registers in the order they were saved, 3 bits per register. The
// registers are numbered from 1 to 6.
uint32_t RegEnc = 0;
- for (int I = 5; I >= 0; --I) {
+ for (int I = 0; I != 6; ++I) {
unsigned Reg = SavedRegs[I];
if (Reg == 0) break;
int CURegNum = getCompactUnwindRegNum(CURegs, Reg);
if (CURegNum == -1)
return ~0U;
- RegEnc |= (CURegNum & 0x7) << (5 - I);
+
+ // Encode the 3-bit register number in order, skipping over 3 bits for each
+ // register.
+ RegEnc |= (CURegNum & 0x7) << ((5 - I) * 3);
}
assert((RegEnc & 0x7FFF) == RegEnc && "Invalid compact register encoding!");
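
A hedged sketch of reading one field back out of the frame-mode encoding
built above, assuming the fields occupy the low 15 bits that the assert
checks (the helper is illustrative only):

    #include <stdint.h>

    // Extract the I-th 3-bit compact-unwind register number, I in [0, 5).
    static unsigned unpackFrameReg(uint32_t RegEnc, unsigned I) {
      return (RegEnc >> (I * 3)) & 0x7;
    }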
@@ -466,14 +479,11 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned StackPtr = RegInfo->getStackRegister();
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
-
bool Is64Bit = STI.is64Bit();
bool HasFP = hasFP(MF);
- unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
- int SavedRegIdx = 6;
+ unsigned SavedRegs[CU_NUM_SAVED_REGS] = { 0, 0, 0, 0, 0, 0 };
+ unsigned SavedRegIdx = 0;
unsigned OffsetSize = (Is64Bit ? 8 : 4);
@@ -481,14 +491,13 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
unsigned PushInstrSize = 1;
unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
unsigned MoveInstrSize = (Is64Bit ? 3 : 2);
- unsigned SubtractInstr = getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta);
unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2);
unsigned StackDivide = (Is64Bit ? 8 : 4);
unsigned InstrOffset = 0;
- unsigned CFAOffset = 0;
unsigned StackAdjust = 0;
+ unsigned StackSize = 0;
MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB.
bool ExpectEnd = false;
@@ -504,10 +513,10 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
if (Opc == PushInstr) {
// If there are too many saved registers, we cannot use compact encoding.
- if (--SavedRegIdx < 0) return 0;
+ if (SavedRegIdx >= CU_NUM_SAVED_REGS) return 0;
- SavedRegs[SavedRegIdx] = MI.getOperand(0).getReg();
- CFAOffset += OffsetSize;
+ SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg();
+ StackAdjust += OffsetSize;
InstrOffset += PushInstrSize;
} else if (Opc == MoveInstr) {
unsigned SrcReg = MI.getOperand(1).getReg();
@@ -516,13 +525,14 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
if (DstReg != FramePtr || SrcReg != StackPtr)
return 0;
- CFAOffset = 0;
+ StackAdjust = 0;
memset(SavedRegs, 0, sizeof(SavedRegs));
- SavedRegIdx = 6;
+ SavedRegIdx = 0;
InstrOffset += MoveInstrSize;
- } else if (Opc == SubtractInstr) {
- if (StackAdjust)
- // We all ready have a stack pointer adjustment.
+ } else if (Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) {
+ if (StackSize)
+ // We already have a stack size.
return 0;
if (!MI.getOperand(0).isReg() ||
@@ -533,7 +543,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
// %RSP<def> = SUB64ri8 %RSP, 48
return 0;
- StackAdjust = MI.getOperand(2).getImm() / StackDivide;
+ StackSize = MI.getOperand(2).getImm() / StackDivide;
SubtractInstrIdx += InstrOffset;
ExpectEnd = true;
}
@@ -541,28 +551,30 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
// Encode that we are using EBP/RBP as the frame pointer.
uint32_t CompactUnwindEncoding = 0;
- CFAOffset /= StackDivide;
+ StackAdjust /= StackDivide;
if (HasFP) {
- if ((CFAOffset & 0xFF) != CFAOffset)
+ if ((StackAdjust & 0xFF) != StackAdjust)
// Offset was too big for compact encoding.
return 0;
// Get the encoding of the saved registers when we have a frame pointer.
uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
- if (RegEnc == ~0U)
- return 0;
+ if (RegEnc == ~0U) return 0;
CompactUnwindEncoding |= 0x01000000;
- CompactUnwindEncoding |= (CFAOffset & 0xFF) << 16;
+ CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
CompactUnwindEncoding |= RegEnc & 0x7FFF;
} else {
- unsigned FullOffset = CFAOffset + StackAdjust;
- if ((FullOffset & 0xFF) == FullOffset) {
- // Frameless stack.
+ ++StackAdjust;
+ uint32_t TotalStackSize = StackAdjust + StackSize;
+ if ((TotalStackSize & 0xFF) == TotalStackSize) {
+ // Frameless stack with a small stack size.
CompactUnwindEncoding |= 0x02000000;
- CompactUnwindEncoding |= (FullOffset & 0xFF) << 16;
+
+ // Encode the stack size.
+ CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16;
} else {
- if ((CFAOffset & 0x7) != CFAOffset)
+ if ((StackAdjust & 0x7) != StackAdjust)
// The extra stack adjustments are too big for us to handle.
return 0;
@@ -573,16 +585,21 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
// instruction.
CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
- // Encode any extra stack stack changes (done via push instructions).
- CompactUnwindEncoding |= (CFAOffset & 0x7) << 13;
+ // Encode any extra stack adjustments (done via push instructions).
+ CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
}
+ // Encode the number of registers saved.
+ CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
+
// Get the encoding of the saved registers when we don't have a frame
// pointer.
- uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs,
- 6 - SavedRegIdx,
- Is64Bit);
+ uint32_t RegEnc =
+ encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx,
+ Is64Bit);
if (RegEnc == ~0U) return 0;
+
+ // Encode the register encoding.
CompactUnwindEncoding |= RegEnc & 0x3FF;
}
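
The two layouts assembled above, condensed into a standalone sketch (field
meanings follow compact_unwind_encoding.h; the helper names are
illustrative):

    #include <stdint.h>

    // Frame mode: mode 1, stack adjust in bits [23:16], regs in [14:0].
    static uint32_t frameMode(uint32_t StackAdjust, uint32_t RegEnc) {
      return 0x01000000u | ((StackAdjust & 0xFF) << 16) | (RegEnc & 0x7FFF);
    }

    // Frameless small stack: mode 2, size in [23:16], count in [12:10],
    // permutation encoding in [9:0].
    static uint32_t framelessSmall(uint32_t StackSize, uint32_t RegCount,
                                   uint32_t RegEnc) {
      return 0x02000000u | ((StackSize & 0xFF) << 16) |
             ((RegCount & 0x7) << 10) | (RegEnc & 0x3FF);
    }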
@@ -638,10 +655,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// stack pointer (we fit in the Red Zone).
if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
!RegInfo->needsStackRealignment(MF) &&
- !MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->adjustsStack() && // No calls.
- !IsWin64 && // Win64 has no Red Zone
- !EnableSegmentedStacks) { // Regular stack
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ !IsWin64 && // Win64 has no Red Zone
+ !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
@@ -978,7 +995,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned Opc = PI->getOpcode();
if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
- !PI->getDesc().isTerminator())
+ !PI->isTerminator())
break;
--MBBI;
@@ -1306,6 +1323,10 @@ GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
}
}
+// The stack limit in the TCB is set to this many bytes above the actual stack
+// limit.
+static const uint64_t kSplitStackAvailable = 256;
+
void
X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MachineBasicBlock &prologueMBB = MF.front();
@@ -1360,16 +1381,24 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
TlsReg = X86::FS;
TlsOffset = 0x70;
- BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
- .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+ if (StackSize < kSplitStackAvailable)
+ ScratchReg = X86::RSP;
+ else
+ BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
+ .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+
BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
.addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else {
TlsReg = X86::GS;
TlsOffset = 0x30;
- BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
- .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+ if (StackSize < kSplitStackAvailable)
+ ScratchReg = X86::ESP;
+ else
+ BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
+ .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
+
BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
.addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
}
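
Why kSplitStackAvailable lets the LEA disappear: the runtime records a limit
that sits 256 bytes above the true bottom of the stack, so a frame smaller
than that slack can compare the stack pointer directly. The emitted check,
rewritten as plain C++ for illustration (tls_stack_limit stands in for the
%fs/%gs TCB slot):

    #include <stdint.h>

    bool needMoreStack(uintptr_t SP, uintptr_t tls_stack_limit,
                       uint64_t StackSize) {
      const uint64_t kSplitStackAvailable = 256;
      uintptr_t Scratch = (StackSize < kSplitStackAvailable)
                              ? SP               // reuse SP, no LEA needed
                              : SP - StackSize;  // LEA -StackSize(%rsp)
      return Scratch < tls_stack_limit;          // CMP against the TLS slot
    }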
@@ -1394,9 +1423,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MF.getRegInfo().setPhysRegUsed(X86::R10);
MF.getRegInfo().setPhysRegUsed(X86::R11);
} else {
- // Since we'll call __morestack, stack alignment needs to be preserved.
- BuildMI(allocMBB, DL, TII.get(X86::SUB32ri), X86::ESP).addReg(X86::ESP)
- .addImm(8);
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
.addImm(X86FI->getArgumentStackSize());
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
@@ -1411,11 +1437,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
.addExternalSymbol("__morestack");
- // __morestack only seems to remove 8 bytes off the stack. Add back the
- // additional 8 bytes we added before pushing the arguments.
- if (!Is64Bit)
- BuildMI(allocMBB, DL, TII.get(X86::ADD32ri), X86::ESP).addReg(X86::ESP)
- .addImm(8);
if (IsNested)
BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
else
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 96c6f41..03727a2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -256,7 +256,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->is64Bit()) {
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
- } else if (!UseSoftFloat) {
+ } else if (!TM.Options.UseSoftFloat) {
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
@@ -270,7 +270,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
- if (!UseSoftFloat) {
+ if (!TM.Options.UseSoftFloat) {
// SSE has no i16 to fp conversion, only i32
if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
@@ -313,7 +313,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->is64Bit()) {
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
- } else if (!UseSoftFloat) {
+ } else if (!TM.Options.UseSoftFloat) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
// Expand FP_TO_UINT into a select.
@@ -378,6 +378,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i16 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i64 , Expand);
if (Subtarget->hasBMI()) {
setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
} else {
@@ -388,6 +392,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
}
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i8 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i16 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i64 , Expand);
if (Subtarget->hasLZCNT()) {
setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
} else {
@@ -537,14 +545,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
- else if (EnableSegmentedStacks)
+ else if (TM.Options.EnableSegmentedStacks)
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Expand);
- if (!UseSoftFloat && X86ScalarSSEf64) {
+ if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
@@ -576,7 +584,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// cases we handle.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
addLegalFPImmediate(APFloat(+0.0f)); // xorps
- } else if (!UseSoftFloat && X86ScalarSSEf32) {
+ } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
@@ -605,11 +613,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
}
- } else if (!UseSoftFloat) {
+ } else if (!TM.Options.UseSoftFloat) {
// f32 and f64 in x87.
// Set up the FP register classes.
addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
@@ -620,7 +628,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
}
@@ -639,7 +647,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FMA, MVT::f32, Expand);
// Long double always uses X87.
- if (!UseSoftFloat) {
+ if (!TM.Options.UseSoftFloat) {
addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
@@ -658,11 +666,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
}
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f80 , Expand);
setOperationAction(ISD::FCOS , MVT::f80 , Expand);
}
+ setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f80, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
+ setOperationAction(ISD::FRINT, MVT::f80, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
}
@@ -714,7 +727,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
@@ -748,7 +763,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
- if (!UseSoftFloat && Subtarget->hasMMX()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
// No operations on x86mmx supported, everything uses intrinsics.
}
@@ -785,7 +800,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
- if (!UseSoftFloat && Subtarget->hasXMM()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasXMM()) {
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
@@ -802,7 +817,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
}
- if (!UseSoftFloat && Subtarget->hasXMMInt()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasXMMInt()) {
addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
// FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
@@ -983,7 +998,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->hasSSE42orAVX())
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
- if (!UseSoftFloat && Subtarget->hasAVX()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) {
addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
addRegisterClass(MVT::v16i16, X86::VR256RegisterClass);
addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
@@ -1211,10 +1226,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
- setPrefLoopAlignment(16);
+ setPrefLoopAlignment(4); // 2^4 bytes.
benefitFromCodePlacementOpt = true;
- setPrefFunctionAlignment(4);
+ setPrefFunctionAlignment(4); // 2^4 bytes.
}
@@ -1709,7 +1724,8 @@ bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
/// FuncIsMadeTailCallSafe - Return true if the function is being made into
/// a tailcall target by changing its ABI.
-static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
+static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
+ bool GuaranteedTailCallOpt) {
return GuaranteedTailCallOpt && IsTailCallConvention(CC);
}
@@ -1723,7 +1739,8 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
- bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv);
+ bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv,
+ getTargetMachine().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
@@ -1873,7 +1890,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned StackSize = CCInfo.getNextStackOffset();
// Align stack specially for tail calls.
- if (FuncIsMadeTailCallSafe(CallConv))
+ if (FuncIsMadeTailCallSafe(CallConv,
+ MF.getTarget().Options.GuaranteedTailCallOpt))
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes a variable number of arguments, make a frame index for
@@ -1918,9 +1936,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
"SSE register cannot be used when SSE is disabled!");
- assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
+ assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
+ NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
- if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
+ if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
+ !Subtarget->hasXMM())
// Kernel mode asks for SSE to be disabled, so don't push them
// on the stack.
TotalNumXMMRegs = 0;
@@ -1998,7 +2018,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
}
// Some CCs need callee pop.
- if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) {
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
+ MF.getTarget().Options.GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
@@ -2098,7 +2119,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
- if (!GuaranteedTailCallOpt && isTailCall)
+ if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
IsSibcall = true;
if (isTailCall)
@@ -2126,7 +2147,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
NumBytes = 0;
- else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv))
+ else if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ IsTailCallConvention(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
@@ -2305,7 +2327,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
int FI = 0;
// Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDValue();
- if (GuaranteedTailCallOpt) {
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isRegLoc())
@@ -2485,7 +2507,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush;
- if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt))
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
+ getTargetMachine().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
// If this is a call to a struct-return function, the callee
@@ -2643,7 +2666,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
- if (GuaranteedTailCallOpt) {
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) && CCMatch)
return true;
return false;
@@ -2843,23 +2866,10 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
- case X86ISD::VPERMILPS:
- case X86ISD::VPERMILPSY:
- case X86ISD::VPERMILPD:
- case X86ISD::VPERMILPDY:
- case X86ISD::VPERM2F128:
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+ case X86ISD::VPERMILP:
+ case X86ISD::VPERM2X128:
return true;
}
return false;
@@ -2885,10 +2895,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
- case X86ISD::VPERMILPS:
- case X86ISD::VPERMILPSY:
- case X86ISD::VPERMILPD:
- case X86ISD::VPERMILPDY:
+ case X86ISD::VPERMILP:
return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
}
@@ -2902,7 +2909,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::PALIGN:
case X86ISD::SHUFPD:
case X86ISD::SHUFPS:
- case X86ISD::VPERM2F128:
+ case X86ISD::VPERM2X128:
return DAG.getNode(Opc, dl, VT, V1, V2,
DAG.getConstant(TargetMask, MVT::i8));
}
@@ -2920,18 +2927,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::MOVLPD:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
return DAG.getNode(Opc, dl, VT, V1, V2);
}
return SDValue();
@@ -3231,7 +3228,7 @@ bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
bool hasSSSE3OrAVX) {
int i, e = VT.getVectorNumElements();
- if (VT.getSizeInBits() != 128 && VT.getSizeInBits() != 64)
+ if (VT.getSizeInBits() != 128)
return false;
// Do not handle v2i64 / v2f64 shuffles with palignr.
@@ -3261,17 +3258,17 @@ static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
return true;
}
-/// isVSHUFPSYMask - Return true if the specified VECTOR_SHUFFLE operand
+/// isVSHUFPYMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// VSHUFPSY.
-static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
+static bool isVSHUFPYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool HasAVX, bool Commuted = false) {
int NumElems = VT.getVectorNumElements();
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+ if (!HasAVX || VT.getSizeInBits() != 256)
return false;
- if (NumElems != 8)
+ if (NumElems != 4 && NumElems != 8)
return false;
// VSHUFPSY divides the resulting vector into 4 chunks.
@@ -3284,124 +3281,63 @@ static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
// DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4,
// Y3..Y0, Y3..Y0, X3..X0, X3..X0
//
- int QuarterSize = NumElems/4;
- int HalfSize = QuarterSize*2;
- for (int i = 0; i < QuarterSize; ++i)
- if (!isUndefOrInRange(Mask[i], 0, HalfSize))
- return false;
- for (int i = QuarterSize; i < QuarterSize*2; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
- return false;
-
- // The mask of the second half must be the same as the first but with
- // the appropriate offsets. This works in the same way as VPERMILPS
- // works with masks.
- for (int i = QuarterSize*2; i < QuarterSize*3; ++i) {
- if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
- return false;
- int FstHalfIdx = i-HalfSize;
- if (Mask[FstHalfIdx] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
- return false;
- }
- for (int i = QuarterSize*3; i < NumElems; ++i) {
- if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
- return false;
- int FstHalfIdx = i-HalfSize;
- if (Mask[FstHalfIdx] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
- return false;
-
- }
-
- return true;
-}
-
-/// getShuffleVSHUFPSYImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VSHUFPSY instruction.
-static unsigned getShuffleVSHUFPSYImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VT = SVOp->getValueType(0);
- int NumElems = VT.getVectorNumElements();
-
- assert(NumElems == 8 && VT.getSizeInBits() == 256 &&
- "Only supports v8i32 and v8f32 types");
-
- int HalfSize = NumElems/2;
- unsigned Mask = 0;
- for (int i = 0; i != NumElems ; ++i) {
- if (SVOp->getMaskElt(i) < 0)
- continue;
- // The mask of the first half must be equal to the second one.
- unsigned Shamt = (i%HalfSize)*2;
- unsigned Elt = SVOp->getMaskElt(i) % HalfSize;
- Mask |= Elt << Shamt;
- }
-
- return Mask;
-}
-
-/// isVSHUFPDYMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to 256-bit
-/// VSHUFPDY. This shuffle doesn't have the same restriction as the PS
-/// version and the mask of the second half isn't binded with the first
-/// one.
-static bool isVSHUFPDYMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- int NumElems = VT.getVectorNumElements();
-
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
- return false;
-
- if (NumElems != 4)
- return false;
-
- // VSHUFPSY divides the resulting vector into 4 chunks.
+ // VSHUFPDY divides the resulting vector into 4 chunks.
// The sources are also split into 4 chunks, and each destination
// chunk must come from a different source chunk.
//
// SRC1 => X3 X2 X1 X0
// SRC2 => Y3 Y2 Y1 Y0
//
- // DST => Y2..Y3, X2..X3, Y1..Y0, X1..X0
+ // DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0
//
- int QuarterSize = NumElems/4;
- int HalfSize = QuarterSize*2;
- for (int i = 0; i < QuarterSize; ++i)
- if (!isUndefOrInRange(Mask[i], 0, HalfSize))
- return false;
- for (int i = QuarterSize; i < QuarterSize*2; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
- return false;
- for (int i = QuarterSize*2; i < QuarterSize*3; ++i)
- if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
- return false;
- for (int i = QuarterSize*3; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
- return false;
+ unsigned QuarterSize = NumElems/4;
+ unsigned HalfSize = QuarterSize*2;
+ for (unsigned l = 0; l != 2; ++l) {
+ unsigned LaneStart = l*HalfSize;
+ for (unsigned s = 0; s != 2; ++s) {
+ unsigned QuarterStart = s*QuarterSize;
+ unsigned Src = (Commuted) ? (1-s) : s;
+ unsigned SrcStart = Src*NumElems + LaneStart;
+ for (unsigned i = 0; i != QuarterSize; ++i) {
+ int Idx = Mask[i+QuarterStart+LaneStart];
+ if (!isUndefOrInRange(Idx, SrcStart, SrcStart+HalfSize))
+ return false;
+ // For VSHUFPSY, the mask of the second half must be the same as the first
+ // but with the appropriate offsets. This works in the same way as
+ // VPERMILPS works with masks.
+ if (NumElems == 4 || l == 0 || Mask[i+QuarterStart] < 0)
+ continue;
+ if (!isUndefOrEqual(Idx, Mask[i+QuarterStart]+HalfSize))
+ return false;
+ }
+ }
+ }
return true;
}
-/// getShuffleVSHUFPDYImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VSHUFPDY instruction.
-static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) {
+/// getShuffleVSHUFPYImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VSHUFPSY/VSHUFPDY instructions.
+static unsigned getShuffleVSHUFPYImmediate(SDNode *N) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
EVT VT = SVOp->getValueType(0);
int NumElems = VT.getVectorNumElements();
- assert(NumElems == 4 && VT.getSizeInBits() == 256 &&
- "Only supports v4i64 and v4f64 types");
+ assert(VT.getSizeInBits() == 256 && "Only supports 256-bit types");
+ assert((NumElems == 4 || NumElems == 8) && "Only supports v4 and v8 types");
int HalfSize = NumElems/2;
+ unsigned Mul = (NumElems == 8) ? 2 : 1;
unsigned Mask = 0;
- for (int i = 0; i != NumElems ; ++i) {
- if (SVOp->getMaskElt(i) < 0)
+ for (int i = 0; i != NumElems; ++i) {
+ int Elt = SVOp->getMaskElt(i);
+ if (Elt < 0)
continue;
- int Elt = SVOp->getMaskElt(i) % HalfSize;
- Mask |= Elt << i;
+ Elt %= HalfSize;
+ unsigned Shamt = i;
+ // For VSHUFPSY, the mask of the first half must be equal to the second one.
+ if (NumElems == 8) Shamt %= HalfSize;
+ Mask |= Elt << (Shamt*Mul);
}
return Mask;
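
A standalone trace of the immediate computation for the VSHUFPDY case
(Mul == 1, one immediate bit per element), using the mask <0, 5, 2, 7>:

    #include <cstdio>

    int main() {
      int MaskElts[4] = {0, 5, 2, 7};
      unsigned NumElems = 4, HalfSize = 2, Mask = 0;
      for (unsigned i = 0; i != NumElems; ++i) {
        int Elt = MaskElts[i];
        if (Elt < 0) continue;
        Elt %= HalfSize;            // index within the source half
        Mask |= Elt << i;           // Shamt == i when NumElems == 4
      }
      std::printf("0x%X\n", Mask);  // prints: 0xA
      return 0;
    }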
@@ -3409,8 +3345,8 @@ static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) {
/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
- unsigned NumElems = VT.getVectorNumElements();
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
+ unsigned NumElems) {
for (unsigned i = 0; i != NumElems; ++i) {
int idx = Mask[i];
if (idx < 0)
@@ -3422,31 +3358,13 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
}
}
-/// isCommutedVSHUFP() - Return true if swapping operands will
-/// allow to use the "vshufpd" or "vshufps" instruction
-/// for 256-bit vectors
-static bool isCommutedVSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
-
- unsigned NumElems = VT.getVectorNumElements();
- if ((VT.getSizeInBits() != 256) || ((NumElems != 4) && (NumElems != 8)))
- return false;
-
- SmallVector<int, 8> CommutedMask;
- for (unsigned i = 0; i < NumElems; ++i)
- CommutedMask.push_back(Mask[i]);
-
- CommuteVectorShuffleMask(CommutedMask, VT);
- return (NumElems == 4) ? isVSHUFPDYMask(CommutedMask, VT, Subtarget):
- isVSHUFPSYMask(CommutedMask, VT, Subtarget);
-}
-
-
/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 128-bit
-/// SHUFPS and SHUFPD.
-static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
- int NumElems = VT.getVectorNumElements();
+/// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be
+/// reverse of what x86 shuffles want.
+static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool Commuted = false) {
+ unsigned NumElems = VT.getVectorNumElements();
if (VT.getSizeInBits() != 128)
return false;
@@ -3454,12 +3372,14 @@ static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (NumElems != 2 && NumElems != 4)
return false;
- int Half = NumElems / 2;
- for (int i = 0; i < Half; ++i)
- if (!isUndefOrInRange(Mask[i], 0, NumElems))
+ unsigned Half = NumElems / 2;
+ unsigned SrcStart = Commuted ? NumElems : 0;
+ for (unsigned i = 0; i != Half; ++i)
+ if (!isUndefOrInRange(Mask[i], SrcStart, SrcStart+NumElems))
return false;
- for (int i = Half; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
+ SrcStart = Commuted ? 0 : NumElems;
+ for (unsigned i = Half; i != NumElems; ++i)
+ if (!isUndefOrInRange(Mask[i], SrcStart, SrcStart+NumElems))
return false;
return true;
@@ -3471,32 +3391,6 @@ bool X86::isSHUFPMask(ShuffleVectorSDNode *N) {
return ::isSHUFPMask(M, N->getValueType(0));
}
-/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
-/// the reverse of what x86 shuffles want. x86 shuffles requires the lower
-/// half elements to come from vector 1 (which would equal the dest.) and
-/// the upper half to come from vector 2.
-static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
- int NumElems = VT.getVectorNumElements();
-
- if (NumElems != 2 && NumElems != 4)
- return false;
-
- int Half = NumElems / 2;
- for (int i = 0; i < Half; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
- return false;
- for (int i = Half; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], 0, NumElems))
- return false;
- return true;
-}
-
-static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return isCommutedSHUFPMask(M, N->getValueType(0));
-}
-
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
@@ -3765,15 +3659,15 @@ bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
return ::isMOVLMask(M, N->getValueType(0));
}
-/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered
+/// isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered
/// as permutations between 128-bit chunks or halves. As an example: this
/// shuffle below:
/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
/// The first half comes from the second half of V1 and the second half from
/// the second half of V2.
-static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+static bool isVPERM2X128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool HasAVX) {
+ if (!HasAVX || VT.getSizeInBits() != 256)
return false;
// The shuffle result is divided into half A and half B. In total the two
@@ -3801,10 +3695,9 @@ static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
return MatchA && MatchB;
}
-/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERM2F128 instructions.
-static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions.
+static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
EVT VT = SVOp->getValueType(0);
int HalfSize = VT.getVectorNumElements()/2;
@@ -3826,81 +3719,47 @@ static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
return (FstHalf | (SndHalf << 4));
}
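
The immediate packs the source half chosen for each destination half into two 4-bit fields. A small self-checking sketch (illustrative names; it assumes the mask has no undefs, whereas the real routine scans for the first defined element in each half):

    #include <cassert>

    // Halves are numbered 0..3 across the two 256-bit sources.
    static unsigned vperm2x128Imm(const int *Mask, unsigned NumElts) {
      unsigned HalfSize = NumElts / 2;
      unsigned Fst = Mask[0] / HalfSize;        // half feeding the low 128 bits
      unsigned Snd = Mask[HalfSize] / HalfSize; // half feeding the high 128 bits
      return Fst | (Snd << 4);
    }

    int main() {
      // The doc comment's example: dest low half = high half of V1 (1),
      // dest high half = high half of V2 (3), so 1 | (3 << 4) = 0x31.
      int Mask[] = {4, 5, 6, 7, 12, 13, 14, 15};
      assert(vperm2x128Imm(Mask, 8) == 0x31);
    }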
-/// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand
+/// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to VPERMILPD*.
/// Note that VPERMIL mask matching is different depending on whether the underlying
/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
/// to the same elements of the low, but to the higher half of the source.
/// In VPERMILPD the two lanes could be shuffled independently of each other
/// with the same restriction that lanes can't be crossed.
-static bool isVPERMILPDMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
+static bool isVPERMILPMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool HasAVX) {
int NumElts = VT.getVectorNumElements();
int NumLanes = VT.getSizeInBits()/128;
- if (!Subtarget->hasAVX())
+ if (!HasAVX)
return false;
- // Only match 256-bit with 64-bit types
- if (VT.getSizeInBits() != 256 || NumElts != 4)
+ // Only match 256-bit with 32/64-bit types
+ if (VT.getSizeInBits() != 256 || (NumElts != 4 && NumElts != 8))
return false;
- // The mask on the high lane is independent of the low. Both can match
- // any element in inside its own lane, but can't cross.
int LaneSize = NumElts/NumLanes;
- for (int l = 0; l < NumLanes; ++l)
- for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- int LaneStart = l*LaneSize;
- if (!isUndefOrInRange(Mask[i], LaneStart, LaneStart+LaneSize))
+ for (int l = 0; l != NumLanes; ++l) {
+ int LaneStart = l*LaneSize;
+ for (int i = 0; i != LaneSize; ++i) {
+ if (!isUndefOrInRange(Mask[i+LaneStart], LaneStart, LaneStart+LaneSize))
+ return false;
+ if (NumElts == 4 || l == 0)
+ continue;
+ // VPERMILPS handling
+ if (Mask[i] < 0)
+ continue;
+ if (!isUndefOrEqual(Mask[i+LaneStart], Mask[i]+LaneSize))
return false;
}
-
- return true;
-}
-
-/// isVPERMILPSMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to VPERMILPS*.
-/// Note that VPERMIL mask matching is different depending whether theunderlying
-/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
-/// to the same elements of the low, but to the higher half of the source.
-/// In VPERMILPD the two lanes could be shuffled independently of each other
-/// with the same restriction that lanes can't be crossed.
-static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
-
- if (!Subtarget->hasAVX())
- return false;
-
- // Only match 256-bit with 32-bit types
- if (VT.getSizeInBits() != 256 || NumElts != 8)
- return false;
-
- // The mask on the high lane should be the same as the low. Actually,
- // they can differ if any of the corresponding index in a lane is undef
- // and the other stays in range.
- int LaneSize = NumElts/NumLanes;
- for (int i = 0; i < LaneSize; ++i) {
- int HighElt = i+LaneSize;
- bool HighValid = isUndefOrInRange(Mask[HighElt], LaneSize, NumElts);
- bool LowValid = isUndefOrInRange(Mask[i], 0, LaneSize);
-
- if (!HighValid || !LowValid)
- return false;
- if (Mask[i] < 0 || Mask[HighElt] < 0)
- continue;
- if (Mask[HighElt]-Mask[i] != LaneSize)
- return false;
}
return true;
}
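
The merged predicate walks lane by lane: each element must select within its own 128-bit lane, and for the v8f32 (VPERMILPS) form the upper lane must repeat the lower lane's pattern shifted by one lane, undefs excepted; the v4f64 (VPERMILPD) form leaves the lanes independent. A rough standalone model under the same -1-for-undef convention (illustrative names):

    #include <vector>

    static bool isVPermilpMask(const std::vector<int> &Mask) {
      int NumElts = (int)Mask.size();        // 4 for v4f64, 8 for v8f32
      if (NumElts != 4 && NumElts != 8)
        return false;
      int NumLanes = 2, LaneSize = NumElts / NumLanes;
      for (int l = 0; l != NumLanes; ++l) {
        int LaneStart = l * LaneSize;
        for (int i = 0; i != LaneSize; ++i) {
          int M = Mask[i + LaneStart];
          // No lane crossing allowed.
          if (M >= 0 && (M < LaneStart || M >= LaneStart + LaneSize))
            return false;
          if (NumElts == 4 || l == 0 || Mask[i] < 0)
            continue;                        // PD lanes are independent
          // PS: the upper lane must mirror the lower lane's selection.
          if (M >= 0 && M != Mask[i] + LaneSize)
            return false;
        }
      }
      return true;
    }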
-/// getShuffleVPERMILPSImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERMILPS* instructions.
-static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+/// getShuffleVPERMILPImmediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERMILPS/D* instructions.
+static unsigned getShuffleVPERMILPImmediate(ShuffleVectorSDNode *SVOp) {
EVT VT = SVOp->getValueType(0);
int NumElts = VT.getVectorNumElements();
@@ -3911,43 +3770,22 @@ static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
// where a mask will match because the same mask element is undef on the
// first half but valid on the second. This would get pathological cases
// such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid.
+ unsigned Shift = (LaneSize == 4) ? 2 : 1;
unsigned Mask = 0;
- for (int l = 0; l < NumLanes; ++l) {
- for (int i = 0; i < LaneSize; ++i) {
- int MaskElt = SVOp->getMaskElt(i+(l*LaneSize));
- if (MaskElt < 0)
- continue;
- if (MaskElt >= LaneSize)
- MaskElt -= LaneSize;
- Mask |= MaskElt << (i*2);
- }
+ for (int i = 0; i != NumElts; ++i) {
+ int MaskElt = SVOp->getMaskElt(i);
+ if (MaskElt < 0)
+ continue;
+ MaskElt %= LaneSize;
+ unsigned Shamt = i;
+ // For VPERMILPSY, the mask of the first half must equal that of the second
+ if (NumElts == 8) Shamt %= LaneSize;
+ Mask |= MaskElt << (Shamt*Shift);
}
return Mask;
}
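
Worked by hand from the merged encoder: for the v8f32 mask <1, 0, 3, 2, 5, 4, 7, 6>, LaneSize is 4, so Shift is 2 bits per element and Shamt wraps modulo the lane; every element reduces modulo 4 to <1, 0, 3, 2>, and both lanes write the same bits, giving 1 | (0 << 2) | (3 << 4) | (2 << 6) = 0xB1. For v4f64 the fields are 1 bit wide and each of the four elements gets its own bit.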
-/// getShuffleVPERMILPDImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERMILPD* instructions.
-static unsigned getShuffleVPERMILPDImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VT = SVOp->getValueType(0);
-
- int NumElts = VT.getVectorNumElements();
- int NumLanes = VT.getSizeInBits()/128;
-
- unsigned Mask = 0;
- int LaneSize = NumElts/NumLanes;
- for (int l = 0; l < NumLanes; ++l)
- for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- int MaskElt = SVOp->getMaskElt(i);
- if (MaskElt < 0)
- continue;
- Mask |= (MaskElt-l*LaneSize) << i;
- }
-
- return Mask;
-}
-
/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 movss wants. X86 movss requires the lowest element to be the lowest
/// element of vector 2 and the other elements to come from vector 1 in order.
@@ -4035,21 +3873,18 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N,
/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to 256-bit
/// version of MOVDDUP.
-static bool isMOVDDUPYMask(ShuffleVectorSDNode *N,
- const X86Subtarget *Subtarget) {
- EVT VT = N->getValueType(0);
+static bool isMOVDDUPYMask(const SmallVectorImpl<int> &Mask, EVT VT,
+ bool HasAVX) {
int NumElts = VT.getVectorNumElements();
- bool V2IsUndef = N->getOperand(1).getOpcode() == ISD::UNDEF;
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256 ||
- !V2IsUndef || NumElts != 4)
+ if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4)
return false;
for (int i = 0; i != NumElts/2; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), 0))
+ if (!isUndefOrEqual(Mask[i], 0))
return false;
for (int i = NumElts/2; i != NumElts; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), NumElts/2))
+ if (!isUndefOrEqual(Mask[i], NumElts/2))
return false;
return true;
}
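
After the refactor this is a pure mask test: 256-bit MOVDDUP duplicates element 0 across the low lane and element 2 across the high lane, so only masks compatible with <0, 0, 2, 2> (undefs allowed) qualify, and the V2-is-undef condition moves to the caller, as the lowering change further down shows.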
@@ -4164,14 +3999,13 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
-unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VVT = N->getValueType(0);
- unsigned EltSize = VVT.getVectorElementType().getSizeInBits() >> 3;
+static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
+ EVT VT = SVOp->getValueType(0);
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3;
int Val = 0;
unsigned i, e;
- for (i = 0, e = VVT.getVectorNumElements(); i != e; ++i) {
+ for (i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
Val = SVOp->getMaskElt(i);
if (Val >= 0)
break;
@@ -4631,29 +4465,14 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
case X86ISD::SHUFPS:
case X86ISD::SHUFPD:
ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeSHUFPSMask(NumElems,
- cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
- DecodePUNPCKHMask(NumElems, ShuffleMask);
- break;
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- DecodeUNPCKHPMask(VT, ShuffleMask);
+ DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
break;
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- DecodePUNPCKLMask(VT, ShuffleMask);
+ case X86ISD::UNPCKH:
+ DecodeUNPCKHMask(VT, ShuffleMask);
break;
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- DecodeUNPCKLPMask(VT, ShuffleMask);
+ case X86ISD::UNPCKL:
+ DecodeUNPCKLMask(VT, ShuffleMask);
break;
case X86ISD::MOVHLPS:
DecodeMOVHLPSMask(NumElems, ShuffleMask);
@@ -4686,27 +4505,12 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
Depth+1);
}
- case X86ISD::VPERMILPS:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPSMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERMILPSY:
+ case X86ISD::VPERMILP:
ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPSMask(8, cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ DecodeVPERMILPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
ShuffleMask);
break;
- case X86ISD::VPERMILPD:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPDMask(2, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERMILPDY:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPDMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERM2F128:
+ case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
ShuffleMask);
@@ -5334,8 +5138,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DAG);
} else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
- assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
- EVT MiddleVT = MVT::v4i32;
+ unsigned NumBits = VT.getSizeInBits();
+ assert((NumBits == 128 || NumBits == 256) &&
+ "Expected an SSE or AVX value type!");
+ EVT MiddleVT = NumBits == 128 ? MVT::v4i32 : MVT::v8i32;
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true,
Subtarget->hasXMMInt(), DAG);
@@ -6256,7 +6062,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
// from X.
if (NumHi == 3) {
// Normalize it so the 3 elements come from V1.
- CommuteVectorShuffleMask(PermMask, VT);
+ CommuteVectorShuffleMask(PermMask, 4);
std::swap(V1, V2);
}
@@ -6566,70 +6372,6 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
X86::getShuffleSHUFImmediate(SVOp), DAG);
}
-static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32: return X86ISD::PUNPCKLDQ;
- case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
- case MVT::v8i32:
- if (HasAVX2) return X86ISD::PUNPCKLDQ;
- // else use fp unit for int unpack.
- case MVT::v8f32:
- case MVT::v4f32: return X86ISD::UNPCKLPS;
- case MVT::v4i64:
- if (HasAVX2) return X86ISD::PUNPCKLQDQ;
- // else use fp unit for int unpack.
- case MVT::v4f64:
- case MVT::v2f64: return X86ISD::UNPCKLPD;
- case MVT::v32i8:
- case MVT::v16i8: return X86ISD::PUNPCKLBW;
- case MVT::v16i16:
- case MVT::v8i16: return X86ISD::PUNPCKLWD;
- default:
- llvm_unreachable("Unknown type for unpckl");
- }
- return 0;
-}
-
-static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32: return X86ISD::PUNPCKHDQ;
- case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
- case MVT::v8i32:
- if (HasAVX2) return X86ISD::PUNPCKHDQ;
- // else use fp unit for int unpack.
- case MVT::v8f32:
- case MVT::v4f32: return X86ISD::UNPCKHPS;
- case MVT::v4i64:
- if (HasAVX2) return X86ISD::PUNPCKHQDQ;
- // else use fp unit for int unpack.
- case MVT::v4f64:
- case MVT::v2f64: return X86ISD::UNPCKHPD;
- case MVT::v32i8:
- case MVT::v16i8: return X86ISD::PUNPCKHBW;
- case MVT::v16i16:
- case MVT::v8i16: return X86ISD::PUNPCKHWD;
- default:
- llvm_unreachable("Unknown type for unpckh");
- }
- return 0;
-}
-
-static inline unsigned getVPERMILOpcode(EVT VT) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32:
- case MVT::v4f32: return X86ISD::VPERMILPS;
- case MVT::v2i64:
- case MVT::v2f64: return X86ISD::VPERMILPD;
- case MVT::v8i32:
- case MVT::v8f32: return X86ISD::VPERMILPSY;
- case MVT::v4i64:
- case MVT::v4f64: return X86ISD::VPERMILPDY;
- default:
- llvm_unreachable("Unknown type for vpermil");
- }
- return 0;
-}
-
static
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI,
@@ -6703,17 +6445,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
unsigned NumElems = VT.getVectorNumElements();
- bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
bool V2IsSplat = false;
bool HasXMMInt = Subtarget->hasXMMInt();
+ bool HasAVX = Subtarget->hasAVX();
bool HasAVX2 = Subtarget->hasAVX2();
MachineFunction &MF = DAG.getMachineFunction();
bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
+ assert(V1.getOpcode() != ISD::UNDEF && "Op 1 of shuffle should not be undef");
+
// Vector shuffle lowering takes 3 steps:
//
// 1) Normalize the input vectors. Here splats, zeroed vectors, profitable
@@ -6738,11 +6482,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
// unpckh_undef). Only use pshufd if speed is more important than size.
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
- DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
- DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
if (X86::isMOVDDUPMask(SVOp) && Subtarget->hasSSE3orAVX() &&
V2IsUndef && RelaxedMayFoldVectorLoad(V1))
@@ -6754,8 +6496,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// Use to match splats
if (HasXMMInt && X86::isUNPCKHMask(SVOp, HasAVX2) && V2IsUndef &&
(VT == MVT::v2f64 || VT == MVT::v2i64))
- return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
- DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
if (X86::isPSHUFDMask(SVOp)) {
// The actual implementation will match the mask in the if above and then
@@ -6787,8 +6528,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
if (X86::isMOVLMask(SVOp)) {
- if (V1IsUndef)
- return V2;
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
if (!X86::isMOVLPMask(SVOp)) {
@@ -6834,17 +6573,19 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
V2IsSplat = isSplatVector(V2.getNode());
// Canonicalize the splat or undef, if present, to be on the RHS.
- if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
+ if (V1IsSplat && !V2IsSplat) {
Op = CommuteVectorShuffle(SVOp, DAG);
SVOp = cast<ShuffleVectorSDNode>(Op);
V1 = SVOp->getOperand(0);
V2 = SVOp->getOperand(1);
std::swap(V1IsSplat, V2IsSplat);
- std::swap(V1IsUndef, V2IsUndef);
Commuted = true;
}
- if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) {
+ SmallVector<int, 32> M;
+ SVOp->getMask(M);
+
+ if (isCommutedMOVLMask(M, VT, V2IsSplat, V2IsUndef)) {
// Shuffling low element of v1 into undef, just return v1.
if (V2IsUndef)
return V1;
@@ -6854,13 +6595,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getMOVL(DAG, dl, VT, V2, V1);
}
- if (X86::isUNPCKLMask(SVOp, HasAVX2))
- return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2,
- DAG);
+ if (isUNPCKLMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (X86::isUNPCKHMask(SVOp, HasAVX2))
- return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2,
- DAG);
+ if (isUNPCKHMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
@@ -6884,35 +6623,30 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
if (X86::isUNPCKLMask(NewSVOp, HasAVX2))
- return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V2, V1,
- DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V2, V1, DAG);
if (X86::isUNPCKHMask(NewSVOp, HasAVX2))
- return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V2, V1,
- DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V2, V1, DAG);
}
// Normalize the node to match x86 shuffle ops if needed
- if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
+ if (!V2IsUndef && (isSHUFPMask(M, VT, /* Commuted */ true) ||
+ isVSHUFPYMask(M, VT, HasAVX, /* Commuted */ true)))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
// inlined here right now to enable us to directly emit target specific
// nodes, and remove one by one until they don't return Op anymore.
- SmallVector<int, 16> M;
- SVOp->getMask(M);
if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX()))
return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
- X86::getShufflePALIGNRImmediate(SVOp),
+ getShufflePALIGNRImmediate(SVOp),
DAG);
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
SVOp->getSplatIndex() == 0 && V2IsUndef) {
- if (VT == MVT::v2f64)
- return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
- if (VT == MVT::v2i64)
- return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
}
if (isPSHUFHWMask(M, VT))
@@ -6929,12 +6663,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
X86::getShuffleSHUFImmediate(SVOp), DAG);
- if (X86::isUNPCKL_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
- DAG);
- if (X86::isUNPCKH_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
- DAG);
+ if (isUNPCKL_v_undef_Mask(M, VT))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
+ if (isUNPCKH_v_undef_Mask(M, VT))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
//===--------------------------------------------------------------------===//
// Generate target specific nodes for 128 or 256-bit shuffles only
@@ -6942,44 +6674,23 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
//
// Handle VMOVDDUPY permutations
- if (isMOVDDUPYMask(SVOp, Subtarget))
+ if (V2IsUndef && isMOVDDUPYMask(M, VT, HasAVX))
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
- // Handle VPERMILPS* permutations
- if (isVPERMILPSMask(M, VT, Subtarget))
- return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
- getShuffleVPERMILPSImmediate(SVOp), DAG);
-
- // Handle VPERMILPD* permutations
- if (isVPERMILPDMask(M, VT, Subtarget))
- return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
- getShuffleVPERMILPDImmediate(SVOp), DAG);
+ // Handle VPERMILPS/D* permutations
+ if (isVPERMILPMask(M, VT, HasAVX))
+ return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
+ getShuffleVPERMILPImmediate(SVOp), DAG);
- // Handle VPERM2F128 permutations
- if (isVPERM2F128Mask(M, VT, Subtarget))
- return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2,
- getShuffleVPERM2F128Immediate(SVOp), DAG);
+ // Handle VPERM2F128/VPERM2I128 permutations
+ if (isVPERM2X128Mask(M, VT, HasAVX))
+ return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
+ V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
- // Handle VSHUFPSY permutations
- if (isVSHUFPSYMask(M, VT, Subtarget))
+ // Handle VSHUFPS/DY permutations
+ if (isVSHUFPYMask(M, VT, HasAVX))
return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
- getShuffleVSHUFPSYImmediate(SVOp), DAG);
-
- // Handle VSHUFPDY permutations
- if (isVSHUFPDYMask(M, VT, Subtarget))
- return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
- getShuffleVSHUFPDYImmediate(SVOp), DAG);
-
- // Try to swap operands in the node to match x86 shuffle ops
- if (isCommutedVSHUFPMask(M, VT, Subtarget)) {
- // Now we need to commute operands.
- SVOp = cast<ShuffleVectorSDNode>(CommuteVectorShuffle(SVOp, DAG));
- V1 = SVOp->getOperand(0);
- V2 = SVOp->getOperand(1);
- unsigned Immediate = (NumElems == 4) ? getShuffleVSHUFPDYImmediate(SVOp):
- getShuffleVSHUFPSYImmediate(SVOp);
- return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, Immediate, DAG);
- }
+ getShuffleVSHUFPYImmediate(SVOp), DAG);
//===--------------------------------------------------------------------===//
// Since no target specific shuffle was selected for this generic one,
@@ -7888,7 +7599,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
LLVMContext *Context = DAG.getContext();
// Build some magic constants.
- std::vector<Constant*> CV0;
+ SmallVector<Constant*,4> CV0;
CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000)));
CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000)));
CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
@@ -7896,7 +7607,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
Constant *C0 = ConstantVector::get(CV0);
SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
- std::vector<Constant*> CV1;
+ SmallVector<Constant*,2> CV1;
CV1.push_back(
ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
CV1.push_back(
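
The magic constants implement the classic split-and-bias u64-to-f64 conversion: each 32-bit half of the integer is OR'ed into the mantissa of a double whose exponent field already encodes 2^84 (0x453...) or 2^52 (0x433...); subtracting the biases and adding the halves reconstructs the value with a single rounding. A scalar model of the trick (a sketch assuming round-to-nearest, not the DAG code itself):

    #include <cmath>
    #include <cstdint>
    #include <cstring>

    static double u64ToF64(uint64_t X) {
      uint64_t HiBits = (X >> 32)         | 0x4530000000000000ULL; // 2^84 + Hi*2^32
      uint64_t LoBits = (X & 0xFFFFFFFFu) | 0x4330000000000000ULL; // 2^52 + Lo
      double Hi, Lo;
      std::memcpy(&Hi, &HiBits, 8);
      std::memcpy(&Lo, &LoBits, 8);
      // Both subtractions are exact; only the final add can round.
      return (Hi - std::ldexp(1.0, 84)) + (Lo - std::ldexp(1.0, 52));
    }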
@@ -8176,17 +7887,13 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op,
EVT EltVT = VT;
if (VT.isVector())
EltVT = VT.getVectorElementType();
- std::vector<Constant*> CV;
+ SmallVector<Constant*,4> CV;
if (EltVT == MVT::f64) {
Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
- CV.push_back(C);
- CV.push_back(C);
+ CV.assign(2, C);
} else {
Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
+ CV.assign(4, C);
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -8201,19 +7908,18 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
EVT EltVT = VT;
- if (VT.isVector())
+ unsigned NumElts = VT == MVT::f64 ? 2 : 4;
+ if (VT.isVector()) {
EltVT = VT.getVectorElementType();
- std::vector<Constant*> CV;
+ NumElts = VT.getVectorNumElements();
+ }
+ SmallVector<Constant*,8> CV;
if (EltVT == MVT::f64) {
Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
- CV.push_back(C);
- CV.push_back(C);
+ CV.assign(NumElts, C);
} else {
Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
+ CV.assign(NumElts, C);
}
Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
@@ -8221,11 +7927,12 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
if (VT.isVector()) {
+ MVT XORVT = VT.getSizeInBits() == 128 ? MVT::v2i64 : MVT::v4i64;
return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::XOR, dl, MVT::v2i64,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getNode(ISD::XOR, dl, XORVT,
+ DAG.getNode(ISD::BITCAST, dl, XORVT,
Op.getOperand(0)),
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask)));
+ DAG.getNode(ISD::BITCAST, dl, XORVT, Mask)));
} else {
return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
}
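
LowerFABS and LowerFNEG both splat a sign-bit constant and lower to a bitwise op (AND to clear the bit for fabs, XOR to flip it for fneg); the change above merely sizes the splat, and the integer type used for the XOR (v2i64 vs. v4i64), to 128- or 256-bit vectors. The scalar identities being vectorized, as a sketch:

    #include <cstdint>
    #include <cstring>

    static double fabsBits(double X) {          // AND with ~(1 << 63)
      uint64_t U; std::memcpy(&U, &X, 8);
      U &= ~(1ULL << 63);
      std::memcpy(&X, &U, 8);
      return X;
    }

    static double fnegBits(double X) {          // XOR with (1 << 63)
      uint64_t U; std::memcpy(&U, &X, 8);
      U ^= 1ULL << 63;
      std::memcpy(&X, &U, 8);
      return X;
    }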
@@ -8254,7 +7961,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// type, and that won't be f80 since that is not custom lowered.
// First get the sign bit of second operand.
- std::vector<Constant*> CV;
+ SmallVector<Constant*,4> CV;
if (SrcVT == MVT::f64) {
CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
@@ -9253,7 +8960,7 @@ SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows() ||
- EnableSegmentedStacks) &&
+ getTargetMachine().Options.EnableSegmentedStacks) &&
"This should be used only on Windows targets or when segmented stacks "
"are being used");
assert(!Subtarget->isTargetEnvMacho() && "Not implemented");
@@ -9267,7 +8974,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
bool Is64Bit = Subtarget->is64Bit();
EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
- if (EnableSegmentedStacks) {
+ if (getTargetMachine().Options.EnableSegmentedStacks) {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -9403,7 +9110,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
if (ArgMode == 2) {
// Sanity Check: Make sure using fp_offset makes sense.
- assert(!UseSoftFloat &&
+ assert(!getTargetMachine().Options.UseSoftFloat &&
!(DAG.getMachineFunction()
.getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
Subtarget->hasXMM());
@@ -10472,7 +10179,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(4, MVT::i32));
- R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
@@ -10487,13 +10194,13 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(2, MVT::i32));
- R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, Op, M, R);
// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
// return pblendv(r, r+r, a);
R = DAG.getNode(ISD::VSELECT, dl, VT, Op,
- R, DAG.getNode(ISD::ADD, dl, VT, R, R));
+ DAG.getNode(ISD::ADD, dl, VT, R, R), R);
return R;
}
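
The three swapped calls in LowerShift are an operand-order fix: ISD::VSELECT yields its second operand where the condition element is true, and the PBLENDVB this becomes keys off the condition's sign bit, so the shifted value M (or R+R in the final step) must sit in the true slot. One lane of the blend, as an illustrative scalar model:

    #include <cstdint>

    // Take T where Cond's sign bit is set, else F, i.e. VSELECT(Cond, T, F).
    static uint8_t blendLane(uint8_t Cond, uint8_t T, uint8_t F) {
      return (Cond & 0x80) ? T : F;
    }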
@@ -11194,6 +10901,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::PSIGN: return "X86ISD::PSIGN";
case X86ISD::BLENDV: return "X86ISD::BLENDV";
+ case X86ISD::HADD: return "X86ISD::HADD";
+ case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::FMAX: return "X86ISD::FMAX";
@@ -11266,24 +10975,11 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD";
case X86ISD::MOVSD: return "X86ISD::MOVSD";
case X86ISD::MOVSS: return "X86ISD::MOVSS";
- case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
- case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
- case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
- case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
- case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
- case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD";
- case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ";
- case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ";
- case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW";
- case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
- case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
- case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
+ case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
+ case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
- case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS";
- case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY";
- case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD";
- case X86ISD::VPERMILPDY: return "X86ISD::VPERMILPDY";
- case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128";
+ case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
+ case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
@@ -11391,7 +11087,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
// Very little shuffling can be done for 64-bit vectors right now.
if (VT.getSizeInBits() == 64)
- return isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX());
+ return false;
// FIXME: pshufb, blends, shifts.
return (VT.getVectorNumElements() == 2 ||
@@ -11419,7 +11115,7 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
return (isMOVLMask(Mask, VT) ||
isCommutedMOVLMask(Mask, VT, true) ||
isSHUFPMask(Mask, VT) ||
- isCommutedSHUFPMask(Mask, VT));
+ isSHUFPMask(Mask, VT, /* Commuted */ true));
}
return false;
}
@@ -12289,7 +11985,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- assert(EnableSegmentedStacks);
+ assert(getTargetMachine().Options.EnableSegmentedStacks);
unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
@@ -13169,7 +12865,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
@@ -13179,7 +12875,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SETOLE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMIN;
@@ -13197,7 +12893,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SETOGE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMAX;
@@ -13207,7 +12903,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
@@ -13233,7 +12929,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
@@ -13243,7 +12939,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
break;
case ISD::SETUGT:
// Converting this to a min would handle NaNs incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
(!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
break;
Opcode = X86ISD::FMIN;
@@ -13268,7 +12964,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
@@ -14048,7 +13744,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X);
Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y);
Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask);
- Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, X, Y);
+ Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X);
return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
}
}
@@ -14232,7 +13928,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // If we are saving a concatination of two XMM registers, perform two stores.
+ // If we are saving a concatenation of two XMM registers, perform two stores.
// This is better in Sandy Bridge because one 256-bit mem op is done via two
// 128-bit ones. If in the future the cost becomes only one memory access, the
// first version would be better.
@@ -14342,7 +14038,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const Function *F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
- bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
+ bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
&& Subtarget->hasXMMInt();
if ((VT.isVector() ||
(VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
@@ -14458,7 +14154,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
/// set to A, RHS to B, and the routine returns 'true'.
/// Note that the binary operation should have the property that if one of the
/// operands is UNDEF then the result is UNDEF.
-static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
+static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
// Look for the following pattern: if
// A = < float a0, float a1, float a2, float a3 >
// B = < float b0, float b1, float b2, float b3 >
@@ -14474,7 +14170,18 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
return false;
EVT VT = LHS.getValueType();
- unsigned N = VT.getVectorNumElements();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for horizontal add/sub");
+
+ // Handle 128 and 256-bit vector lengths. AVX defines horizontal add/sub to
+ // operate independently on 128-bit lanes.
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+ assert((NumLaneElts % 2 == 0) &&
+ "Vector type should have an even number of elements in each lane");
+ unsigned HalfLaneElts = NumLaneElts/2;
// View LHS in the form
// LHS = VECTOR_SHUFFLE A, B, LMask
@@ -14483,7 +14190,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
// NOTE: in what follows a default initialized SDValue represents an UNDEF of
// type VT.
SDValue A, B;
- SmallVector<int, 8> LMask(N);
+ SmallVector<int, 16> LMask(NumElts);
if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (LHS.getOperand(0).getOpcode() != ISD::UNDEF)
A = LHS.getOperand(0);
@@ -14493,14 +14200,14 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
} else {
if (LHS.getOpcode() != ISD::UNDEF)
A = LHS;
- for (unsigned i = 0; i != N; ++i)
+ for (unsigned i = 0; i != NumElts; ++i)
LMask[i] = i;
}
// Likewise, view RHS in the form
// RHS = VECTOR_SHUFFLE C, D, RMask
SDValue C, D;
- SmallVector<int, 8> RMask(N);
+ SmallVector<int, 16> RMask(NumElts);
if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (RHS.getOperand(0).getOpcode() != ISD::UNDEF)
C = RHS.getOperand(0);
@@ -14510,7 +14217,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
} else {
if (RHS.getOpcode() != ISD::UNDEF)
C = RHS;
- for (unsigned i = 0; i != N; ++i)
+ for (unsigned i = 0; i != NumElts; ++i)
RMask[i] = i;
}
@@ -14525,30 +14232,28 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
// If A and B occur in reverse order in RHS, then "swap" them (which means
// rewriting the mask).
if (A != C)
- for (unsigned i = 0; i != N; ++i) {
- unsigned Idx = RMask[i];
- if (Idx < N)
- RMask[i] += N;
- else if (Idx < 2*N)
- RMask[i] -= N;
- }
+ CommuteVectorShuffleMask(RMask, NumElts);
// At this point LHS and RHS are equivalent to
// LHS = VECTOR_SHUFFLE A, B, LMask
// RHS = VECTOR_SHUFFLE A, B, RMask
// Check that the masks correspond to performing a horizontal operation.
- for (unsigned i = 0; i != N; ++i) {
- unsigned LIdx = LMask[i], RIdx = RMask[i];
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int LIdx = LMask[i], RIdx = RMask[i];
// Ignore any UNDEF components.
- if (LIdx >= 2*N || RIdx >= 2*N || (!A.getNode() && (LIdx < N || RIdx < N))
- || (!B.getNode() && (LIdx >= N || RIdx >= N)))
+ if (LIdx < 0 || RIdx < 0 ||
+ (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
+ (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
continue;
// Check that successive elements are being operated on. If not, this is
// not a horizontal operation.
- if (!(LIdx == 2*i && RIdx == 2*i + 1) &&
- !(isCommutative && LIdx == 2*i + 1 && RIdx == 2*i))
+ unsigned Src = (i/HalfLaneElts) % 2; // each lane is split between srcs
+ unsigned LaneStart = (i/NumLaneElts) * NumLaneElts;
+ int Index = 2*(i%HalfLaneElts) + NumElts*Src + LaneStart;
+ if (!(LIdx == Index && RIdx == Index + 1) &&
+ !(IsCommutative && LIdx == Index + 1 && RIdx == Index))
return false;
}
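
The generalized index check extends the old pairwise test to 256-bit types, where HADD/HSUB operate on each 128-bit lane independently: destination element i must read source elements Index and Index+1 with Index = 2*(i % HalfLaneElts) + NumElts*Src + LaneStart. Worked for v8f32 (NumElts = 8, NumLaneElts = 4, HalfLaneElts = 2): i = 5 gives Src = (5/2) % 2 = 0, LaneStart = 4, Index = 2*(5 % 2) + 0 + 4 = 6, so the masks must read elements 6 and 7, the second pair in the high lane of the first source.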
@@ -14565,7 +14270,8 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
// Try to synthesize horizontal adds from adds of shuffles.
- if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ if (((Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, true))
return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
@@ -14579,7 +14285,8 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
// Try to synthesize horizontal subs from subs of shuffles.
- if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ if (((Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, false))
return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
@@ -14783,7 +14490,8 @@ static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
SDValue Op1 = N->getOperand(1);
// Try to synthesize horizontal adds from adds of shuffles.
- if ((Subtarget->hasSSSE3orAVX()) && (VT == MVT::v8i16 || VT == MVT::v4i32) &&
+ if (((Subtarget->hasSSSE3orAVX() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+ (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1);
@@ -14815,8 +14523,9 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
// Try to synthesize horizontal subs from subs of shuffles.
EVT VT = N->getValueType(0);
- if ((Subtarget->hasSSSE3orAVX()) && (VT == MVT::v8i16 || VT == MVT::v4i32) &&
- isHorizontalBinOp(Op0, Op1, false))
+ if (((Subtarget->hasSSSE3orAVX() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+ (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ isHorizontalBinOp(Op0, Op1, false))
return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1);
return OptimizeConditionalInDecrement(N, DAG);
@@ -14857,18 +14566,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::SHUFPS: // Handle all target specific shuffles
case X86ISD::SHUFPD:
case X86ISD::PALIGN:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
+ case X86ISD::UNPCKH:
+ case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFD:
@@ -14876,11 +14575,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PSHUFLW:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::VPERMILPS:
- case X86ISD::VPERMILPSY:
- case X86ISD::VPERMILPD:
- case X86ISD::VPERMILPDY:
- case X86ISD::VPERM2F128:
+ case X86ISD::VPERMILP:
+ case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI, Subtarget);
}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index ccff3a5..cfc1f88 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -273,23 +273,10 @@ namespace llvm {
MOVLPD,
MOVSD,
MOVSS,
- UNPCKLPS,
- UNPCKLPD,
- UNPCKHPS,
- UNPCKHPD,
- PUNPCKLBW,
- PUNPCKLWD,
- PUNPCKLDQ,
- PUNPCKLQDQ,
- PUNPCKHBW,
- PUNPCKHWD,
- PUNPCKHDQ,
- PUNPCKHQDQ,
- VPERMILPS,
- VPERMILPSY,
- VPERMILPD,
- VPERMILPDY,
- VPERM2F128,
+ UNPCKL,
+ UNPCKH,
+ VPERMILP,
+ VPERM2X128,
VBROADCAST,
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
@@ -468,10 +455,6 @@ namespace llvm {
/// the specified VECTOR_SHUFFLE mask with PSHUFLW instruction.
unsigned getShufflePSHUFLWImmediate(SDNode *N);
- /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
- /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
- unsigned getShufflePALIGNRImmediate(SDNode *N);
-
/// getExtractVEXTRACTF128Immediate - Return the appropriate
/// immediate to extract the specified EXTRACT_SUBVECTOR index
/// with VEXTRACTF128 instructions.
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index d868773..f443088 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -58,3 +58,391 @@ let isAsmParserOnly = 1 in {
defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
}
+
+//===----------------------------------------------------------------------===//
+// FMA4 - AMD 4 operand Fused Multiply-Add instructions
+//===----------------------------------------------------------------------===//
+
+
+multiclass fma4s<bits<8> opc, string OpcodeStr> {
+ def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>;
+
+}
+
+multiclass fma4p<bits<8> opc, string OpcodeStr> {
+ def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>;
+ def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, XOP_W;
+ def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VFMADDSS4 : fma4s<0x6A, "vfmaddss">;
+ defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd">;
+ defm VFMADDPS4 : fma4p<0x68, "vfmaddps">;
+ defm VFMADDPD4 : fma4p<0x69, "vfmaddpd">;
+ defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss">;
+ defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd">;
+ defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps">;
+ defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd">;
+ defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss">;
+ defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd">;
+ defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps">;
+ defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd">;
+ defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss">;
+ defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd">;
+ defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps">;
+ defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd">;
+ defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps">;
+ defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd">;
+ defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps">;
+ defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd">;
+}
+
+// FMA4 Intrinsics patterns
+
+// VFMADD
+def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMADDSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ss VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMADDSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_sd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFMADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFMADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFMADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmadd_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFMADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VFMSUB
+def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMSUBSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ss VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMSUBSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMSUBSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_sd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMSUBSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFMSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFMSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFMSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsub_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFMSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VFNMADD
+def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMADDSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFNMADDSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ss VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFNMADDSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMADDSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFNMADDSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_sd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFNMADDSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFNMADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFNMADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFNMADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFNMADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFNMADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFNMADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFNMADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFNMADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFNMADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmadd_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFNMADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VFNMSUB
+def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMSUBSS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFNMSUBSS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ss VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFNMSUBSS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMSUBSD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFNMSUBSD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_sd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFNMSUBSD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFNMSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFNMSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFNMSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFNMSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFNMSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFNMSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFNMSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFNMSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFNMSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFNMSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfnmsub_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFNMSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VFMADDSUB
+def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDSUBPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMADDSUBPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMADDSUBPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMADDSUBPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMADDSUBPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMADDSUBPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMADDSUBPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFMADDSUBPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFMADDSUBPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMADDSUBPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFMADDSUBPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmaddsub_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFMADDSUBPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+// VFMSUBADD
+def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBADDPS4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, VR128:$src2,
+ (alignedloadv4f32 addr:$src3)),
+ (VFMSUBADDPS4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_ps VR128:$src1, (alignedloadv4f32 addr:$src2),
+ VR128:$src3),
+ (VFMSUBADDPS4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, VR128:$src2, VR128:$src3),
+ (VFMSUBADDPD4rr VR128:$src1, VR128:$src2, VR128:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, VR128:$src2,
+ (alignedloadv2f64 addr:$src3)),
+ (VFMSUBADDPD4rm VR128:$src1, VR128:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_pd VR128:$src1, (alignedloadv2f64 addr:$src2),
+ VR128:$src3),
+ (VFMSUBADDPD4mr VR128:$src1, addr:$src2, VR128:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMSUBADDPS4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1, VR256:$src2,
+ (alignedloadv8f32 addr:$src3)),
+ (VFMSUBADDPS4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_ps_256 VR256:$src1,
+ (alignedloadv8f32 addr:$src2),
+ VR256:$src3),
+ (VFMSUBADDPS4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
+
+def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, VR256:$src2, VR256:$src3),
+ (VFMSUBADDPD4rrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1, VR256:$src2,
+ (alignedloadv4f64 addr:$src3)),
+ (VFMSUBADDPD4rmY VR256:$src1, VR256:$src2, addr:$src3)>;
+def : Pat<(int_x86_fma4_vfmsubadd_pd_256 VR256:$src1,
+ (alignedloadv4f64 addr:$src2),
+ VR256:$src3),
+ (VFMSUBADDPD4mrY VR256:$src1, addr:$src2, VR256:$src3)>;
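
Each intrinsic family above gets three patterns: an all-register form (rr), a form with the third operand folded from aligned memory (rm), and a form with the second operand folded (mr), which are the two load slots the four-operand FMA4 encoding offers. For reference, the per-lane arithmetic of the four families, as a minimal illustrative C++ sketch (function names here are made up; this is not part of the patch):

    // Per-lane semantics of the FMA4 families matched above.
    inline float fnmadd(float a, float b, float c) { return -(a * b) + c; }
    inline float fnmsub(float a, float b, float c) { return -(a * b) - c; }

    // fmaddsub subtracts in even lanes and adds in odd lanes;
    // fmsubadd is the mirror image.
    inline void fmaddsub4(const float a[4], const float b[4],
                          const float c[4], float r[4]) {
      for (int i = 0; i < 4; ++i)
        r[i] = (i % 2 == 0) ? a[i] * b[i] - c[i]
                            : a[i] * b[i] + c[i];
    }
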
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index ecd6a93..7ba3639 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -110,6 +110,8 @@ class A7 { bits<5> Prefix = 16; }
class T8XD { bits<5> Prefix = 17; }
class T8XS { bits<5> Prefix = 18; }
class TAXD { bits<5> Prefix = 19; }
+class XOP8 { bits<5> Prefix = 20; }
+class XOP9 { bits<5> Prefix = 21; }
class VEX { bit hasVEXPrefix = 1; }
class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
@@ -118,7 +120,8 @@ class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
-
+class XOP_W { bit hasXOP_WPrefix = 1; }
+class XOP { bit hasXOP_Prefix = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
: Instruction {
@@ -158,6 +161,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit
bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
+ bit hasXOP_WPrefix = 0; // Same bit as VEX_W, but used for swapping operands
+ bit hasXOP_Prefix = 0; // Does this inst require an XOP prefix?
// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits;
@@ -179,6 +184,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{38} = hasVEX_L;
let TSFlags{39} = ignoresVEX_L;
let TSFlags{40} = has3DNow0F0FOpcode;
+ let TSFlags{41} = hasXOP_WPrefix;
+ let TSFlags{42} = hasXOP_Prefix;
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
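
TSFlags is a flat 64-bit field, so the two new bits land at fixed positions 41 and 42, and any consumer recovers them with plain shifts and masks. A minimal sketch, assuming a raw 64-bit flags value (the helper names are invented for illustration):

    #include <cstdint>

    // Mirror the `let TSFlags{41}` / `let TSFlags{42}` assignments above.
    inline bool hasXOP_WPrefix(uint64_t TSFlags) { return (TSFlags >> 41) & 1; }
    inline bool hasXOP_Prefix(uint64_t TSFlags)  { return (TSFlags >> 42) & 1; }
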
@@ -332,6 +339,10 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB,
Requires<[HasAVX]>;
+class VoPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasXMM]>;
// SSE2 Instruction Templates:
//
@@ -496,6 +507,30 @@ class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
OpSize, VEX_4V, Requires<[HasFMA3]>;
+// FMA4 Instruction Templates
+class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>;
+
+// XOP 2, 3 and 4 Operand Instruction Template
+class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
+ XOP, XOP9, Requires<[HasXOP]>;
+
+// XOP 2, 3 and 4 Operand Instruction Templates with imm byte
+class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>,
+ XOP, XOP8, Requires<[HasXOP]>;
+
+// XOP 5 operand instruction (VEX encoding!)
+class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>;
+
// X86-64 Instruction templates...
//
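
The XOP8/XOP9 prefix classes select the opcode map carried in the XOP prefix, which AMD defined with the same three-byte layout as VEX but behind the 0x8F escape byte; per the templates above, map 8 carries the imm8 forms and map 9 the rest. A hedged C++ sketch of how those fields would be packed (field layout per the AMD XOP documentation; the function and parameter names are assumptions):

    #include <cstdint>

    // Pack a 3-byte XOP prefix. mapSelect is 8 (XOP8, imm8 forms) or
    // 9 (XOP9); the R/X/B extension bits are left at their inverted-1
    // defaults here for simplicity.
    inline void emitXOPPrefix(uint8_t mapSelect, bool w, uint8_t vvvv,
                              bool l, uint8_t pp, uint8_t out[3]) {
      out[0] = 0x8F;                                    // XOP escape
      out[1] = uint8_t(0xE0 | (mapSelect & 0x1F));      // ~R,~X,~B | map
      out[2] = uint8_t((w << 7) | ((~vvvv & 0xF) << 3)  // W | ~vvvv
                       | (l << 2) | (pp & 0x3));        // L | pp
    }
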
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 791bbe6..cd13bc4 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -130,28 +130,12 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
-def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
-def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
+def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
-def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
-def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
+def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
-def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
-def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
-def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
-def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
-
-def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>;
-def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
-def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
-def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
-
-def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>;
-def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>;
-def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>;
-def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>;
-
-def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>;
+def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
@@ -363,12 +347,6 @@ def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
}]>;
-// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to
-// a PALIGNR imm.
-def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShufflePALIGNRImmediate(N));
-}]>;
-
// EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index
// to VEXTRACTF128 imm.
def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{
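
Collapsing the per-type unpack nodes into X86Unpckl/X86Unpckh works because the shuffle-mask shape is identical at every element width; only the value type differs. A simplified, self-contained C++ check for the "unpack low" mask shape (illustrative; the real lowering also has per-128-bit-lane handling for 256-bit types):

    #include <vector>

    // True if Mask interleaves the low halves of two sources,
    // e.g. <0,4,1,5> for four elements: result = {a0,b0,a1,b1}.
    static bool isUnpackLoMask(const std::vector<int> &Mask) {
      int N = (int)Mask.size();
      for (int i = 0; i < N / 2; ++i)
        if (Mask[2 * i] != i || Mask[2 * i + 1] != i + N)
          return false;
      return true;
    }
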
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 24c4a53..7d1b9a1 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1528,9 +1528,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// we'll be shifting and then extracting the lower 16-bits.
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF),leaInReg2);
InsMI2 =
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
.addReg(leaInReg2, RegState::Define, X86::sub_16bit)
.addReg(Src2, getKillRegState(isKill2));
addRegReg(MIB, leaInReg, true, leaInReg2, true);
@@ -2040,13 +2040,12 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
}
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
+ if (!MI->isTerminator()) return false;
// Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
+ if (MI->isBranch() && !MI->isBarrier())
return true;
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return true;
return !isPredicated(MI);
}
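
The hunk above swaps `MI->getDesc().isX()` calls for `MI->isX()`, which presumes MachineInstr now carries thin forwarding predicates. A toy stand-in showing the shape of that pattern (the types here are invented; the real accessors in this revision also take a query type so they can answer for whole instruction bundles):

    // Toy types illustrating the forwarding-accessor pattern.
    struct InstrDesc {
      bool Terminator = false, Branch = false, Barrier = false;
      bool isTerminator() const { return Terminator; }
      bool isBranch() const { return Branch; }
      bool isBarrier() const { return Barrier; }
    };

    struct Instr {
      InstrDesc Desc;
      const InstrDesc &getDesc() const { return Desc; }
      // Call sites shrink from I.getDesc().isBranch() to I.isBranch().
      bool isTerminator() const { return getDesc().isTerminator(); }
      bool isBranch() const { return getDesc().isBranch(); }
      bool isBarrier() const { return getDesc().isBarrier(); }
    };
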
@@ -2072,7 +2071,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// A terminator that isn't a branch can't easily be handled by this
// analysis.
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
// Handle unconditional branches.
@@ -2556,6 +2555,8 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
switch (MI->getOpcode()) {
case X86::V_SET0:
+ case X86::FsFLD0SS:
+ case X86::FsFLD0SD:
return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
@@ -2771,7 +2772,9 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
case X86::RCPSSr:
case X86::RCPSSr_Int:
case X86::ROUNDSDr:
+ case X86::ROUNDSDr_Int:
case X86::ROUNDSSr:
+ case X86::ROUNDSSr_Int:
case X86::RSQRTSSr:
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
@@ -2783,7 +2786,9 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
case X86::Int_VCVTSS2SDrr:
case X86::VRCPSSr:
case X86::VROUNDSDr:
+ case X86::VROUNDSDr_Int:
case X86::VROUNDSSr:
+ case X86::VROUNDSSr_Int:
case X86::VRSQRTSSr:
case X86::VSQRTSSr:
return true;
@@ -2911,11 +2916,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = 16;
break;
case X86::FsFLD0SD:
- case X86::VFsFLD0SD:
Alignment = 8;
break;
case X86::FsFLD0SS:
- case X86::VFsFLD0SS:
Alignment = 4;
break;
default:
@@ -2950,9 +2953,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
case X86::AVX_SETALLONES:
case X86::AVX2_SETALLONES:
case X86::FsFLD0SD:
- case X86::FsFLD0SS:
- case X86::VFsFLD0SD:
- case X86::VFsFLD0SS: {
+ case X86::FsFLD0SS: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
@@ -2978,9 +2979,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineConstantPool &MCP = *MF.getConstantPool();
Type *Ty;
unsigned Opc = LoadMI->getOpcode();
- if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
+ if (Opc == X86::FsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
+ else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
@@ -3569,7 +3570,13 @@ static const unsigned ReplaceableInstrsAVX2[][3] = {
{ X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
{ X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
{ X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
- { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }
+ { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
+ { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
+ { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
+ { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
+ { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
+ { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr }
};
// FIXME: Some shuffle and unpack instructions have equivalents in different
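
The rows added to ReplaceableInstrsAVX2 extend the execution-domain fixup table: each row lists the packed-single, packed-double, and integer opcodes of one logically identical operation, and the pass swaps within a row to avoid domain-crossing stalls. A minimal sketch of the row lookup (toy types and names; the real table rows hold opcode enum values):

    #include <array>
    #include <cstddef>

    using Row = std::array<unsigned, 3>;  // {PS, PD, Int} opcodes

    // Find the row containing `opcode` in column `domain` (0=PS, 1=PD,
    // 2=Int); returns nullptr if the instruction has no equivalents.
    static const Row *lookupDomainRow(const Row *table, size_t n,
                                      unsigned opcode, unsigned domain) {
      for (size_t i = 0; i < n; ++i)
        if (table[i][domain] == opcode)
          return &table[i];
      return nullptr;
    }
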
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 35631d5..0bc3afa 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -473,6 +473,7 @@ def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">;
def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
+def HasXMM : Predicate<"Subtarget->hasXMM()">;
def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
@@ -480,6 +481,7 @@ def HasAES : Predicate<"Subtarget->hasAES()">;
def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
+def HasXOP : Predicate<"Subtarget->hasXOP()">;
def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
def HasF16C : Predicate<"Subtarget->hasF16C()">;
@@ -1502,6 +1504,9 @@ include "X86InstrFragmentsSIMD.td"
// FMA - Fused Multiply-Add support (requires FMA)
include "X86InstrFMA.td"
+// XOP
+include "X86InstrXOP.td"
+
// SSE, MMX and 3DNow! vector support.
include "X86InstrSSE.td"
include "X86InstrMMX.td"
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 7cadac1..345f606 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -240,21 +240,13 @@ let Predicates = [HasAVX] in {
}
// Alias instructions that map fld0 to pxor for sse.
-// FIXME: Set encoding to pseudo!
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
- canFoldAsLoad = 1 in {
- def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasSSE1]>, TB, OpSize;
- def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasSSE2]>, TB, OpSize;
- def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasAVX]>, TB, OpSize, VEX_4V;
- def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+// This is expanded by ExpandPostRAPseudos.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1 in {
+ def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>, Requires<[HasXMM]>;
+ def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>, Requires<[HasXMMInt]>;
}
//===----------------------------------------------------------------------===//
@@ -569,6 +561,16 @@ let Predicates = [HasAVX] in {
(EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
(VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+
+ // Move low f32 and clear high bits.
+ def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (v4f32 (V_SET0)),
+ (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>;
+ def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>;
}
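
X86vzmovl means "move the low element, zero everything above it"; the new 256-bit patterns implement it by doing the 128-bit VMOVSS against a zeroed register and then using SUBREG_TO_REG to assert that the upper half is zero. The semantics as a small self-contained C++ sketch (illustrative only):

    // vzmovl semantics for an 8 x float vector: keep lane 0, zero the rest.
    static void vzmovl8(const float src[8], float dst[8]) {
      dst[0] = src[0];
      for (int i = 1; i < 8; ++i)
        dst[i] = 0.0f;
    }
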
let AddedComplexity = 20 in {
@@ -596,6 +598,9 @@ let Predicates = [HasAVX] in {
// Represent the same patterns above but in the form they appear for
// 256-bit types
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
@@ -613,6 +618,15 @@ let Predicates = [HasAVX] in {
(SUBREG_TO_REG (i64 0),
(v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+
+ // Move low f64 and clear high bits.
+ def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (v2f64 (V_SET0)),
+ (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>;
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
@@ -634,6 +648,16 @@ let Predicates = [HasAVX] in {
(VMOVSSrr (v4f32 VR128:$src1),
(EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ // 256-bit variants
+ def : Pat<(v8i32 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss),
+ (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>;
+ def : Pat<(v8f32 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss),
+ (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>;
+
// Shuffle with VMOVSD
def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
(VMOVSDrr VR128:$src1, FR64:$src2)>;
@@ -650,6 +674,17 @@ let Predicates = [HasAVX] in {
(VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
sub_sd))>;
+ // 256-bit variants
+ def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd),
+ (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>;
+ def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd),
+ (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>;
+
+
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
// is during lowering, where it's not possible to recognize the fold because
// it has two uses through a bitcast. One use disappears at isel time and the
@@ -657,6 +692,9 @@ let Predicates = [HasAVX] in {
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
(VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),
sub_sd))>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),
+ sub_sd))>;
def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
(VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
sub_sd))>;
@@ -761,6 +799,22 @@ let isCodeGenOnly = 1 in {
"movupd\t{$src, $dst|$dst, $src}", []>, VEX;
}
+let Predicates = [HasAVX] in {
+def : Pat<(v8i32 (X86vzmovl
+ (insert_subvector undef, (v4i32 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v4i64 (X86vzmovl
+ (insert_subvector undef, (v2i64 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v8f32 (X86vzmovl
+ (insert_subvector undef, (v4f32 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v4f64 (X86vzmovl
+ (insert_subvector undef, (v2f64 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+}
+
+
def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
@@ -1156,14 +1210,17 @@ let Predicates = [HasAVX] in {
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
(VMOVHPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+ (VMOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
(VMOVHPSrm VR128:$src1, addr:$src2)>;
- // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
+ // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
// is during lowering, where it's not possible to recognize the load fold because
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
@@ -1174,10 +1231,10 @@ let Predicates = [HasAVX] in {
// Store patterns
def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(VMOVHPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(VMOVHPDmr addr:$dst, VR128:$src)>;
}
@@ -1189,21 +1246,24 @@ let Predicates = [HasSSE1] in {
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128:$src1,
(bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
// Store patterns
def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
+ (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
(MOVHPSmr addr:$dst, VR128:$src)>;
}
let Predicates = [HasSSE2] in {
- // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
+ // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
// is during lowering, where it's not possible to recognize the load fold because
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
@@ -1214,7 +1274,7 @@ let Predicates = [HasSSE2] in {
// Store patterns
def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))),addr:$dst),
(MOVHPDmr addr:$dst, VR128:$src)>;
}
@@ -1943,7 +2003,7 @@ def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
// whenever possible to avoid declaring two versions of each one.
def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
(VCVTDQ2PSYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)),
+def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
(VCVTDQ2PSYrm addr:$src)>;
def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
@@ -2430,27 +2490,27 @@ let AddedComplexity = 10 in {
} // AddedComplexity
let Predicates = [HasSSE1] in {
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))),
(UNPCKLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)),
(UNPCKLPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))),
(UNPCKHPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)),
(UNPCKHPSrr VR128:$src1, VR128:$src2)>;
}
let Predicates = [HasSSE2] in {
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))),
(UNPCKLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)),
(UNPCKLPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))),
(UNPCKHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)),
(UNPCKHPDrr VR128:$src1, VR128:$src2)>;
- // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
+ // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
// problem is during lowering, where it's not possible to recognize the load
// fold because it has two uses through a bitcast. One use disappears at isel
// time and the fold opportunity reappears.
@@ -2463,59 +2523,43 @@ let Predicates = [HasSSE2] in {
}
let Predicates = [HasAVX] in {
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)),
(VUNPCKLPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKHPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)),
(VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8f32 (X86Unpcklps VR256:$src1, (memopv8f32 addr:$src2))),
+ def : Pat<(v8f32 (X86Unpckl VR256:$src1, (memopv8f32 addr:$src2))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpcklps VR256:$src1, VR256:$src2)),
- (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpcklps VR256:$src1, VR256:$src2)),
+ def : Pat<(v8f32 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpcklps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
- (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpckhps VR256:$src1, (memopv8f32 addr:$src2))),
- (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpckhps VR256:$src1, VR256:$src2)),
- (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpckhps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ def : Pat<(v8f32 (X86Unpckh VR256:$src1, (memopv8f32 addr:$src2))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86Unpckhps VR256:$src1, VR256:$src2)),
+ def : Pat<(v8f32 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))),
(VUNPCKLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)),
(VUNPCKLPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))),
(VUNPCKHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)),
(VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, (memopv4f64 addr:$src2))),
+ def : Pat<(v4f64 (X86Unpckl VR256:$src1, (memopv4f64 addr:$src2))),
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, VR256:$src2)),
+ def : Pat<(v4f64 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, (memopv4i64 addr:$src2))),
- (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, VR256:$src2)),
- (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, (memopv4f64 addr:$src2))),
- (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, VR256:$src2)),
- (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, (memopv4i64 addr:$src2))),
+ def : Pat<(v4f64 (X86Unpckh VR256:$src1, (memopv4f64 addr:$src2))),
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, VR256:$src2)),
+ def : Pat<(v4f64 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
- // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
+ // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
// problem is during lowering, where it's not possible to recognize the load
// fold because it has two uses through a bitcast. One use disappears at isel
// time and the fold opportunity reappears.
@@ -2869,7 +2913,7 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins ssmem:$src1, VR128:$src2),
+ (ins VR128:$src1, ssmem:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
}
@@ -3198,13 +3242,13 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
//===----------------------------------------------------------------------===//
// Prefetch intrinsic.
-def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
+def PREFETCHT0 : VoPSI<0x18, MRM1m, (outs), (ins i8mem:$src),
"prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
-def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
+def PREFETCHT1 : VoPSI<0x18, MRM2m, (outs), (ins i8mem:$src),
"prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
-def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
+def PREFETCHT2 : VoPSI<0x18, MRM3m, (outs), (ins i8mem:$src),
"prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
-def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
+def PREFETCHNTA : VoPSI<0x18, MRM0m, (outs), (ins i8mem:$src),
"prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
// Flush cache
@@ -3652,6 +3696,8 @@ defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64,
i128mem, 1, 0>, VEX_4V;
defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64,
i128mem, 1, 0>, VEX_4V;
+defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64,
+ i128mem, 0, 0>, VEX_4V;
let ExeDomain = SSEPackedInt in {
let neverHasSideEffects = 1 in {
@@ -3666,17 +3712,6 @@ let ExeDomain = SSEPackedInt in {
VEX_4V;
// PSRADQri doesn't exist in SSE[1-3].
}
- def VPANDNrr : PDI<0xDF, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86andnp VR128:$src1, VR128:$src2)))]>,VEX_4V;
-
- def VPANDNrm : PDI<0xDF, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (X86andnp VR128:$src1,
- (memopv2i64 addr:$src2)))]>, VEX_4V;
}
}
@@ -3714,6 +3749,8 @@ defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64,
i256mem, 1, 0>, VEX_4V;
defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64,
i256mem, 1, 0>, VEX_4V;
+defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64,
+ i256mem, 0, 0>, VEX_4V;
let ExeDomain = SSEPackedInt in {
let neverHasSideEffects = 1 in {
@@ -3728,17 +3765,6 @@ let ExeDomain = SSEPackedInt in {
VEX_4V;
// PSRADQYri doesn't exist in SSE[1-3].
}
- def VPANDNYrr : PDI<0xDF, MRMSrcReg,
- (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
- "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst,
- (v4i64 (X86andnp VR256:$src1, VR256:$src2)))]>,VEX_4V;
-
- def VPANDNYrm : PDI<0xDF, MRMSrcMem,
- (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
- "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst, (X86andnp VR256:$src1,
- (memopv4i64 addr:$src2)))]>, VEX_4V;
}
}
@@ -3776,6 +3802,8 @@ defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64,
i128mem, 1>;
defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64,
i128mem, 1>;
+defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64,
+ i128mem, 0>;
let ExeDomain = SSEPackedInt in {
let neverHasSideEffects = 1 in {
@@ -3787,14 +3815,6 @@ let ExeDomain = SSEPackedInt in {
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"psrldq\t{$src2, $dst|$dst, $src2}", []>;
// PSRADQri doesn't exist in SSE[1-3].
- def PANDNrr : PDI<0xDF, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}", []>;
-
- let mayLoad = 1 in
- def PANDNrm : PDI<0xDF, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}", []>;
}
}
} // Constraints = "$src1 = $dst"
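
PANDN is the one bitwise op in this group that is not commutative, which is why the new defm lines pass 0 for the commutable flag: it complements the first source and ANDs it with the second. The scalar semantics as a one-line C++ sketch:

    #include <cstdint>

    // X86andnp(a, b) per 64-bit lane: NOT the first operand, AND the second.
    inline uint64_t andnp(uint64_t a, uint64_t b) { return ~a & b; }
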
@@ -4198,66 +4218,88 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
}
let Predicates = [HasAVX] in {
- defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw,
+ defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
bc_v16i8, 0>, VEX_4V;
- defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpcklwd,
+ defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
bc_v8i16, 0>, VEX_4V;
- defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq,
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
bc_v4i32, 0>, VEX_4V;
- defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq,
+ defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
bc_v2i64, 0>, VEX_4V;
- defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw,
+ defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
bc_v16i8, 0>, VEX_4V;
- defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckhwd,
+ defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
bc_v8i16, 0>, VEX_4V;
- defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq,
+ defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
bc_v4i32, 0>, VEX_4V;
- defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq,
+ defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
bc_v2i64, 0>, VEX_4V;
}
let Predicates = [HasAVX2] in {
- defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw,
+ defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
bc_v32i8>, VEX_4V;
- defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwd,
+ defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
bc_v16i16>, VEX_4V;
- defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq,
+ defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
bc_v8i32>, VEX_4V;
- defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpcklqdq,
+ defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
bc_v4i64>, VEX_4V;
- defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw,
+ defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
bc_v32i8>, VEX_4V;
- defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwd,
+ defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
bc_v16i16>, VEX_4V;
- defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq,
+ defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
bc_v8i32>, VEX_4V;
- defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdq,
+ defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
bc_v4i64>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw,
+ defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
bc_v16i8>;
- defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd,
+ defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
bc_v8i16>;
- defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq,
+ defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
bc_v4i32>;
- defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq,
+ defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
bc_v2i64>;
- defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw,
+ defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
bc_v16i8>;
- defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd,
+ defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
bc_v8i16>;
- defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq,
+ defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
bc_v4i32>;
- defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq,
+ defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
bc_v2i64>;
}
} // ExeDomain = SSEPackedInt
+// Patterns for using AVX1 instructions with integer vectors. Declared here
+// so that the AVX2 patterns take priority.
+let Predicates = [HasAVX] in {
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
+ (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
+ (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
+}
+
// Splat v2f64 / v2i64
let AddedComplexity = 10 in {
def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
@@ -4784,7 +4826,7 @@ def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// AVX 256-bit register conversion intrinsics
def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
(VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)),
+def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))),
(VCVTDQ2PDYrm addr:$src)>;
def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
@@ -4794,7 +4836,7 @@ def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(v4f64 (sint_to_fp (memopv4i32 addr:$src))),
+def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
(VCVTDQ2PDYrm addr:$src)>;
//===---------------------------------------------------------------------===//
@@ -5085,7 +5127,7 @@ let Constraints = "$src1 = $dst" in {
/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag128, Intrinsic IntId128> {
+ Intrinsic IntId128> {
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -5097,12 +5139,12 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
- (bitconvert (mem_frag128 addr:$src))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src))))]>, OpSize;
}
/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag256, Intrinsic IntId256> {
+ Intrinsic IntId256> {
def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -5114,32 +5156,32 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
(IntId256
- (bitconvert (mem_frag256 addr:$src))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src))))]>, OpSize;
}
let Predicates = [HasAVX] in {
- defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb",
int_x86_ssse3_pabs_b_128>, VEX;
- defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw",
int_x86_ssse3_pabs_w_128>, VEX;
- defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32,
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd",
int_x86_ssse3_pabs_d_128>, VEX;
}
let Predicates = [HasAVX2] in {
- defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb", memopv32i8,
+ defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb",
int_x86_avx2_pabs_b>, VEX;
- defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw", memopv16i16,
+ defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw",
int_x86_avx2_pabs_w>, VEX;
- defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", memopv8i32,
+ defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd",
int_x86_avx2_pabs_d>, VEX;
}
-defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
int_x86_ssse3_pabs_b_128>;
-defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
int_x86_ssse3_pabs_w_128>;
-defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
int_x86_ssse3_pabs_d_128>;
//===---------------------------------------------------------------------===//
@@ -5148,8 +5190,7 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag128, Intrinsic IntId128,
- bit Is2Addr = 1> {
+ Intrinsic IntId128, bit Is2Addr = 1> {
let isCommutable = 1 in
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -5165,11 +5206,11 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (mem_frag128 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
}
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag256, Intrinsic IntId256> {
+ Intrinsic IntId256> {
let isCommutable = 1 in
def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
@@ -5181,94 +5222,94 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(IntId256 VR256:$src1,
- (bitconvert (mem_frag256 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
+ defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw",
int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32,
+ defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd",
int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
- defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16,
+ defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16,
+ defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw",
int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32,
+ defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd",
int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
- defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16,
+ defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
- defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8,
+ defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
- defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8,
+ defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb",
int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
- defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8,
+ defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128, 0>, VEX_4V;
- defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16,
+ defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw",
int_x86_ssse3_psign_w_128, 0>, VEX_4V;
- defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32,
+ defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd",
int_x86_ssse3_psign_d_128, 0>, VEX_4V;
}
-defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
}
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw", memopv16i16,
+ defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw",
int_x86_avx2_phadd_w>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd", memopv8i32,
+ defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd",
int_x86_avx2_phadd_d>, VEX_4V;
- defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", memopv16i16,
+ defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw", memopv16i16,
+ defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw",
int_x86_avx2_phsub_w>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd", memopv8i32,
+ defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd",
int_x86_avx2_phsub_d>, VEX_4V;
- defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", memopv16i16,
+ defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
int_x86_avx2_phsub_sw>, VEX_4V;
- defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", memopv32i8,
+ defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
int_x86_avx2_pmadd_ub_sw>, VEX_4V;
- defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8,
+ defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb",
int_x86_avx2_pshuf_b>, VEX_4V;
- defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv32i8,
+ defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb",
int_x86_avx2_psign_b>, VEX_4V;
- defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv16i16,
+ defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw",
int_x86_avx2_psign_w>, VEX_4V;
- defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv8i32,
+ defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd",
int_x86_avx2_psign_d>, VEX_4V;
}
-defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16,
+defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
int_x86_avx2_pmul_hr_sw>, VEX_4V;
}
// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
+ defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw",
int_x86_ssse3_phadd_w_128>;
- defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
+ defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd",
int_x86_ssse3_phadd_d_128>;
- defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
int_x86_ssse3_phadd_sw_128>;
- defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
+ defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw",
int_x86_ssse3_phsub_w_128>;
- defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
+ defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd",
int_x86_ssse3_phsub_d_128>;
- defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128>;
- defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
int_x86_ssse3_pmadd_ub_sw_128>;
- defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8,
+ defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb",
int_x86_ssse3_pshuf_b_128>;
- defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
+ defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb",
int_x86_ssse3_psign_b_128>;
- defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
+ defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw",
int_x86_ssse3_psign_w_128>;
- defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
+ defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd",
int_x86_ssse3_psign_d_128>;
}
-defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
int_x86_ssse3_pmul_hr_sw_128>;
}
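
These SSSE3 multiclass changes drop the per-element-type memory fragments and canonicalize every 128-bit (resp. 256-bit) integer load to memopv2i64 (resp. memopv4i64) wrapped in a bitconvert, so one load pattern serves all element widths. A bitconvert only reinterprets the same bits, which at the C++ level is a memcpy-style cast:

    #include <cstdint>
    #include <cstring>

    struct V2i64 { uint64_t v[2]; };   // the canonical 128-bit load type
    struct V16i8 { uint8_t  v[16]; };  // one of the views a pattern may want

    static V16i8 bitconvert(const V2i64 &in) {
      V16i8 out;  // same 128 bits, reinterpreted; no lane values change
      std::memcpy(&out, &in, sizeof(out));
      return out;
    }
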
@@ -6017,8 +6058,18 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
Intrinsic F32Int,
Intrinsic F64Int, bit Is2Addr = 1> {
let ExeDomain = GenericDomain in {
- // Intrinsic operation, reg.
+ // Operation, reg.
def SSr : SS4AIi8<opcss, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -6040,8 +6091,18 @@ let ExeDomain = GenericDomain in {
(F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
OpSize;
- // Intrinsic operation, reg.
+ // Operation, reg.
def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -6079,6 +6140,27 @@ let Predicates = [HasAVX] in {
defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
int_x86_sse41_round_ss,
int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
+
+ def : Pat<(ffloor FR32:$src),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+ def : Pat<(f64 (ffloor FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+ def : Pat<(f32 (fnearbyint FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
+ def : Pat<(f64 (fnearbyint FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+ def : Pat<(f32 (fceil FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+ def : Pat<(f64 (fceil FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+ def : Pat<(f32 (frint FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
+ def : Pat<(f64 (frint FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
+ def : Pat<(f32 (ftrunc FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+ def : Pat<(f64 (ftrunc FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -6088,6 +6170,27 @@ let Constraints = "$src1 = $dst" in
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
+def : Pat<(ffloor FR32:$src),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+def : Pat<(f64 (ffloor FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+def : Pat<(f32 (fnearbyint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
+def : Pat<(f64 (fnearbyint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+def : Pat<(f32 (fceil FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+def : Pat<(f64 (fceil FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+def : Pat<(f32 (frint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
+def : Pat<(f64 (frint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
+def : Pat<(f32 (ftrunc FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+def : Pat<(f64 (ftrunc FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+
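
The rounding immediates in these patterns decode as follows: bits 1:0 select the mode (00 nearest, 01 down, 10 up, 11 truncate), bit 2 set means "use the current MXCSR mode instead", and bit 3 suppresses the precision exception; hence 0x1 for ffloor, 0x2 for fceil, 0x3 for ftrunc, 0x4 for frint, and 0xC for the exception-quiet fnearbyint. The same constants surface in the SSE4.1 intrinsics header, e.g. this usage sketch (requires SSE4.1):

    #include <smmintrin.h>

    // _MM_FROUND_FLOOR == 0x1, matching the (i32 0x1) in the ffloor
    // patterns above; _MM_FROUND_NEARBYINT == 0xC, and so on.
    static float floorViaRoundss(float x) {
      __m128 v = _mm_set_ss(x);
      return _mm_cvtss_f32(_mm_round_ss(v, v, _MM_FROUND_FLOOR));
    }
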
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//
@@ -6195,7 +6298,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
- (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src))))]>, OpSize;
}
let Predicates = [HasAVX] in
@@ -6221,7 +6324,7 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
}
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
@@ -6237,7 +6340,7 @@ multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(IntId256 VR256:$src1,
- (bitconvert (memopv32i8 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
let Predicates = [HasAVX] in {
@@ -6400,38 +6503,38 @@ let Predicates = [HasAVX] in {
let isCommutable = 0 in {
let ExeDomain = SSEPackedSingle in {
defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv4f32, i128mem, 0>, VEX_4V;
defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ int_x86_avx_blend_ps_256, VR256, memopv8f32, i256mem, 0>, VEX_4V;
}
let ExeDomain = SSEPackedDouble in {
defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2f64, i128mem, 0>, VEX_4V;
defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ int_x86_avx_blend_pd_256, VR256, memopv4f64, i256mem, 0>, VEX_4V;
}
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2i64, i128mem, 0>, VEX_4V;
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2i64, i128mem, 0>, VEX_4V;
}
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv4f32, i128mem, 0>, VEX_4V;
let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2f64, i128mem, 0>, VEX_4V;
let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
- VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ VR256, memopv8f32, i256mem, 0>, VEX_4V;
}
let Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
- VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ VR256, memopv4i64, i256mem, 0>, VEX_4V;
defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
- VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ VR256, memopv4i64, i256mem, 0>, VEX_4V;
}
}
@@ -6439,35 +6542,35 @@ let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
let ExeDomain = SSEPackedSingle in
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv4f32, i128mem>;
let ExeDomain = SSEPackedDouble in
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2f64, i128mem>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2i64, i128mem>;
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2i64, i128mem>;
}
let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv4f32, i128mem>;
let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2f64, i128mem>;
}
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operands
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop,
PatFrag mem_frag, Intrinsic IntId> {
- def rr : I<opc, MRMSrcReg, (outs RC:$dst),
+ def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
- def rm : I<opc, MRMSrcMem, (outs RC:$dst),
+ def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@@ -6480,23 +6583,23 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
- memopv16i8, int_x86_sse41_blendvpd>;
+ memopv2f64, int_x86_sse41_blendvpd>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
- memopv32i8, int_x86_avx_blendv_pd_256>;
+ memopv4f64, int_x86_avx_blendv_pd_256>;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
- memopv16i8, int_x86_sse41_blendvps>;
+ memopv4f32, int_x86_sse41_blendvps>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
- memopv32i8, int_x86_avx_blendv_ps_256>;
+ memopv8f32, int_x86_avx_blendv_ps_256>;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
- memopv16i8, int_x86_sse41_pblendvb>;
+ memopv2i64, int_x86_sse41_pblendvb>;
}
let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
- memopv32i8, int_x86_avx2_pblendvb>;
+ memopv4i64, int_x86_avx2_pblendvb>;
}
let Predicates = [HasAVX] in {
@@ -6537,7 +6640,8 @@ let Predicates = [HasAVX2] in {
/// SS41I_ternary_int - SSE 4.1 ternary operator
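+/// These blends read their selection mask implicitly from XMM0 (hence the
+/// Uses = [XMM0] below); only the two data sources appear as operands.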
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
- multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+ Intrinsic IntId> {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr,
@@ -6551,15 +6655,18 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
"\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
(IntId VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
+ (bitconvert (mem_frag addr:$src2)), XMM0))]>, OpSize;
}
}
let ExeDomain = SSEPackedDouble in
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64,
+ int_x86_sse41_blendvpd>;
let ExeDomain = SSEPackedSingle in
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32,
+ int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64,
+ int_x86_sse41_pblendvb>;
let Predicates = [HasSSE41] in {
def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
@@ -6614,8 +6721,7 @@ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize;
}
/// SS42I_binop_rm_int_y - Simple SSE 4.2 binary operator (256-bit AVX form)
@@ -6630,8 +6736,7 @@ multiclass SS42I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
- (IntId256 VR256:$src1,
- (bitconvert (memopv32i8 addr:$src2))))]>, OpSize;
+ (IntId256 VR256:$src1, (memopv4i64 addr:$src2)))]>, OpSize;
}
let Predicates = [HasAVX] in {
@@ -6913,7 +7018,7 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
}
// Perform One Round of an AES Encryption/Decryption Flow
@@ -7144,7 +7249,7 @@ def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -7163,35 +7268,10 @@ def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
(VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
-def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
- (i32 imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsertf128_imm VR256:$ins))>;
-
//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
(ins VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -7210,31 +7290,6 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v4f32 (VEXTRACTF128rr
- (v8f32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v2f64 (VEXTRACTF128rr
- (v4f64 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v4i32 (VEXTRACTF128rr
- (v8i32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v2i64 (VEXTRACTF128rr
- (v4i64 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v8i16 (VEXTRACTF128rr
- (v16i16 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v16i8 (VEXTRACTF128rr
- (v32i8 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-
//===----------------------------------------------------------------------===//
// VMASKMOV - Conditional SIMD Packed Loads and Stores
//
@@ -7288,7 +7343,8 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop_i:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V;
+ [(set RC:$dst, (IntVar RC:$src1,
+ (bitconvert (i_frag addr:$src2))))]>, VEX_4V;
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, i8imm:$src2),
@@ -7302,11 +7358,11 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
let ExeDomain = SSEPackedSingle in {
defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
- memopv4f32, memopv4i32,
+ memopv4f32, memopv2i64,
int_x86_avx_vpermilvar_ps,
int_x86_avx_vpermil_ps>;
defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- memopv8f32, memopv8i32,
+ memopv8f32, memopv4i64,
int_x86_avx_vpermilvar_ps_256,
int_x86_avx_vpermil_ps_256>;
}
@@ -7321,19 +7377,28 @@ let ExeDomain = SSEPackedDouble in {
int_x86_avx_vpermil_pd_256>;
}
-def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v8f32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>;
-def : Pat<(v4f64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v4f64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPDYri VR256:$src1, imm:$imm)>;
-def : Pat<(v8i32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>;
-def : Pat<(v4i64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPDYri VR256:$src1, imm:$imm)>;
+def : Pat<(v8f32 (X86VPermilp (memopv8f32 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPSYmi addr:$src1, imm:$imm)>;
+def : Pat<(v4f64 (X86VPermilp (memopv4f64 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPDYmi addr:$src1, imm:$imm)>;
+def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPERMILPSYmi addr:$src1, imm:$imm)>;
+def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPDYmi addr:$src1, imm:$imm)>;
//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//
-let neverHasSideEffects = 1 in {
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -7359,22 +7424,9 @@ def : Pat<(int_x86_avx_vperm2f128_pd_256
VR256:$src1, (memopv4f64 addr:$src2), imm:$src3),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
def : Pat<(int_x86_avx_vperm2f128_si_256
- VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
+ VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), imm:$src3),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
-def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-
//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
//
@@ -7451,9 +7503,9 @@ multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
let isCommutable = 0 in {
defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2i64, i128mem>;
defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
- VR256, memopv32i8, i256mem>;
+ VR256, memopv4i64, i256mem>;
}
//===----------------------------------------------------------------------===//
@@ -7541,11 +7593,12 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR256:$dst, (Int VR256:$src1, (mem_frag addr:$src2)))]>,
+ [(set VR256:$dst, (Int VR256:$src1,
+ (bitconvert (mem_frag addr:$src2))))]>,
VEX_4V;
}
-defm VPERMD : avx2_perm<0x36, "vpermd", memopv8i32, int_x86_avx2_permd>;
+defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, int_x86_avx2_permd>;
let ExeDomain = SSEPackedSingle in
defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>;
@@ -7571,7 +7624,7 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>,
VEX_W;
//===----------------------------------------------------------------------===//
-// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
+// VPERM2I128 - Permute Integer Values in 128-bit chunks
//
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
@@ -7587,6 +7640,64 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
imm:$src3))]>,
VEX_4V;
+let Predicates = [HasAVX2] in {
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+}
+
+// AVX1 patterns
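+// With AVX1 only, 256-bit integer shuffles have no native instruction, so
+// they are matched to the floating-point VPERM2F128 (at a possible
+// domain-crossing cost).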
+def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+
+def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1,
+ (memopv8f32 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1,
+ (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
+ (memopv4i64 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
+ (memopv4f64 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1,
+ (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+
//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
//
@@ -7603,6 +7714,51 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
(int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2),
imm:$src3))]>, VEX_4V;
+let Predicates = [HasAVX2] in {
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+}
+
+// AVX1 patterns
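+// Without AVX2, 128-bit lane inserts of integer vectors likewise go through
+// the floating-point VINSERTF128.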
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
+ (i32 imm)),
+ (VINSERTF128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
//===----------------------------------------------------------------------===//
// VEXTRACTI128 - Extract packed integer values
//
@@ -7617,6 +7773,51 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
(ins i128mem:$dst, VR256:$src1, i8imm:$src2),
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
+let Predicates = [HasAVX2] in {
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTI128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTI128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v8i16 (VEXTRACTI128rr
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v16i8 (VEXTRACTI128rr
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+}
+
+// AVX1 patterns
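+// Integer lane extraction similarly falls back to VEXTRACTF128 on AVX1.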
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4f32 (VEXTRACTF128rr
+ (v8f32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2f64 (VEXTRACTF128rr
+ (v4f64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTF128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTF128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v8i16 (VEXTRACTF128rr
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v16i8 (VEXTRACTF128rr
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+
//===----------------------------------------------------------------------===//
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
//
diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td
new file mode 100644
index 0000000..64cc44d
--- /dev/null
+++ b/lib/Target/X86/X86InstrXOP.td
@@ -0,0 +1,243 @@
+//===- X86InstrXOP.td - Describe the XOP Instruction Set ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the XOP (eXtended OPerations) instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+multiclass xop2op<bits<8> opc, string OpcodeStr, X86MemOperand x86memop> {
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, VEX;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, VEX;
+}
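+
+// All XOP defs in this file carry empty patterns and are isAsmParserOnly: at
+// this point they only teach the assembler and disassembler the encodings,
+// with intrinsic selection patterns presumably to follow later.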
+
+let isAsmParserOnly = 1 in {
+ defm VPHSUBWD : xop2op<0xE2, "vphsubwd", f128mem>;
+ defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", f128mem>;
+ defm VPHSUBBW : xop2op<0xE1, "vphsubbw", f128mem>;
+ defm VPHADDWQ : xop2op<0xC7, "vphaddwq", f128mem>;
+ defm VPHADDWD : xop2op<0xC6, "vphaddwd", f128mem>;
+ defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", f128mem>;
+ defm VPHADDUWD : xop2op<0xD6, "vphadduwd", f128mem>;
+ defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", f128mem>;
+ defm VPHADDUBW : xop2op<0xD1, "vphaddubw", f128mem>;
+ defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", f128mem>;
+ defm VPHADDUBD : xop2op<0xD2, "vphaddubd", f128mem>;
+ defm VPHADDDQ : xop2op<0xCB, "vphadddq", f128mem>;
+ defm VPHADDBW : xop2op<0xC1, "vphaddbw", f128mem>;
+ defm VPHADDBQ : xop2op<0xC3, "vphaddbq", f128mem>;
+ defm VPHADDBD : xop2op<0xC2, "vphaddbd", f128mem>;
+ defm VFRCZSS : xop2op<0x82, "vfrczss", f32mem>;
+ defm VFRCZSD : xop2op<0x83, "vfrczsd", f64mem>;
+ defm VFRCZPS : xop2op<0x80, "vfrczps", f128mem>;
+ defm VFRCZPD : xop2op<0x81, "vfrczpd", f128mem>;
+}
+
+multiclass xop2op256<bits<8> opc, string OpcodeStr> {
+ def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, VEX, VEX_L;
+ def rmY : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>, VEX, VEX_L;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VFRCZPS : xop2op256<0x80, "vfrczps">;
+ defm VFRCZPD : xop2op256<0x81, "vfrczpd">;
+}
+
+multiclass xop3op<bits<8> opc, string OpcodeStr> {
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4VOp3;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4V, VEX_W;
+ def mr : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX_4VOp3;
+}
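+
+// The VEX.W bit selects which source takes the memory form: with W set (rm)
+// $src2 may come from memory, while the W-clear (mr) encoding via VEX_4VOp3
+// lets $src1 be the memory operand.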
+
+let isAsmParserOnly = 1 in {
+ defm VPSHLW : xop3op<0x95, "vpshlw">;
+ defm VPSHLQ : xop3op<0x97, "vpshlq">;
+ defm VPSHLD : xop3op<0x96, "vpshld">;
+ defm VPSHLB : xop3op<0x94, "vpshlb">;
+ defm VPSHAW : xop3op<0x99, "vpshaw">;
+ defm VPSHAQ : xop3op<0x9B, "vpshaq">;
+ defm VPSHAD : xop3op<0x9A, "vpshad">;
+ defm VPSHAB : xop3op<0x98, "vpshab">;
+ defm VPROTW : xop3op<0x91, "vprotw">;
+ defm VPROTQ : xop3op<0x93, "vprotq">;
+ defm VPROTD : xop3op<0x92, "vprotd">;
+ defm VPROTB : xop3op<0x90, "vprotb">;
+}
+
+multiclass xop3opimm<bits<8> opc, string OpcodeStr> {
+ def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX;
+ def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPROTW : xop3opimm<0xC1, "vprotw">;
+ defm VPROTQ : xop3opimm<0xC3, "vprotq">;
+ defm VPROTD : xop3opimm<0xC2, "vprotd">;
+ defm VPROTB : xop3opimm<0xC0, "vprotb">;
+}
+
+// Instructions where the second source can be memory, but the third must be
+// a register
+multiclass xop4opm2<bits<8> opc, string OpcodeStr> {
+ def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM;
+ def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd">;
+ defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd">;
+ defm VPMACSWW : xop4opm2<0x95, "vpmacsww">;
+ defm VPMACSWD : xop4opm2<0x96, "vpmacswd">;
+ defm VPMACSSWW : xop4opm2<0x85, "vpmacssww">;
+ defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd">;
+ defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql">;
+ defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh">;
+ defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd">;
+ defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql">;
+ defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh">;
+ defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd">;
+}
+
+// Instructions where the second source can be memory and the third must be
+// an imm8
+multiclass xop4opimm<bits<8> opc, string OpcodeStr> {
+ def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V;
+ def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPCOMW : xop4opimm<0xCD, "vpcomw">;
+ defm VPCOMUW : xop4opimm<0xED, "vpcomuw">;
+ defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq">;
+ defm VPCOMUD : xop4opimm<0xEE, "vpcomud">;
+ defm VPCOMUB : xop4opimm<0xEC, "vpcomub">;
+ defm VPCOMQ : xop4opimm<0xCF, "vpcomq">;
+ defm VPCOMD : xop4opimm<0xCE, "vpcomd">;
+ defm VPCOMB : xop4opimm<0xCC, "vpcomb">;
+}
+
+// Instructions where either the second or the third source can be memory
+multiclass xop4op<bits<8> opc, string OpcodeStr> {
+ def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM;
+ def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM, XOP_W;
+ def mr : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPPERM : xop4op<0xA3, "vpperm">;
+ defm VPCMOV : xop4op<0xA2, "vpcmov">;
+}
+
+multiclass xop4op256<bits<8> opc, string OpcodeStr> {
+ def rrY : IXOPi8<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM;
+ def rmY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM, XOP_W;
+ def mrY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, VEX_4V, VEX_I8IMM;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPCMOV : xop4op256<0xA2, "vpcmov">;
+}
+
+multiclass xop5op<bits<8> opc, string OpcodeStr> {
+ def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>;
+ def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>, XOP_W;
+ def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>;
+ def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>;
+ def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>, XOP_W;
+ def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ []>;
+}
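+
+// vpermil2ps/pd take two data sources plus a selector (register or memory)
+// and an imm8, hence this dedicated five-operand IXOP5 format with rm/mr
+// memory forms at both 128- and 256-bit widths.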
+
+let isAsmParserOnly = 1 in {
+ defm VPERMIL2PD : xop5op<0x49, "vpermil2pd">;
+ defm VPERMIL2PS : xop5op<0x48, "vpermil2ps">;
+}
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 3f88fa6..2145a33 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -424,7 +424,9 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
TargetJITInfo::LazyResolverFn
X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
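+ // Racing writes to JITCompilerFunction are presumably benign here; the
+ // annotations keep ThreadSanitizer from reporting them.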
+ TsanIgnoreWritesBegin();
JITCompilerFunction = F;
+ TsanIgnoreWritesEnd();
#if defined (X86_32_JIT) && !defined (_MSC_VER)
if (Subtarget->hasSSE1())
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 81ee665..9232196 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -368,10 +368,6 @@ ReSimplify:
case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break;
case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
- case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
- case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index c1ac9f3..4e80432 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -452,7 +452,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return (RealignStack &&
+ return (MF.getTarget().Options.RealignStack &&
!MFI->hasVarSizedObjects());
}
@@ -583,7 +583,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// sure we restore the stack pointer immediately after the call, there may
// be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
MachineBasicBlock::iterator B = MBB.begin();
- while (I != B && !llvm::prior(I)->getDesc().isCall())
+ while (I != B && !llvm::prior(I)->isCall())
--I;
MBB.insert(I, New);
}
@@ -665,7 +665,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
case MVT::i8:
if (High) {
switch (Reg) {
- default: return 0;
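+ // No high-byte sub-register exists for the remaining registers; retry as
+ // i64 so the new "high" SI/DI/BP/SP cases below can apply.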
+ default: return getX86SubSuperRegister(Reg, MVT::i64, High);
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AH;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -785,6 +785,22 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
return X86::R15D;
}
case MVT::i64:
+ // In 64-bit mode, when a "high" register is requested (as for the Q or r
+ // inline-asm constraints), these four registers have no high-byte form, so
+ // hand back the 16-bit name; everything else falls through to the plain
+ // register mapping below.
+ if (High) {
+ switch (Reg) {
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ // Anything else falls through to the common mapping below.
+ }
+ }
switch (Reg) {
default: return Reg;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index e7bcbf8..6e092c7 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -273,6 +273,8 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
if (IsAMD && ((ECX >> 16) & 0x1)) {
HasFMA4 = true;
ToggleFeature(X86::FeatureFMA4);
+ HasXOP = true;
+ ToggleFeature(X86::FeatureXOP);
}
}
}
@@ -317,6 +319,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
, HasCLMUL(false)
, HasFMA3(false)
, HasFMA4(false)
+ , HasXOP(false)
, HasMOVBE(false)
, HasRDRAND(false)
, HasF16C(false)
@@ -387,9 +390,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
assert((!In64BitMode || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
- if(EnableSegmentedStacks && !isTargetELF())
- report_fatal_error("Segmented stacks are only implemented on ELF.");
-
// Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
// 32 and 64 bit) and for all 64-bit targets.
if (StackAlignOverride)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index e93f8e9..ccb9be0 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -93,6 +93,9 @@ protected:
/// HasFMA4 - Target has 4-operand fused multiply-add
bool HasFMA4;
+ /// HasXOP - Target has XOP instructions
+ bool HasXOP;
+
/// HasMOVBE - True if the processor has the MOVBE instruction.
bool HasMOVBE;
@@ -198,6 +201,7 @@ public:
bool hasCLMUL() const { return HasCLMUL; }
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
+ bool hasXOP() const { return HasXOP; }
bool hasMOVBE() const { return HasMOVBE; }
bool hasRDRAND() const { return HasRDRAND; }
bool hasF16C() const { return HasF16C; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 1c9f3bd..126042e 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -31,9 +31,10 @@ extern "C" void LLVMInitializeX86Target() {
X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : X86TargetMachine(T, TT, CPU, FS, RM, CM, OL, false),
+ : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false),
DataLayout(getSubtargetImpl()->isTargetDarwin() ?
"e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-"
"n8:16:32-S128" :
@@ -52,9 +53,10 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : X86TargetMachine(T, TT, CPU, FS, RM, CM, OL, true),
+ : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true),
DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
"n8:16:32:64-S128"),
InstrInfo(*this),
@@ -67,11 +69,12 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
///
X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
- Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, Options.StackAlignmentOverride, is64Bit),
FrameLowering(*this, Subtarget),
ELFWriterInfo(is64Bit, true) {
// Determine the PICStyle based on the target selected.
@@ -95,8 +98,11 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
}
// default to hard float ABI
- if (FloatABIType == FloatABI::Default)
- FloatABIType = FloatABI::Hard;
+ if (Options.FloatABIType == FloatABI::Default)
+ this->Options.FloatABIType = FloatABI::Hard;
+
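+ // Moved here from X86Subtarget so the check reads the per-target-machine
+ // Options.EnableSegmentedStacks instead of a global flag.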
+ if (Options.EnableSegmentedStacks && !Subtarget.isTargetELF())
+ report_fatal_error("Segmented stacks are only implemented on ELF.");
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 64be458..3ac1769 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -38,7 +38,7 @@ class X86TargetMachine : public LLVMTargetMachine {
public:
X86TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit);
@@ -85,7 +85,7 @@ class X86_32TargetMachine : public X86TargetMachine {
X86JITInfo JITInfo;
public:
X86_32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
virtual const TargetData *getTargetData() const { return &DataLayout; }
@@ -113,7 +113,7 @@ class X86_64TargetMachine : public X86TargetMachine {
X86JITInfo JITInfo;
public:
X86_64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
virtual const TargetData *getTargetData() const { return &DataLayout; }
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 9bb54a8..f8c30eb 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -220,7 +220,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
MachineInstr *MI = I;
DebugLoc dl = I->getDebugLoc();
- bool isControlFlow = MI->getDesc().isCall() || MI->getDesc().isReturn();
+ bool isControlFlow = MI->isCall() || MI->isReturn();
// Shortcut: don't need to check regular instructions in dirty state.
if (!isControlFlow && CurState == ST_DIRTY)
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index d91da8c..de4abfc 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -21,17 +21,5 @@ add_llvm_target(XCoreCodeGen
XCoreSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMXCoreCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- LLVMXCoreDesc
- LLVMXCoreInfo
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/XCore/LLVMBuild.txt b/lib/Target/XCore/LLVMBuild.txt
index 1f7e2d5..53b4a9e 100644
--- a/lib/Target/XCore/LLVMBuild.txt
+++ b/lib/Target/XCore/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
[component_0]
type = TargetGroup
name = XCore
@@ -27,4 +30,3 @@ name = XCoreCodeGen
parent = XCore
required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support Target XCoreDesc XCoreInfo
add_to_library_groups = XCore
-
diff --git a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
index 269822d..3a3f5b4 100644
--- a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
@@ -3,11 +3,6 @@ add_llvm_library(LLVMXCoreDesc
XCoreMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMXCoreDesc
- LLVMMC
- LLVMXCoreInfo
- )
-
add_dependencies(LLVMXCoreDesc XCoreCommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
diff --git a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
index 628afb5..a80c939 100644
--- a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = XCoreDesc
parent = XCore
required_libraries = MC XCoreInfo
add_to_library_groups = XCore
-
diff --git a/lib/Target/XCore/TargetInfo/CMakeLists.txt b/lib/Target/XCore/TargetInfo/CMakeLists.txt
index 7f84f69..2c34b87 100644
--- a/lib/Target/XCore/TargetInfo/CMakeLists.txt
+++ b/lib/Target/XCore/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMXCoreInfo
XCoreTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMXCoreInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMXCoreInfo XCoreCommonTableGen)
diff --git a/lib/Target/XCore/TargetInfo/LLVMBuild.txt b/lib/Target/XCore/TargetInfo/LLVMBuild.txt
index d0b8e54..770ba87 100644
--- a/lib/Target/XCore/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/XCore/TargetInfo/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = XCoreInfo
parent = XCore
required_libraries = MC Support Target
add_to_library_groups = XCore
-
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index b8fb0ca..08f091e 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -24,7 +24,8 @@ namespace llvm {
class XCoreTargetMachine;
class formatted_raw_ostream;
- FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM);
+ FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
} // end namespace llvm;
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 7f8b169..5007d04 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -84,7 +84,8 @@ XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti)
}
bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const {
- return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MF.getFrameInfo()->hasVarSizedObjects();
}
void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 8d746ae..7564fba 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -41,8 +41,8 @@ namespace {
const XCoreSubtarget &Subtarget;
public:
- XCoreDAGToDAGISel(XCoreTargetMachine &TM)
- : SelectionDAGISel(TM),
+ XCoreDAGToDAGISel(XCoreTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
Lowering(*TM.getTargetLowering()),
Subtarget(*TM.getSubtargetImpl()) { }
@@ -83,8 +83,9 @@ namespace {
/// createXCoreISelDag - This pass converts a legalized DAG into an
/// XCore-specific DAG, ready for instruction scheduling.
///
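+/// The optimization level is now threaded through to SelectionDAGISel so the
+/// selector can skip optimization-only work at -O0, matching the other
+/// targets touched by this merge.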
-FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) {
- return new XCoreDAGToDAGISel(TM);
+FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new XCoreDAGToDAGISel(TM, OptLevel);
}
bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index d791daa..c5c668e 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -109,6 +109,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
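+ // The recently added CTTZ/CTLZ_ZERO_UNDEF nodes have no cheaper XCore
+ // lowering, so expand them to the default sequence.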
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index eec3674..7e1e035 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -21,9 +21,10 @@ using namespace llvm;
///
XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
"i16:16:32-i32:32:32-i64:32:32-n32"),
@@ -34,7 +35,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
}
bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM) {
- PM.add(createXCoreISelDag(*this));
+ PM.add(createXCoreISelDag(*this, getOptLevel()));
return false;
}
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 3f2644d..0159b1e 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -33,7 +33,7 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreSelectionDAGInfo TSInfo;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 8fa66fc..58b3551 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -20,14 +20,3 @@ add_llvm_library(LLVMipo
StripDeadPrototypes.cpp
StripSymbols.cpp
)
-
-add_llvm_library_dependencies(LLVMipo
- LLVMAnalysis
- LLVMCore
- LLVMInstCombine
- LLVMScalarOpts
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- LLVMipa
- )
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index c57e9fc..2e869e6 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -61,6 +62,7 @@ namespace {
struct GlobalStatus;
struct GlobalOpt : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
}
static char ID; // Pass identification, replacement for typeid
GlobalOpt() : ModulePass(ID) {
@@ -84,7 +86,10 @@ namespace {
}
char GlobalOpt::ID = 0;
-INITIALIZE_PASS(GlobalOpt, "globalopt",
+INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
+ "Global Variable Optimizer", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
@@ -345,6 +350,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
// and will invalidate our notion of what Init is.
Constant *SubInit = 0;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
+ // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
ConstantExpr *CE =
dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
@@ -828,6 +834,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
static void ConstantPropUsersOf(Value *V) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
+ // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
if (Constant *NewC = ConstantFoldInstruction(I)) {
I->replaceAllUsesWith(NewC);
@@ -1931,7 +1938,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
if (GV->hasInitializer())
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
TargetData *TD = getAnalysisIfAvailable<TargetData>();
- Constant *New = ConstantFoldConstantExpression(CE, TD);
+ TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ Constant *New = ConstantFoldConstantExpression(CE, TD, TLI);
if (New && New != CE)
GV->setInitializer(New);
}
@@ -2304,7 +2312,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
DenseMap<Constant*, Constant*> &MutatedMemory,
std::vector<GlobalVariable*> &AllocaTmps,
SmallPtrSet<Constant*, 8> &SimpleConstants,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Check to see if this function is already executing (recursion). If so,
// bail out. TODO: we might want to accept limited recursion.
if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
@@ -2461,7 +2470,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (Callee->isDeclaration()) {
// If this is a function we can constant fold, do it.
- if (Constant *C = ConstantFoldCall(Callee, Formals)) {
+ if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
InstResult = C;
} else {
return false;
@@ -2473,7 +2482,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
Constant *RetVal;
// Execute the call, if successful, use the return value.
if (!EvaluateFunction(Callee, RetVal, Formals, CallStack,
- MutatedMemory, AllocaTmps, SimpleConstants, TD))
+ MutatedMemory, AllocaTmps, SimpleConstants, TD,
+ TLI))
return false;
InstResult = RetVal;
}
@@ -2535,7 +2545,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (!CurInst->use_empty()) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
- InstResult = ConstantFoldConstantExpression(CE, TD);
+ InstResult = ConstantFoldConstantExpression(CE, TD, TLI);
Values[CurInst] = InstResult;
}
@@ -2547,7 +2557,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
/// we can. Return true if we can, false otherwise.
-static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) {
+static bool EvaluateStaticConstructor(Function *F, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
/// MutatedMemory - For each store we execute, we update this map. Loads
/// check this to get the most up-to-date value. If evaluation is successful,
/// this state is committed to the process.
@@ -2572,7 +2583,7 @@ static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) {
bool EvalSuccess = EvaluateFunction(F, RetValDummy,
SmallVector<Constant*, 0>(), CallStack,
MutatedMemory, AllocaTmps,
- SimpleConstants, TD);
+ SimpleConstants, TD, TLI);
if (EvalSuccess) {
// We succeeded at evaluation: commit the result.
@@ -2601,8 +2612,6 @@ static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) {
return EvalSuccess;
}
-
-
/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible.
/// Return true if anything changed.
bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
@@ -2611,6 +2620,8 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
if (Ctors.empty()) return false;
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+
// Loop over global ctors, optimizing them when we can.
for (unsigned i = 0; i != Ctors.size(); ++i) {
Function *F = Ctors[i];
@@ -2628,7 +2639,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
if (F->empty()) continue;
// If we can evaluate the ctor at compile time, do.
- if (EvaluateStaticConstructor(F, TD)) {
+ if (EvaluateStaticConstructor(F, TD, TLI)) {
Ctors.erase(Ctors.begin()+i);
MadeChange = true;
--i;
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
index 884faca..b358fab 100644
--- a/lib/Transforms/IPO/LLVMBuild.txt
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = IPO
parent = Transforms
library_name = ipo
required_libraries = Analysis Core IPA InstCombine Scalar Support Target TransformUtils
-
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 8fdfd72..f63f532 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -21,7 +21,6 @@
#include "llvm/DefaultPasses.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/Verifier.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/IPO.h"
@@ -101,6 +100,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(Inliner);
Inliner = 0;
}
+ addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
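+ // Extensions registered for EP_EnabledOnOptLevel0 now run on the -O0 path
+ // too, instead of being skipped by the early return below.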
return;
}
@@ -340,4 +340,3 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
PassManagerBase *LPM = unwrap(PM);
Builder->populateLTOPassManager(*LPM, Internalize, RunInliner);
}
-
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt
index a46d5ad..d070ccc 100644
--- a/lib/Transforms/InstCombine/CMakeLists.txt
+++ b/lib/Transforms/InstCombine/CMakeLists.txt
@@ -13,11 +13,3 @@ add_llvm_library(LLVMInstCombine
InstCombineSimplifyDemanded.cpp
InstCombineVectorOps.cpp
)
-
-add_llvm_library_dependencies(LLVMInstCombine
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 3808278..464e9d0 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -22,6 +22,7 @@
namespace llvm {
class CallSite;
class TargetData;
+ class TargetLibraryInfo;
class DbgDeclareInst;
class MemIntrinsic;
class MemSetInst;
@@ -71,6 +72,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
: public FunctionPass,
public InstVisitor<InstCombiner, Instruction*> {
TargetData *TD;
+ TargetLibraryInfo *TLI;
bool MadeIRChange;
public:
/// Worklist - All of the instructions that need to be simplified.
@@ -92,9 +94,11 @@ public:
bool DoOneIteration(Function &F, unsigned ItNum);
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
+
TargetData *getTargetData() const { return TD; }
+ TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; }
+
// Visitation implementation - Implement instruction combining for different
// instruction types. The semantics are as follows:
// Return Value:
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e8136ab..27c7c54 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -265,6 +265,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Get the current byte offset into the thing. Use the original
// operand in case we're looking through a bitcast.
SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
+ if (!GEP->getPointerOperandType()->isPointerTy())
+ return 0;
Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops);
Op1 = GEP->getPointerOperand()->stripPointerCasts();
@@ -960,7 +962,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
PointerType *PTy = cast<PointerType>(Callee->getType());
FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
if (FTy->isVarArg()) {
- int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 2 : 0);
+ int ix = FTy->getNumParams();
// See if we can optimize any arguments passed through the varargs area of
// the call.
for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index f10e48a..46e4acd 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -14,6 +14,7 @@
#include "InstCombine.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -147,8 +148,6 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
return ReplaceInstUsesWith(CI, New);
}
-
-
/// EvaluateInDifferentType - Given an expression that
/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
/// insert the code to evaluate the expression.
@@ -158,7 +157,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
// If we got a constantexpr back, try to simplify it with TD info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(CE, TD);
+ C = ConstantFoldConstantExpression(CE, TD, TLI);
return C;
}
@@ -528,9 +527,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
return ReplaceInstUsesWith(CI, In);
}
-
-
-
+
// zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
// zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
// zext (X == 1) to i32 --> X iff X has only the low bit set.
@@ -1213,10 +1210,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
// Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
- // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it.
CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
- if (Call && Call->getCalledFunction() &&
- Call->getCalledFunction()->getName() == "sqrt" &&
+ if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
+ Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) &&
Call->getNumArgOperands() == 1 &&
Call->hasOneUse()) {
CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
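
The rewritten fold no longer hard-codes the name "sqrt": it asks TargetLibraryInfo whether sqrtf exists on the target and what sqrt is called, so -fno-builtin-style configurations can switch the transform off. A hedged sketch of just the gating check, pulled out into a free function — `canFoldSqrtToSqrtf` is a hypothetical name, not part of this commit:

#include "llvm/Instructions.h"
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;

// True only when the call is literally the target's "sqrt" with one
// argument and the target also provides "sqrtf" for us to emit.
static bool canFoldSqrtToSqrtf(const CallInst *Call,
                               const TargetLibraryInfo *TLI) {
  const Function *Callee = Call->getCalledFunction();
  return Callee && TLI->has(LibFunc::sqrtf) &&
         Callee->getName() == TLI->getName(LibFunc::sqrt) &&
         Call->getNumArgOperands() == 1;
}
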
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index bb1cbfa..144b92b 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -284,7 +284,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// Find out if the comparison would be true or false for the i'th element.
Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
- CompareRHS, TD);
+ CompareRHS, TD, TLI);
// If the result is undef for this element, ignore it.
if (isa<UndefValue>(C)) {
// Extend range state machines to cover this element in case there is an
@@ -1657,6 +1657,14 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
return 0;
+ // This is only really a signed overflow check if the inputs have been
+ // sign-extended; check for that condition. For example, if CI2 is 2^31 and
+ // the operands of the add are 64 bits wide, we need at least 33 sign bits.
+ unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
+ if (IC.ComputeNumSignBits(A) < NeededSignBits ||
+ IC.ComputeNumSignBits(B) < NeededSignBits)
+ return 0;
+
// In order to replace the original add with a narrower
// llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
// and truncates that discard the high bits of the add. Verify that this is
@@ -1787,6 +1795,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
+ // Comparing -val or val with non-zero is the same as just comparing val,
+ // i.e., abs(val) != 0 -> val != 0.
+ if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero()))
+ {
+ Value *Cond, *SelectTrue, *SelectFalse;
+ if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue),
+ m_Value(SelectFalse)))) {
+ if (Value *V = dyn_castNegVal(SelectTrue)) {
+ if (V == SelectFalse)
+ return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
+ }
+ else if (Value *V = dyn_castNegVal(SelectFalse)) {
+ if (V == SelectTrue)
+ return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
+ }
+ }
+ }
+
Type *Ty = Op0->getType();
// icmp's with boolean values can always be turned into bitwise operations
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 91e60a4..f1ea8ea 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -282,7 +282,8 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
/// replaced with RepOp.
static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Trivial replacement.
if (V == Op)
return RepOp;
@@ -294,17 +295,19 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// If this is a binary operator, try to simplify it with the replaced op.
if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
if (B->getOperand(0) == Op)
- return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD);
+ return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI);
if (B->getOperand(1) == Op)
- return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD);
+ return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI);
}
// Same for CmpInsts.
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
if (C->getOperand(0) == Op)
- return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD);
+ return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD,
+ TLI);
if (C->getOperand(1) == Op)
- return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD);
+ return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD,
+ TLI);
}
// TODO: We could hand off more cases to instsimplify here.
@@ -330,7 +333,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- ConstOps, TD);
+ ConstOps, TD, TLI);
}
}
@@ -479,18 +482,18 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
// arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
if (Pred == ICmpInst::ICMP_EQ) {
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal)
return ReplaceInstUsesWith(SI, FalseVal);
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal)
return ReplaceInstUsesWith(SI, FalseVal);
} else if (Pred == ICmpInst::ICMP_NE) {
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal)
return ReplaceInstUsesWith(SI, TrueVal);
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal)
return ReplaceInstUsesWith(SI, TrueVal);
}
@@ -679,6 +682,13 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return BinaryOperator::CreateOr(CondVal, FalseVal);
else if (CondVal == FalseVal)
return BinaryOperator::CreateAnd(CondVal, TrueVal);
+
+ // select a, ~a, b -> (~a)&b
+ // select a, b, ~a -> (~a)|b
+ if (match(TrueVal, m_Not(m_Specific(CondVal))))
+ return BinaryOperator::CreateAnd(TrueVal, FalseVal);
+ else if (match(FalseVal, m_Not(m_Specific(CondVal))))
+ return BinaryOperator::CreateOr(TrueVal, FalseVal);
}
// Selecting between two integer constants?
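
The new fold leans on PatternMatch: m_Not(m_Specific(CondVal)) matches only an xor of exactly that condition value with all-ones, which is what makes the rewrite safe. A standalone sketch of the same match — `foldSelectOfNot` is a hypothetical helper, not from this commit:

#include "llvm/Instructions.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;

// Returns the replacement value, or 0 if the pattern does not apply.
static Value *foldSelectOfNot(SelectInst &SI, IRBuilder<> &B) {
  Value *Cond = SI.getCondition();
  Value *T = SI.getTrueValue(), *F = SI.getFalseValue();
  if (match(T, m_Not(m_Specific(Cond))))  // select a, ~a, b  ->  (~a) & b
    return B.CreateAnd(T, F);
  if (match(F, m_Not(m_Specific(Cond))))  // select a, b, ~a  ->  (~a) | b
    return B.CreateOr(T, F);
  return 0;
}
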
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 6d85add..702e0f2 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -190,7 +190,8 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
V = IC.Builder->CreateLShr(C, NumBits);
// If we got a constantexpr back, try to simplify it with TD info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- V = ConstantFoldConstantExpression(CE, IC.getTargetData());
+ V = ConstantFoldConstantExpression(CE, IC.getTargetData(),
+ IC.getTargetLibraryInfo());
return V;
}
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index a7a6311..af065cd 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -41,6 +41,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
@@ -74,11 +75,15 @@ void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
}
char InstCombiner::ID = 0;
-INITIALIZE_PASS(InstCombiner, "instcombine",
+INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine",
+ "Combine redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(InstCombiner, "instcombine",
"Combine redundant instructions", false, false)
void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
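
This INITIALIZE_PASS_BEGIN / INITIALIZE_PASS_DEPENDENCY / INITIALIZE_PASS_END triple replaces the single INITIALIZE_PASS macro whenever a pass gains a declared dependency; the same three-part change repeats below for CodeGenPrepare, ConstantProp, EarlyCSE, GVN, JumpThreading, LICM and LoopInstSimplify. Condensed for a hypothetical `MyPass`, the moving parts are:

// 1. Registration names the dependency so the legacy pass manager can
//    schedule TargetLibraryInfo before MyPass runs.
INITIALIZE_PASS_BEGIN(MyPass, "my-pass", "My example pass", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(MyPass, "my-pass", "My example pass", false, false)

// 2. The usage declaration makes the requirement binding.
void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetLibraryInfo>();
}

// 3. getAnalysis<> may then be called unconditionally; unlike
//    getAnalysisIfAvailable<TargetData>(), it never returns null.
bool MyPass::runOnFunction(Function &F) {
  TLI = &getAnalysis<TargetLibraryInfo>();
  return false;
}
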
@@ -826,7 +831,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
MadeChange = true;
}
- if ((*I)->getType() != IntPtrTy) {
+ Type *IndexTy = (*I)->getType();
+ if (IndexTy != IntPtrTy && !IndexTy->isVectorTy()) {
// If we are using a wider index than needed for this platform, shrink
// it to what we need. If narrower, sign-extend it to what we need.
// This explicit cast can make subsequent optimizations more obvious.
@@ -909,7 +915,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
Value *StrippedPtr = PtrOp->stripPointerCasts();
- PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType());
+ PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType());
+ // We do not handle pointer-vector GEPs here.
+ if (!StrippedPtrTy)
+ return 0;
+
if (StrippedPtr != PtrOp &&
StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
@@ -1798,7 +1808,8 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
static bool AddReachableCodeToWorklist(BasicBlock *BB,
SmallPtrSet<BasicBlock*, 64> &Visited,
InstCombiner &IC,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
bool MadeIRChange = false;
SmallVector<BasicBlock*, 256> Worklist;
Worklist.push_back(BB);
@@ -1825,7 +1836,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// ConstantProp instruction if trivially constant.
if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
+ if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
@@ -1843,7 +1854,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
Constant*& FoldRes = FoldedConstants[CE];
if (!FoldRes)
- FoldRes = ConstantFoldConstantExpression(CE, TD);
+ FoldRes = ConstantFoldConstantExpression(CE, TD, TLI);
if (!FoldRes)
FoldRes = CE;
@@ -1909,7 +1920,8 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// the reachable instructions. Ignore blocks that are not reachable. Keep
// track of which blocks we visit.
SmallPtrSet<BasicBlock*, 64> Visited;
- MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
+ MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD,
+ TLI);
// Do a quick scan over the function. If we find any blocks that are
// unreachable, remove any instructions inside of them. This prevents
@@ -1954,7 +1966,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Instruction isn't dead, see if we can constant propagate it.
if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(I, TD)) {
+ if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
@@ -2062,7 +2074,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
bool InstCombiner::runOnFunction(Function &F) {
TD = getAnalysisIfAvailable<TargetData>();
-
+ TLI = &getAnalysis<TargetLibraryInfo>();
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
diff --git a/lib/Transforms/InstCombine/LLVMBuild.txt b/lib/Transforms/InstCombine/LLVMBuild.txt
index b73c303..62c6161 100644
--- a/lib/Transforms/InstCombine/LLVMBuild.txt
+++ b/lib/Transforms/InstCombine/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = InstCombine
parent = Transforms
required_libraries = Analysis Core Support Target TransformUtils
-
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index b617539..4cc5727 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -55,8 +55,11 @@ static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
static const char *kAsanModuleCtorName = "asan.module_ctor";
+static const char *kAsanModuleDtorName = "asan.module_dtor";
+static const int kAsanCtorAndCtorPriority = 1;
static const char *kAsanReportErrorTemplate = "__asan_report_";
static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
+static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
static const char *kAsanInitName = "__asan_init";
static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
static const char *kAsanMappingScaleName = "__asan_mapping_scale";
@@ -434,6 +437,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
IRBuilder<> IRB1(CheckTerm);
Instruction *Crash = generateCrashCode(IRB1, AddrLong, IsWrite, TypeSize);
Crash->setDebugLoc(OrigIns->getDebugLoc());
+ ReplaceInstWithInst(CheckTerm, new UnreachableInst(*C));
}
// This function replaces all global variables with new variables that have
@@ -517,7 +521,11 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
NewTy, G->getInitializer(),
Constant::getNullValue(RightRedZoneTy), NULL);
- GlobalVariable *Name = createPrivateGlobalForString(M, G->getName());
+ SmallString<2048> DescriptionOfGlobal = G->getName();
+ DescriptionOfGlobal += " (";
+ DescriptionOfGlobal += M.getModuleIdentifier();
+ DescriptionOfGlobal += ")";
+ GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal);
// Create a new global variable with enough space for a redzone.
GlobalVariable *NewGlobal = new GlobalVariable(
@@ -558,6 +566,22 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
IRB.CreatePointerCast(AllGlobals, IntptrTy),
ConstantInt::get(IntptrTy, n));
+ // We also need to unregister globals at the end, e.g. when a shared library
+ // gets closed.
+ Function *AsanDtorFunction = Function::Create(
+ FunctionType::get(Type::getVoidTy(*C), false),
+ GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
+ BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
+ IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB));
+ Function *AsanUnregisterGlobals = cast<Function>(M.getOrInsertFunction(
+ kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
+
+ IRB_Dtor.CreateCall2(AsanUnregisterGlobals,
+ IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, n));
+ appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndCtorPriority);
+
DEBUG(dbgs() << M);
return true;
}
@@ -631,7 +655,7 @@ bool AddressSanitizer::runOnModule(Module &M) {
Res |= handleFunction(M, *F);
}
- appendToGlobalCtors(M, AsanCtorFunction, 1 /*high priority*/);
+ appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority);
return Res;
}
@@ -951,8 +975,8 @@ BlackList::BlackList(const std::string &Path) {
OwningPtr<MemoryBuffer> File;
if (error_code EC = MemoryBuffer::getFile(ClBlackListFile.c_str(), File)) {
- errs() << EC.message();
- exit(1);
+ report_fatal_error("Can't open blacklist file " + ClBlackListFile + ": " +
+ EC.message());
}
MemoryBuffer *Buff = File.take();
const char *Data = Buff->getBufferStart();
@@ -962,15 +986,23 @@ BlackList::BlackList(const std::string &Path) {
for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) {
if (Lines[i].startswith(kFunPrefix)) {
std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix));
- if (Fun.size()) {
- Fun += "|";
- }
+ std::string ThisFuncRE;
// add ThisFunc replacing * with .*
for (size_t j = 0, n = ThisFunc.size(); j < n; j++) {
if (ThisFunc[j] == '*')
- Fun += '.';
- Fun += ThisFunc[j];
+ ThisFuncRE += '.';
+ ThisFuncRE += ThisFunc[j];
}
+ // Check that the regexp is valid.
+ Regex CheckRE(ThisFuncRE);
+ std::string Error;
+ if (!CheckRE.isValid(Error))
+ report_fatal_error("malformed blacklist regex: " + ThisFunc +
+ ": " + Error);
+ // Append to the final regexp.
+ if (Fun.size())
+ Fun += "|";
+ Fun += ThisFuncRE;
}
}
if (Fun.size()) {
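
Each `fun:` blacklist line is now turned into a regex on its own and validated before being OR-ed into the combined pattern, so one malformed entry yields a targeted fatal error instead of silently corrupting the whole alternation. A small sketch of the wildcard-to-regex step — `wildcardToRegex` is a hypothetical helper; as in the original, only `*` is rewritten and other regex metacharacters pass through unescaped:

#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <string>

// Translate a shell-style "*" wildcard into ".*" and validate eagerly.
static std::string wildcardToRegex(const std::string &Pat) {
  std::string RE;
  for (size_t i = 0, n = Pat.size(); i != n; ++i) {
    if (Pat[i] == '*')
      RE += '.';   // "*" becomes ".*": emit '.' then copy the '*' below
    RE += Pat[i];
  }
  std::string Error;
  if (!llvm::Regex(RE).isValid(Error))
    llvm::report_fatal_error("malformed blacklist regex: " + Pat +
                             ": " + Error);
  return RE;
}
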
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 929b7cd..a4a1fef 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -7,10 +7,3 @@ add_llvm_library(LLVMInstrumentation
PathProfiling.cpp
ProfilingUtils.cpp
)
-
-add_llvm_library_dependencies(LLVMInstrumentation
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTransformUtils
- )
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index ccf7e11..96e5d5b 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -43,12 +43,14 @@ namespace {
public:
static char ID;
GCOVProfiler()
- : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false) {
+ : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false),
+ UseExtraChecksum(false) {
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
- GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false)
+ GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false,
+ bool useExtraChecksum = false)
: ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
- Use402Format(use402Format) {
+ Use402Format(use402Format), UseExtraChecksum(useExtraChecksum) {
assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
@@ -94,6 +96,7 @@ namespace {
bool EmitNotes;
bool EmitData;
bool Use402Format;
+ bool UseExtraChecksum;
Module *M;
LLVMContext *Ctx;
@@ -105,8 +108,9 @@ INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
"Insert instrumentation for GCOV profiling", false, false)
ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
- bool Use402Format) {
- return new GCOVProfiler(EmitNotes, EmitData, Use402Format);
+ bool Use402Format,
+ bool UseExtraChecksum) {
+ return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum);
}
namespace {
@@ -167,7 +171,7 @@ namespace {
}
uint32_t length() {
- // Here 2 = 1 for string lenght + 1 for '0' id#.
+ // Here 2 = 1 for string length + 1 for '0' id#.
return lengthOfGCOVString(Filename) + 2 + Lines.size();
}
@@ -244,10 +248,12 @@ namespace {
// object users can construct, the blocks and lines will be rooted here.
class GCOVFunction : public GCOVRecord {
public:
- GCOVFunction(DISubprogram SP, raw_ostream *os, bool Use402Format) {
+ GCOVFunction(DISubprogram SP, raw_ostream *os,
+ bool Use402Format, bool UseExtraChecksum) {
this->os = os;
Function *F = SP.getFunction();
+ DEBUG(dbgs() << "Function: " << F->getName() << "\n");
uint32_t i = 0;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
Blocks[BB] = new GCOVBlock(i++, os);
@@ -257,14 +263,14 @@ namespace {
writeBytes(FunctionTag, 4);
uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
1 + lengthOfGCOVString(SP.getFilename()) + 1;
- if (!Use402Format)
- ++BlockLen; // For second checksum.
+ if (UseExtraChecksum)
+ ++BlockLen;
write(BlockLen);
uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP);
write(Ident);
- write(0); // checksum #1
- if (!Use402Format)
- write(0); // checksum #2
+ write(0); // lineno checksum
+ if (UseExtraChecksum)
+ write(0); // cfg checksum
writeGCOVString(SP.getName());
writeGCOVString(SP.getFilename());
write(SP.getLineNumber());
@@ -290,6 +296,7 @@ namespace {
for (int i = 0, e = Blocks.size() + 1; i != e; ++i) {
write(0); // No flags on our blocks.
}
+ DEBUG(dbgs() << Blocks.size() << " blocks.\n");
// Emit edges between blocks.
for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
@@ -301,6 +308,8 @@ namespace {
write(Block.OutEdges.size() * 2 + 1);
write(Block.Number);
for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) {
+ DEBUG(dbgs() << Block.Number << " -> " << Block.OutEdges[i]->Number
+ << "\n");
write(Block.OutEdges[i]->Number);
write(0); // no flags
}
@@ -350,68 +359,60 @@ bool GCOVProfiler::runOnModule(Module &M) {
}
void GCOVProfiler::emitGCNO() {
- DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles;
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
- if (CU_Nodes) {
- for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- // Each compile unit gets its own .gcno file. This means that whether we run
- // this pass over the original .o's as they're produced, or run it after
- // LTO, we'll generate the same .gcno files.
-
- DICompileUnit CU(CU_Nodes->getOperand(i));
- raw_fd_ostream *&out = GcnoFiles[CU];
- std::string ErrorInfo;
- out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo,
- raw_fd_ostream::F_Binary);
- if (!Use402Format)
- out->write("oncg*404MVLL", 12);
- else
- out->write("oncg*204MVLL", 12);
-
- DIArray SPs = CU.getSubprograms();
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram SP(SPs.getElement(i));
- if (!SP.Verify()) continue;
- raw_fd_ostream *&os = GcnoFiles[CU];
-
- Function *F = SP.getFunction();
- if (!F) continue;
- GCOVFunction Func(SP, os, Use402Format);
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- GCOVBlock &Block = Func.getBlock(BB);
- TerminatorInst *TI = BB->getTerminator();
- if (int successors = TI->getNumSuccessors()) {
- for (int i = 0; i != successors; ++i) {
- Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
- }
- } else if (isa<ReturnInst>(TI)) {
- Block.addEdge(Func.getReturnBlock());
- }
-
- uint32_t Line = 0;
- for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
- const DebugLoc &Loc = I->getDebugLoc();
- if (Loc.isUnknown()) continue;
- if (Line == Loc.getLine()) continue;
- Line = Loc.getLine();
- if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
-
- GCOVLines &Lines = Block.getFile(SP.getFilename());
- Lines.addLine(Loc.getLine());
+ if (!CU_Nodes) return;
+
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ // Each compile unit gets its own .gcno file. This means that whether we run
+ // this pass over the original .o's as they're produced, or run it after
+ // LTO, we'll generate the same .gcno files.
+
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ std::string ErrorInfo;
+ raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary);
+ if (!Use402Format)
+ out.write("oncg*404MVLL", 12);
+ else
+ out.write("oncg*204MVLL", 12);
+
+ DIArray SPs = CU.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram SP(SPs.getElement(i));
+ if (!SP.Verify()) continue;
+
+ Function *F = SP.getFunction();
+ if (!F) continue;
+ GCOVFunction Func(SP, &out, Use402Format, UseExtraChecksum);
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ GCOVBlock &Block = Func.getBlock(BB);
+ TerminatorInst *TI = BB->getTerminator();
+ if (int successors = TI->getNumSuccessors()) {
+ for (int i = 0; i != successors; ++i) {
+ Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
}
+ } else if (isa<ReturnInst>(TI)) {
+ Block.addEdge(Func.getReturnBlock());
+ }
+
+ uint32_t Line = 0;
+ for (BasicBlock::iterator I = BB->begin(), IE = BB->end();
+ I != IE; ++I) {
+ const DebugLoc &Loc = I->getDebugLoc();
+ if (Loc.isUnknown()) continue;
+ if (Line == Loc.getLine()) continue;
+ Line = Loc.getLine();
+ if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
+
+ GCOVLines &Lines = Block.getFile(SP.getFilename());
+ Lines.addLine(Loc.getLine());
}
- Func.writeOut();
}
+ Func.writeOut();
}
- }
-
- for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator
- I = GcnoFiles.begin(), E = GcnoFiles.end(); I != E; ++I) {
- raw_fd_ostream *&out = I->second;
- out->write("\0\0\0\0\0\0\0\0", 8); // EOF
- out->close();
- delete out;
+ out.write("\0\0\0\0\0\0\0\0", 8); // EOF
+ out.close();
}
}
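
The rewrite drops the DenseMap of heap-allocated streams in favor of one stack raw_fd_ostream per compile unit, closed at the end of each iteration, and the early `if (!CU_Nodes) return;` flattens the nesting. A minimal sketch of that stream lifetime, assuming the 2011-era raw_fd_ostream constructor; the file names and payload here are illustrative, not the real .gcno writer:

#include "llvm/Support/raw_ostream.h"
#include <string>
#include <vector>

// One stream per output file, scoped to the loop body so it is flushed
// and closed before the next file starts.
void writeOnePerUnit(const std::vector<std::string> &Names) {
  for (size_t i = 0, e = Names.size(); i != e; ++i) {
    std::string ErrorInfo;
    llvm::raw_fd_ostream out(Names[i].c_str(), ErrorInfo,
                             llvm::raw_fd_ostream::F_Binary);
    if (!ErrorInfo.empty())
      continue;                       // sketch: skip files we cannot open
    out.write("oncg*404MVLL", 12);    // gcno magic + version, as in the hunk
    out.write("\0\0\0\0\0\0\0\0", 8); // EOF record
    out.close();                      // explicit, though the dtor would too
  }
}
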
diff --git a/lib/Transforms/Instrumentation/LLVMBuild.txt b/lib/Transforms/Instrumentation/LLVMBuild.txt
index f302d03..d36ad54 100644
--- a/lib/Transforms/Instrumentation/LLVMBuild.txt
+++ b/lib/Transforms/Instrumentation/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = Instrumentation
parent = Transforms
required_libraries = Analysis Core Support TransformUtils
-
diff --git a/lib/Transforms/LLVMBuild.txt b/lib/Transforms/LLVMBuild.txt
index d36b898..b2ef49a 100644
--- a/lib/Transforms/LLVMBuild.txt
+++ b/lib/Transforms/LLVMBuild.txt
@@ -15,8 +15,10 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = IPO InstCombine Instrumentation Scalar Utils
+
[component_0]
type = Group
name = Transforms
parent = Libraries
-
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index a6f0cf3..d660c72 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -32,12 +32,3 @@ add_llvm_library(LLVMScalarOpts
Sink.cpp
TailRecursionElimination.cpp
)
-
-add_llvm_library_dependencies(LLVMScalarOpts
- LLVMAnalysis
- LLVMCore
- LLVMInstCombine
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index f8f18b2..f9abfe9 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -69,6 +70,7 @@ namespace {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
+ const TargetLibraryInfo *TLInfo;
DominatorTree *DT;
ProfileInfo *PFI;
@@ -97,6 +99,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
AU.addPreserved<ProfileInfo>();
+ AU.addRequired<TargetLibraryInfo>();
}
private:
@@ -116,7 +119,10 @@ namespace {
}
char CodeGenPrepare::ID = 0;
-INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
+INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
"Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
@@ -127,6 +133,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
ModifiedDT = false;
+ TLInfo = &getAnalysis<TargetLibraryInfo>();
DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
@@ -542,7 +549,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
WeakVH IterHandle(CurInstIterator);
ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
- ModifiedDT ? 0 : DT);
+ TLInfo, ModifiedDT ? 0 : DT);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 664c3f6..5430f62 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -24,6 +24,8 @@
#include "llvm/Constant.h"
#include "llvm/Instruction.h"
#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/ADT/Statistic.h"
#include <set>
@@ -42,19 +44,22 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
};
}
char ConstantPropagation::ID = 0;
-INITIALIZE_PASS(ConstantPropagation, "constprop",
+INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop",
+ "Simple constant propagation", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(ConstantPropagation, "constprop",
"Simple constant propagation", false, false)
FunctionPass *llvm::createConstantPropagationPass() {
return new ConstantPropagation();
}
-
bool ConstantPropagation::runOnFunction(Function &F) {
// Initialize the worklist to all of the instructions ready to process...
std::set<Instruction*> WorkList;
@@ -62,13 +67,15 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.insert(&*i);
}
bool Changed = false;
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
while (!WorkList.empty()) {
Instruction *I = *WorkList.begin();
WorkList.erase(WorkList.begin()); // Get an element from the worklist...
if (!I->use_empty()) // Don't muck with dead instructions...
- if (Constant *C = ConstantFoldInstruction(I)) {
+ if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
// Add all of the users of this instruction to the worklist, they might
// be constant propagatable now...
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index f5688cb..8729019 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -416,7 +416,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// writes to addresses which will definitely be overwritten later
if (LaterOff > EarlierOff &&
LaterOff < int64_t(EarlierOff + Earlier.Size) &&
- LaterOff + Later.Size >= EarlierOff + Earlier.Size)
+ int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
return OverwriteEnd;
// Otherwise, they don't completely overlap.
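
The added int64_t casts are not cosmetic: Later.Size and Earlier.Size are unsigned, so in the old form the signed offsets were promoted to unsigned and a sum that is logically negative compared as a huge value. A self-contained demonstration with illustrative values (two's-complement conversion assumed):

#include <cassert>
#include <cstdint>

int main() {
  // Earlier store covers [-8, 8); later store covers only [-4, -2).
  int64_t  EarlierOff  = -8; uint64_t EarlierSize = 16;
  int64_t  LaterOff    = -4; uint64_t LaterSize   = 2;
  // Mixed-sign form: LaterOff + LaterSize is computed as uint64_t, so the
  // logically negative sum -2 wraps to 2^64-2 and compares greater than 8.
  bool buggy = LaterOff + LaterSize >= EarlierOff + EarlierSize;  // true
  // Casting both sums back to int64_t keeps the comparison signed: -2 >= 8.
  bool fixed = int64_t(LaterOff + LaterSize) >=
               int64_t(EarlierOff + EarlierSize);                 // false
  assert(buggy && !fixed);
  return 0;
}
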
@@ -624,6 +624,7 @@ static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
BasicBlock *BB, DominatorTree *DT) {
for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
BasicBlock *Pred = *I;
+ if (Pred == BB) continue;
TerminatorInst *PredTI = Pred->getTerminator();
if (PredTI->getNumSuccessors() != 1)
continue;
@@ -853,4 +854,3 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
I != E; ++I)
DeadStackObjects.erase(*I);
}
-
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index c0223d2..5241e11 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
@@ -215,6 +216,7 @@ namespace {
class EarlyCSE : public FunctionPass {
public:
const TargetData *TD;
+ const TargetLibraryInfo *TLI;
DominatorTree *DT;
typedef RecyclingAllocator<BumpPtrAllocator,
ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
@@ -263,6 +265,7 @@ private:
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
AU.setPreservesCFG();
}
};
@@ -277,6 +280,7 @@ FunctionPass *llvm::createEarlyCSEPass() {
INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
bool EarlyCSE::processNode(DomTreeNode *Node) {
@@ -328,7 +332,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
- if (Value *V = SimplifyInstruction(Inst, TD, DT)) {
+ if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) {
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
@@ -455,6 +459,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
bool EarlyCSE::runOnFunction(Function &F) {
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTree>();
// Tables that the pass uses when walking the domtree.
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index a51cbb6..374fdd7 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
@@ -446,7 +447,8 @@ namespace {
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
const TargetData *TD;
-
+ const TargetLibraryInfo *TLI;
+
ValueTable VN;
/// LeaderTable - A mapping from value numbers to lists of Value*'s that
@@ -530,6 +532,7 @@ namespace {
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
if (!NoLoads)
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
@@ -568,6 +571,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) {
INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
@@ -2032,7 +2036,7 @@ bool GVN::processInstruction(Instruction *I) {
// to value numbering it. Value numbering often exposes redundancies, for
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
- if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
I->replaceAllUsesWith(V);
if (MD && V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
@@ -2134,6 +2138,7 @@ bool GVN::runOnFunction(Function& F) {
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
VN.setDomTree(DT);
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 0772b48..ad8689a 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -182,7 +182,7 @@ bool GlobalMerge::doInitialization(Module &M) {
continue;
// Ignore fancy-aligned globals for now.
- unsigned Alignment = I->getAlignment();
+ unsigned Alignment = TD->getPreferredAlignment(I);
Type *Ty = I->getType()->getElementType();
if (Alignment > TD->getABITypeAlignment(Ty))
continue;
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 1f21108..6d52b22 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -58,18 +58,16 @@ STATISTIC(NumLFTR , "Number of loop exit tests replaced");
STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
-namespace llvm {
- cl::opt<bool> EnableIVRewrite(
- "enable-iv-rewrite", cl::Hidden,
- cl::desc("Enable canonical induction variable rewriting"));
-
- // Trip count verification can be enabled by default under NDEBUG if we
- // implement a strong expression equivalence checker in SCEV. Until then, we
- // use the verify-indvars flag, which may assert in some cases.
- cl::opt<bool> VerifyIndvars(
- "verify-indvars", cl::Hidden,
- cl::desc("Verify the ScalarEvolution result after running indvars"));
-}
+static cl::opt<bool> EnableIVRewrite(
+ "enable-iv-rewrite", cl::Hidden,
+ cl::desc("Enable canonical induction variable rewriting"));
+
+// Trip count verification can be enabled by default under NDEBUG if we
+// implement a strong expression equivalence checker in SCEV. Until then, we
+// use the verify-indvars flag, which may assert in some cases.
+static cl::opt<bool> VerifyIndvars(
+ "verify-indvars", cl::Hidden,
+ cl::desc("Verify the ScalarEvolution result after running indvars"));
namespace {
class IndVarSimplify : public LoopPass {
@@ -180,6 +178,11 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
// base of a recurrence. This handles the case in which SCEV expansion
// converts a pointer type recurrence into a nonrecurrent pointer base
// indexed by an integer recurrence.
+
+ // If the GEP base pointer is a vector of pointers, abort.
+ if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
+ return false;
+
const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
if (FromBase == ToBase)
@@ -946,9 +949,13 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
else
return 0;
+ // When creating this AddExpr, don't apply the current operation's NSW or NUW
+ // flags. This instruction may be guarded by control flow that the no-wrap
+ // behavior depends on. Non-control-equivalent instructions can be mapped to
+ // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
+ // semantics to those operations.
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
- SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr,
- IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW));
+ SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
if (!AddRec || AddRec->getLoop() != L)
return 0;
@@ -1231,7 +1238,11 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
/// BackedgeTakenInfo. If these expressions have not been reduced, then
/// expanding them may incur additional cost (albeit in the loop preheader).
static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
+ SmallPtrSet<const SCEV*, 8> &Processed,
ScalarEvolution *SE) {
+ if (!Processed.insert(S))
+ return false;
+
// If the backedge-taken count is a UDiv, it's very likely a UDiv that
// ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
// precise expression, rather than a UDiv from the user's code. If we can't
@@ -1259,7 +1270,7 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I) {
- if (isHighCostExpansion(*I, BI, SE))
+ if (isHighCostExpansion(*I, BI, Processed, SE))
return true;
}
return false;
@@ -1302,7 +1313,8 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
if (!BI)
return false;
- if (isHighCostExpansion(BackedgeTakenCount, BI, SE))
+ SmallPtrSet<const SCEV*, 8> Processed;
+ if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE))
return false;
return true;
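
Threading the Processed set through isHighCostExpansion both memoizes the walk and guards against revisiting shared sub-expressions, so a DAG-shaped SCEV no longer triggers exponential re-expansion. A generic sketch of the visited-set idiom — `Node` and `isExpensive` are hypothetical stand-ins for the SCEV types, and `SmallPtrSet::insert` returning bool matches the 2011-era API used in the hunk:

#include "llvm/ADT/SmallPtrSet.h"

struct Node { const Node *Ops[2]; unsigned NumOps; bool Expensive; };

// Each node is expanded at most once: insert() returns false when the
// node was already seen, and an already-seen node cannot add new cost.
static bool isExpensive(const Node *N,
                        llvm::SmallPtrSet<const Node *, 8> &Processed) {
  if (!Processed.insert(N))
    return false;
  if (N->Expensive)
    return true;
  for (unsigned i = 0; i != N->NumOps; ++i)
    if (isExpensive(N->Ops[i], Processed))
      return true;
  return false;
}
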
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index f410af3..c78db3f 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
@@ -75,6 +76,7 @@ namespace {
///
class JumpThreading : public FunctionPass {
TargetData *TD;
+ TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
@@ -107,6 +109,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LazyValueInfo>();
AU.addPreserved<LazyValueInfo>();
+ AU.addRequired<TargetLibraryInfo>();
}
void FindLoopHeaders(Function &F);
@@ -133,6 +136,7 @@ char JumpThreading::ID = 0;
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
"Jump Threading", false, false)
INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
@@ -144,6 +148,7 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
LVI = &getAnalysis<LazyValueInfo>();
FindLoopHeaders(F);
@@ -674,7 +679,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// Run constant folding to see if we can reduce the condition to a simple
// constant.
if (Instruction *I = dyn_cast<Instruction>(Condition)) {
- Value *SimpleVal = ConstantFoldInstruction(I, TD);
+ Value *SimpleVal = ConstantFoldInstruction(I, TD, TLI);
if (SimpleVal) {
I->replaceAllUsesWith(SimpleVal);
I->eraseFromParent();
@@ -921,8 +926,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Split them out to their own block.
UnavailablePred =
- SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
- "thread-pre-split", this);
+ SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split", this);
}
// If the value isn't available in all predecessors, then there will be
@@ -1334,8 +1338,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
- ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
}
// And finally, do it!
@@ -1479,8 +1482,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
- ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 8098b36..8795cd8 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -43,8 +43,11 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -84,6 +87,7 @@ namespace {
AU.addPreserved<AliasAnalysis>();
AU.addPreserved("scalar-evolution");
AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<TargetLibraryInfo>();
}
bool doFinalization() {
@@ -96,6 +100,9 @@ namespace {
LoopInfo *LI; // Current LoopInfo
DominatorTree *DT; // Dominator Tree for the current Loop.
+ TargetData *TD; // TargetData for constant folding.
+ TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
+
// State that is updated as we process loops.
bool Changed; // Set to true when we change anything.
BasicBlock *Preheader; // The preheader block of the current loop...
@@ -177,6 +184,7 @@ INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
@@ -194,6 +202,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
CurAST = new AliasSetTracker(*AA);
// Collect Alias info from subloops.
for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
@@ -333,7 +344,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
// Try constant folding this instruction. If all the operands are
// constants, it is technically hoistable, but it would be better to just
// fold it.
- if (Constant *C = ConstantFoldInstruction(&I)) {
+ if (Constant *C = ConstantFoldInstruction(&I, TD, TLI)) {
DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
CurAST->copyValue(&I, C);
CurAST->deleteValue(&I);
@@ -369,7 +380,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
// in the same alias set as something that ends up being modified.
if (AA->pointsToConstantMemory(LI->getOperand(0)))
return true;
- if (LI->getMetadata(LI->getContext().getMDKindID("invariant.load")))
+ if (LI->getMetadata("invariant.load"))
return true;
// Don't hoist loads which have may-aliased stores in loop.
@@ -581,7 +592,7 @@ void LICM::hoist(Instruction &I) {
///
bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
// If it is not a trapping instruction, it is always safe to hoist.
- if (Inst.isSafeToSpeculativelyExecute())
+ if (isSafeToSpeculativelyExecute(&Inst))
return true;
return isGuaranteedToExecute(Inst);
diff --git a/lib/Transforms/Scalar/LLVMBuild.txt b/lib/Transforms/Scalar/LLVMBuild.txt
index 027634d..cee9119 100644
--- a/lib/Transforms/Scalar/LLVMBuild.txt
+++ b/lib/Transforms/Scalar/LLVMBuild.txt
@@ -21,4 +21,3 @@ name = Scalar
parent = Transforms
library_name = ScalarOpts
required_libraries = Analysis Core InstCombine Support Target TransformUtils
-
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index af25c5c..f0f05e6 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
@@ -43,6 +44,7 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
AU.addPreserved("scalar-evolution");
+ AU.addRequired<TargetLibraryInfo>();
}
};
}
@@ -50,6 +52,7 @@ namespace {
char LoopInstSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
"Simplify instructions in loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -64,6 +67,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
LoopInfo *LI = &getAnalysis<LoopInfo>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -104,7 +108,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Don't bother simplifying unused instructions.
if (!I->use_empty()) {
- Value *V = SimplifyInstruction(I, TD, DT);
+ Value *V = SimplifyInstruction(I, TD, TLI, DT);
if (V && LI->replacementPreservesLCSSAForm(I, V)) {
// Mark all uses for resimplification next time round the loop.
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4ae51d5..840614e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -77,19 +77,17 @@
#include <algorithm>
using namespace llvm;
-namespace llvm {
-cl::opt<bool> EnableNested(
+static cl::opt<bool> EnableNested(
"enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops"));
-cl::opt<bool> EnableRetry(
- "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));
+static cl::opt<bool> EnableRetry(
+ "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));
// Temporary flag to cleanup congruent phis after LSR phi expansion.
// It's currently disabled until we can determine whether it's truly useful or
// not. The flag should be removed after the v3.0 release.
-cl::opt<bool> EnablePhiElim(
- "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination"));
-}
+static cl::opt<bool> EnablePhiElim(
+ "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination"));
namespace {
@@ -636,6 +634,19 @@ static Type *getAccessType(const Instruction *Inst) {
return AccessTy;
}
+/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
+static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+ for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(AR->getType())) &&
+ SE.getSCEV(PN) == AR)
+ return true;
+ }
+ return false;
+}
+
/// DeleteTriviallyDeadInstructions - If any of the instructions is the
/// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
@@ -705,7 +716,8 @@ public:
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
void print(raw_ostream &OS) const;
void dump() const;
@@ -718,7 +730,8 @@ private:
void RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs);
};
}
@@ -738,18 +751,13 @@ void Cost::RateRegister(const SCEV *Reg,
// on other loops, and cannot be expected to change sibling loops. If the
// AddRec exists, consider it's register free and leave it alone. Otherwise,
// do not consider this formula at all.
- // FIXME: why do we need to generate such fomulae?
else if (!EnableNested || L->contains(AR->getLoop()) ||
(!AR->getLoop()->contains(L) &&
DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
- for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- if (SE.isSCEVable(PN->getType()) &&
- (SE.getEffectiveSCEVType(PN->getType()) ==
- SE.getEffectiveSCEVType(AR->getType())) &&
- SE.getSCEV(PN) == AR)
- return;
- }
+ if (isExistingPhi(AR, SE))
+ return;
+
+ // For !EnableNested, never rewrite IVs in other loops.
if (!EnableNested) {
Loose();
return;
@@ -791,13 +799,22 @@ void Cost::RateRegister(const SCEV *Reg,
}
/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
-/// before, rate it.
+/// before, rate it. Optional LoserRegs provides a way to declare any formula
+/// that refers to one of those regs an instant loser.
void Cost::RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT) {
- if (Regs.insert(Reg))
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ if (LoserRegs && LoserRegs->count(Reg)) {
+ Loose();
+ return;
+ }
+ if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
+ if (LoserRegs && isLoser())
+ LoserRegs->insert(Reg);
+ }
}
void Cost::RateFormula(const Formula &F,
@@ -805,14 +822,15 @@ void Cost::RateFormula(const Formula &F,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
- ScalarEvolution &SE, DominatorTree &DT) {
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
Loose();
return;
}
- RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+ RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
@@ -823,7 +841,7 @@ void Cost::RateFormula(const Formula &F,
Loose();
return;
}
- RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+ RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
@@ -1105,7 +1123,6 @@ bool LSRUse::InsertFormula(const Formula &F) {
Formulae.push_back(F);
// Record registers now being used by this use.
- if (F.ScaledReg) Regs.insert(F.ScaledReg);
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
return true;
@@ -1116,7 +1133,6 @@ void LSRUse::DeleteFormula(Formula &F) {
if (&F != &Formulae.back())
std::swap(F, Formulae.back());
Formulae.pop_back();
- assert(!Formulae.empty() && "LSRUse has no formulae left!");
}
/// RecomputeRegs - Recompute the Regs field, and update RegUses.
@@ -1389,7 +1405,6 @@ class LSRInstance {
LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
-public:
void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
@@ -1450,6 +1465,7 @@ public:
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P);
+public:
LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
bool getChanged() const { return Changed; }
@@ -2045,7 +2061,8 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
do {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- Strides.insert(AR->getStepRecurrence(SE));
+ if (EnableNested || AR->getLoop() == L)
+ Strides.insert(AR->getStepRecurrence(SE));
Worklist.push_back(AR->getStart());
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
Worklist.append(Add->op_begin(), Add->op_end());
@@ -2914,6 +2931,7 @@ LSRInstance::GenerateAllReuseFormulae() {
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
DenseSet<const SCEV *> VisitedRegs;
SmallPtrSet<const SCEV *, 16> Regs;
+ SmallPtrSet<const SCEV *, 16> LoserRegs;
#ifndef NDEBUG
bool ChangedFormulae = false;
#endif
@@ -2933,46 +2951,66 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
FIdx != NumForms; ++FIdx) {
Formula &F = LU.Formulae[FIdx];
- SmallVector<const SCEV *, 2> Key;
- for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
- JE = F.BaseRegs.end(); J != JE; ++J) {
- const SCEV *Reg = *J;
- if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
- Key.push_back(Reg);
+ // Some formulas are instant losers. For example, they may depend on
+ // nonexistent AddRecs from other loops. These need to be filtered
+ // immediately; otherwise heuristics could choose them over others, leading
+ // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
+ // avoids the need to recompute this information across formulae using the
+ // same bad AddRec. Passing LoserRegs is also essential unless we remove
+ // the corresponding bad register from the Regs set.
+ Cost CostF;
+ Regs.clear();
+ CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT,
+ &LoserRegs);
+ if (CostF.isLoser()) {
+ // During initial formula generation, undesirable formulae are generated
+ // by uses within other loops that have some non-trivial address mode or
+ // use the postinc form of the IV. LSR needs to provide these formulae
+ // as the basis of rediscovering the desired formula that uses an AddRec
+ // corresponding to the existing phi. Once all formulae have been
+ // generated, these initial losers may be pruned.
+ DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
+ dbgs() << "\n");
}
- if (F.ScaledReg &&
- RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
- Key.push_back(F.ScaledReg);
- // Unstable sort by host order ok, because this is only used for
- // uniquifying.
- std::sort(Key.begin(), Key.end());
-
- std::pair<BestFormulaeTy::const_iterator, bool> P =
- BestFormulae.insert(std::make_pair(Key, FIdx));
- if (!P.second) {
+ else {
+ SmallVector<const SCEV *, 2> Key;
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
+ Key.push_back(Reg);
+ }
+ if (F.ScaledReg &&
+ RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
+ Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for
+ // uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ std::pair<BestFormulaeTy::const_iterator, bool> P =
+ BestFormulae.insert(std::make_pair(Key, FIdx));
+ if (P.second)
+ continue;
+
Formula &Best = LU.Formulae[P.first->second];
- Cost CostF;
- CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
- Regs.clear();
Cost CostBest;
- CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
Regs.clear();
+ CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
" in favor of formula "; Best.print(dbgs());
dbgs() << '\n');
+ }
#ifndef NDEBUG
- ChangedFormulae = true;
+ ChangedFormulae = true;
#endif
- LU.DeleteFormula(F);
- --FIdx;
- --NumForms;
- Any = true;
- continue;
- }
+ LU.DeleteFormula(F);
+ --FIdx;
+ --NumForms;
+ Any = true;
}
// Now that we've filtered out some formulae, recompute the Regs set.
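The LoserRegs parameter threaded through RateFormula and RatePrimaryRegister above is a memoization of known-bad registers: once a register rates as an instant loser, every later formula that mentions it is rejected without re-running the expensive rating. A minimal standalone sketch of the pattern, with hypothetical names and std::set standing in for LLVM's SmallPtrSet (this sketch also null-checks LoserRegs before the final insert):

#include <set>

typedef const void *Reg; // stand-in for const SCEV *

struct Cost {
  bool Loser;
  Cost() : Loser(false) {}
  void Loose() { Loser = true; }          // keeps the pass's spelling
  bool isLoser() const { return Loser; }
  void RateRegister(Reg) { /* expensive per-register analysis elided */ }

  // Rate one register, consulting and updating the shared loser cache.
  void RatePrimaryRegister(Reg R, std::set<Reg> &Regs,
                           std::set<Reg> *LoserRegs) {
    if (LoserRegs && LoserRegs->count(R)) {
      Loose();                            // known-bad register: instant loser
      return;
    }
    if (Regs.insert(R).second) {          // first sighting in this formula
      RateRegister(R);
      if (isLoser() && LoserRegs)
        LoserRegs->insert(R);             // remember for later formulae
    }
  }
};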
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 37f4c2c..22dbfe3 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -40,10 +40,9 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
"-unroll-threshold loop size is reached."));
-// Temporary flag to be removed in 3.0
static cl::opt<bool>
-NoSCEVUnroll("disable-unroll-scev", cl::init(false), cl::Hidden,
- cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling"));
+UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
+ cl::desc("Unroll loops with run-time trip counts"));
namespace {
class LoopUnroll : public LoopPass {
@@ -68,6 +67,10 @@ namespace {
// explicit -unroll-threshold).
static const unsigned OptSizeUnrollThreshold = 50;
+ // Default unroll count for loops with run-time trip count if
+ // -unroll-count is not set
+ static const unsigned UnrollRuntimeCount = 8;
+
unsigned CurrentCount;
unsigned CurrentThreshold;
bool CurrentAllowPartial;
@@ -148,23 +151,21 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
unsigned TripMultiple = 1;
- if (!NoSCEVUnroll) {
- // Find "latch trip count". UnrollLoop assumes that control cannot exit
- // via the loop latch on any iteration prior to TripCount. The loop may exit
- // early via an earlier branch.
- BasicBlock *LatchBlock = L->getLoopLatch();
- if (LatchBlock) {
- TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
- TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
- }
- }
- else {
- TripCount = L->getSmallConstantTripCount();
- if (TripCount == 0)
- TripMultiple = L->getSmallConstantTripMultiple();
+ // Find "latch trip count". UnrollLoop assumes that control cannot exit
+ // via the loop latch on any iteration prior to TripCount. The loop may exit
+ // early via an earlier branch.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (LatchBlock) {
+ TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
+ TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
}
- // Automatically select an unroll count.
+ // Use a default unroll-count if the user doesn't specify a value
+ // and the trip count is a run-time value. The default differs between
+ // run-time and compile-time trip count loops.
unsigned Count = CurrentCount;
+ if (UnrollRuntime && CurrentCount == 0 && TripCount == 0)
+ Count = UnrollRuntimeCount;
+
if (Count == 0) {
// Conservative heuristic: if we know the trip count, see if we can
// completely unroll (subject to the threshold, checked below); otherwise
@@ -189,15 +190,23 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (TripCount != 1 && Size > Threshold) {
DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
<< " because size: " << Size << ">" << Threshold << "\n");
- if (!CurrentAllowPartial) {
+ if (!CurrentAllowPartial && !(UnrollRuntime && TripCount == 0)) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
}
- // Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = Threshold / LoopSize;
- while (Count != 0 && TripCount%Count != 0) {
- Count--;
+ if (TripCount) {
+ // Reduce unroll count to be modulo of TripCount for partial unrolling
+ Count = CurrentThreshold / LoopSize;
+ while (Count != 0 && TripCount%Count != 0)
+ Count--;
+ }
+ else if (UnrollRuntime) {
+ // Reduce unroll count to be a lower power-of-two value
+ while (Count != 0 && Size > CurrentThreshold) {
+ Count >>= 1;
+ Size = LoopSize*Count;
+ }
}
if (Count < 2) {
DEBUG(dbgs() << " could not unroll partially\n");
@@ -208,7 +217,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM))
+ if (!UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM))
return false;
return true;
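For loops whose trip count is only known at run time, the pass now starts from the default count of 8 (UnrollRuntimeCount) and halves it until the estimated unrolled size fits under the threshold, keeping the count a power of two so the run-time prolog stays simple. The selection logic in isolation, as a sketch with hypothetical names:

#include <cstdio>

// Pick an unroll count for a run-time trip count: halve the default
// until the estimated unrolled body fits the size threshold.
unsigned pickRuntimeUnrollCount(unsigned LoopSize, unsigned Threshold,
                                unsigned DefaultCount = 8) {
  unsigned Count = DefaultCount;
  unsigned Size = LoopSize * Count;
  while (Count != 0 && Size > Threshold) {
    Count >>= 1;               // stay a power of two
    Size = LoopSize * Count;
  }
  return Count;                // 0 or 1 means "do not unroll"
}

int main() {
  // A 40-instruction body with a 150-instruction budget: 8 -> 4 -> 2.
  std::printf("%u\n", pickRuntimeUnrollCount(40, 150)); // prints 2
}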
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 458949c..a2d0e98 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -71,7 +71,9 @@ namespace {
// LoopProcessWorklist - Used to check if second loop needs processing
// after RewriteLoopBodyWithConditionConstant rewrites first loop.
std::vector<Loop*> LoopProcessWorklist;
- SmallPtrSet<Value *,8> UnswitchedVals;
+
+ // FIXME: Consider custom class for this.
+ std::map<const SwitchInst*, SmallPtrSet<const Value *,8> > UnswitchedVals;
bool OptimizeForSize;
bool redoLoop;
@@ -117,7 +119,15 @@ namespace {
private:
virtual void releaseMemory() {
- UnswitchedVals.clear();
+ // We need to forget about all switches in the current loop.
+ // FIXME: Do this better than enumerating all blocks of code
+ // and checking if each terminator is a switch instruction.
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end(); I != E; ++I) {
+ SwitchInst* SI = dyn_cast<SwitchInst>((*I)->getTerminator());
+ if (SI)
+ UnswitchedVals.erase(SI);
+ }
}
/// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
@@ -128,6 +138,12 @@ namespace {
if (I != LoopProcessWorklist.end())
LoopProcessWorklist.erase(I);
}
+
+ /// For new loop switches we clone info about values that were
+ /// already unswitched and have redundant successors.
+ /// Note that new loop data is stored inside the VMap.
+ void CloneUnswitchedVals(const ValueToValueMapTy& VMap,
+ const BasicBlock* SrcBB);
void initLoopData() {
loopHeader = currentLoop->getHeader();
@@ -255,13 +271,25 @@ bool LoopUnswitch::processCurrentLoop() {
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
currentLoop, Changed);
- if (LoopCond && SI->getNumCases() > 1) {
+ unsigned NumCases = SI->getNumCases();
+ if (LoopCond && NumCases > 1) {
// Find a value to unswitch on:
// FIXME: this should choose the most expensive case!
// FIXME: scan for a case with a non-critical edge?
- Constant *UnswitchVal = SI->getCaseValue(1);
+ Constant *UnswitchVal = NULL;
+
// Do not process same value again and again.
- if (!UnswitchedVals.insert(UnswitchVal))
+ // At this point we have some cases already unswitched and
+ // some not yet unswitched. Let's find the first not yet unswitched one.
+ for (unsigned i = 1; i < NumCases; ++i) {
+ Constant* UnswitchValCandidate = SI->getCaseValue(i);
+ if (!UnswitchedVals[SI].count(UnswitchValCandidate)) {
+ UnswitchVal = UnswitchValCandidate;
+ break;
+ }
+ }
+
+ if (!UnswitchVal)
continue;
if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
@@ -287,6 +315,23 @@ bool LoopUnswitch::processCurrentLoop() {
return Changed;
}
+/// For new loop switches we clone info about values that were
+/// already unswitched and have redundant successors.
+/// Note that new loop data is stored inside the VMap.
+void LoopUnswitch::CloneUnswitchedVals(const ValueToValueMapTy& VMap,
+ const BasicBlock* SrcBB) {
+
+ const SwitchInst* SI = dyn_cast<SwitchInst>(SrcBB->getTerminator());
+ if (SI && UnswitchedVals.count(SI)) {
+ // Don't clone a totally simplified switch.
+ if (isa<Constant>(SI->getCondition()))
+ return;
+ Value* I = VMap.lookup(SI);
+ assert(I && "All instructions that are in SrcBB must be in VMap.");
+ UnswitchedVals[cast<SwitchInst>(I)] = UnswitchedVals[SI];
+ }
+}
+
/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
/// loop with no side effects (including infinite loops).
///
@@ -378,14 +423,25 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
// Check to see if a successor of the switch is guaranteed to go to the
// latch block or exit through a one exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
- // this. Note that we can't trivially unswitch on the default case.
- for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
- if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ // this.
+ // Note that we can't trivially unswitch on the default case or
+ // on already unswitched cases.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock* LoopExitCandidate;
+ if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
SI->getSuccessor(i)))) {
// Okay, we found a trivial case, remember the value that is trivial.
- if (Val) *Val = SI->getCaseValue(i);
+ ConstantInt* CaseVal = SI->getCaseValue(i);
+
+ // Check that it was not unswitched before, since already unswitched
+ // trivial vals look trivial too.
+ if (UnswitchedVals[SI].count(CaseVal))
+ continue;
+ LoopExitBB = LoopExitCandidate;
+ if (Val) *Val = CaseVal;
break;
}
+ }
}
// If we didn't find a single unique LoopExit block, or if the loop exit block
@@ -447,8 +503,14 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
// expansion, and the number of basic blocks, to avoid loops with
// large numbers of branches which cause loop unswitching to go crazy.
// This is a very ad-hoc heuristic.
- if (Metrics.NumInsts > Threshold ||
- Metrics.NumBlocks * 5 > Threshold ||
+
+ unsigned NumUnswitched =
+ (NumSwitches + NumBranches) + 1 /*take into account current iteration*/;
+
+ unsigned NumInsts = Metrics.NumInsts * NumUnswitched;
+ unsigned NumBlocks = Metrics.NumBlocks * NumUnswitched;
+
+ if (NumInsts > Threshold || NumBlocks * 5 > Threshold ||
Metrics.containsIndirectBr || Metrics.isRecursive) {
DEBUG(dbgs() << "NOT unswitching loop %"
<< currentLoop->getHeader()->getName() << ", cost too high: "
@@ -565,8 +627,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
if (!ExitBlock->isLandingPad()) {
- SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
- ".us-lcssa", this);
+ SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", this);
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa",
@@ -621,6 +682,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
ValueToValueMapTy VMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
+
+ // Inherit simplified switch info for NewBB.
+ // We needn't pass NewBB since its instructions are already contained
+ // inside the VMap.
+ CloneUnswitchedVals(VMap, LoopBlocks[i]);
+
NewBlocks.push_back(NewBB);
VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
@@ -907,9 +974,13 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Instruction *U = dyn_cast<Instruction>(*UI);
if (!U || !L->contains(U))
continue;
- U->replaceUsesOfWith(LIC, Replacement);
Worklist.push_back(U);
}
+
+ for (std::vector<Instruction*>::iterator UI = Worklist.begin();
+ UI != Worklist.end(); ++UI)
+ (*UI)->replaceUsesOfWith(LIC, Replacement);
+
SimplifyCode(Worklist, L);
return;
}
@@ -942,6 +1013,9 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
BasicBlock *Switch = SI->getParent();
BasicBlock *SISucc = SI->getSuccessor(DeadCase);
BasicBlock *Latch = L->getLoopLatch();
+
+ UnswitchedVals[SI].insert(Val);
+
if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
// If the DeadCase successor dominates the loop latch, then the
// transformation isn't safe since it will delete the sole predecessor edge
@@ -1017,7 +1091,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// See if instruction simplification can hack this up. This is common for
// things like "select false, X, Y" after unswitching made the condition be
// 'false'.
- if (Value *V = SimplifyInstruction(I, 0, DT))
+ if (Value *V = SimplifyInstruction(I, 0, 0, DT))
if (LI->replacementPreservesLCSSAForm(I, V)) {
ReplaceUsesOfWith(I, V, Worklist, L, LPM);
continue;
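The switch from one global SmallPtrSet to a map keyed by the switch instruction lets the pass remember, per switch, which case values it has already unswitched; the new candidate scan then simply picks the first case not yet handled. The lookup pattern, sketched with std containers and stand-in types (all names hypothetical):

#include <cstddef>
#include <map>
#include <set>
#include <vector>

typedef int SwitchId; // stand-in for const SwitchInst *
typedef int CaseVal;  // stand-in for ConstantInt *

// Per-switch record of the case values already unswitched.
std::map<SwitchId, std::set<CaseVal> > UnswitchedVals;

// Set Out to the first case of SI not yet unswitched and return true;
// return false when every case was handled (the patch's NULL result).
bool firstUnswitchedCase(SwitchId SI, const std::vector<CaseVal> &Cases,
                         CaseVal &Out) {
  for (std::size_t i = 0; i < Cases.size(); ++i)
    if (!UnswitchedVals[SI].count(Cases[i])) {
      Out = Cases[i];
      return true;
    }
  return false;
}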
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9e4f51f..7335626 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -147,8 +147,8 @@ struct MemsetRange {
} // end anon namespace
bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
- // If we found more than 8 stores to merge or 64 bytes, use memset.
- if (TheStores.size() >= 8 || End-Start >= 64) return true;
+ // If we found more than 4 stores to merge or 16 bytes, use memset.
+ if (TheStores.size() >= 4 || End-Start >= 16) return true;
// If there is nothing to merge, don't do anything.
if (TheStores.size() < 2) return false;
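This hunk lowers the unconditional-win thresholds from 8 stores / 64 bytes to 4 stores / 16 bytes, so for example four adjacent byte stores now always qualify for memset formation. A toy version of the updated gate (the TargetData-driven cost checks that follow in the real function are elided):

// Updated profitability gate for merging a contiguous store range.
bool isProfitableToUseMemset(unsigned NumStores,
                             long long Start, long long End) {
  if (NumStores >= 4 || End - Start >= 16)
    return true;   // large enough to always win
  if (NumStores < 2)
    return false;  // nothing to merge
  // The real code continues with target-specific cost checks here.
  return false;
}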
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 80f5f01..8e9449f 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -179,9 +179,13 @@ static bool IsPotentialUse(const Value *Op) {
Arg->hasNestAttr() ||
Arg->hasStructRetAttr())
return false;
- // Only consider values with pointer types, and not function pointers.
+ // Only consider values with pointer types.
+ // It seems intuitive to exclude function pointer types as well, since
+ // functions are never reference-counted; however, clang occasionally
+ // bitcasts reference-counted pointers to function-pointer type
+ // temporarily.
PointerType *Ty = dyn_cast<PointerType>(Op->getType());
- if (!Ty || isa<FunctionType>(Ty->getElementType()))
+ if (!Ty)
return false;
// Conservatively assume anything else is a potential use.
return true;
@@ -896,8 +900,9 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
#include "llvm/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/CFG.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseSet.h"
STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
@@ -1165,6 +1170,7 @@ namespace {
/// Partial - True if we've seen an opportunity for partial RR elimination,
/// such as pushing calls into a CFG triangle or into one side of a
/// CFG diamond.
+ /// TODO: Consider moving this to PtrState.
bool Partial;
/// ReleaseMetadata - If the Calls are objc_release calls and they all have
@@ -1251,16 +1257,6 @@ namespace {
Seq = NewSeq;
}
- void SetSeqToRelease(MDNode *M) {
- if (Seq == S_None || Seq == S_Use) {
- Seq = M ? S_MovableRelease : S_Release;
- RRI.ReleaseMetadata = M;
- } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) {
- Seq = S_Release;
- RRI.ReleaseMetadata = 0;
- }
- }
-
Sequence GetSeq() const {
return Seq;
}
@@ -1488,7 +1484,7 @@ namespace {
/// metadata.
unsigned ImpreciseReleaseMDKind;
- /// CopyOnEscape - The Metadata Kind for clang.arc.copy_on_escape
+ /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape
/// metadata.
unsigned CopyOnEscapeMDKind;
@@ -2255,6 +2251,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
S.ClearSequenceProgress();
+ break;
}
case S_CanRelease: {
const Value *Arg = I->first;
@@ -2289,6 +2286,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
S.ClearSequenceProgress();
+ break;
}
}
}
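Both of the hunks above append a break to a case block that previously fell through, so the S_Use handling also executed the S_CanRelease logic on the same pointer state. The bug shape, reduced to a minimal reproduction with hypothetical states:

#include <cstdio>

enum Sequence { S_Use, S_CanRelease };

void checkHazards(Sequence S) {
  switch (S) {
  case S_Use:
    std::printf("S_Use handling\n");
    break; // the patch adds this; without it, S_Use ran the next case too
  case S_CanRelease:
    std::printf("S_CanRelease handling\n");
    break;
  }
}

int main() { checkHazards(S_Use); } // prints only the S_Use line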
@@ -2350,8 +2348,11 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
NestingDetected = true;
- S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind));
S.RRI.clear();
+
+ MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.SetSeq(ReleaseMetadata ? S_MovableRelease : S_Release);
+ S.RRI.ReleaseMetadata = ReleaseMetadata;
S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
S.RRI.Calls.insert(Inst);
@@ -2494,18 +2495,16 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
if (Pred == BB)
continue;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
- assert(I != BBStates.end());
// If we haven't seen this node yet, then we've found a CFG cycle.
// Be optimistic here; it's CheckForCFGHazards' job to detect trouble.
- if (!I->second.isVisitedTopDown())
+ if (I == BBStates.end() || !I->second.isVisitedTopDown())
continue;
MyStates.InitFromPred(I->second);
while (PI != PE) {
Pred = *PI++;
if (Pred != BB) {
I = BBStates.find(Pred);
- assert(I != BBStates.end());
- if (I->second.isVisitedTopDown())
+ if (I == BBStates.end() || I->second.isVisitedTopDown())
MyStates.MergePred(I->second);
}
}
@@ -2661,49 +2660,106 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
return NestingDetected;
}
-// Visit - Visit the function both top-down and bottom-up.
-bool
-ObjCARCOpt::Visit(Function &F,
- DenseMap<const BasicBlock *, BBState> &BBStates,
- MapVector<Value *, RRInfo> &Retains,
- DenseMap<Value *, RRInfo> &Releases) {
- // Use reverse-postorder on the reverse CFG for bottom-up, because we
- // magically know that loops will be well behaved, i.e. they won't repeatedly
- // call retain on a single pointer without doing a release. We can't use
- // ReversePostOrderTraversal here because we want to walk up from each
- // function exit point.
+static void
+ComputePostOrders(Function &F,
+ SmallVectorImpl<BasicBlock *> &PostOrder,
+ SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder) {
+ /// Backedges - Backedges detected in the DFS. These edges will be
+ /// ignored in the reverse-CFG DFS, so that loops with multiple exits will be
+ /// traversed in the desired order.
+ DenseSet<std::pair<BasicBlock *, BasicBlock *> > Backedges;
+
+ /// Visited - The visited set, for doing DFS walks.
SmallPtrSet<BasicBlock *, 16> Visited;
- SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack;
- SmallVector<BasicBlock *, 16> Order;
+
+ // Do DFS, computing the PostOrder.
+ SmallPtrSet<BasicBlock *, 16> OnStack;
+ SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
+ BasicBlock *EntryBB = &F.getEntryBlock();
+ SuccStack.push_back(std::make_pair(EntryBB, succ_begin(EntryBB)));
+ Visited.insert(EntryBB);
+ OnStack.insert(EntryBB);
+ do {
+ dfs_next_succ:
+ succ_iterator End = succ_end(SuccStack.back().first);
+ while (SuccStack.back().second != End) {
+ BasicBlock *BB = *SuccStack.back().second++;
+ if (Visited.insert(BB)) {
+ SuccStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ OnStack.insert(BB);
+ goto dfs_next_succ;
+ }
+ if (OnStack.count(BB))
+ Backedges.insert(std::make_pair(SuccStack.back().first, BB));
+ }
+ OnStack.erase(SuccStack.back().first);
+ PostOrder.push_back(SuccStack.pop_back_val().first);
+ } while (!SuccStack.empty());
+
+ Visited.clear();
+
+ // Compute the exits, which are the starting points for reverse-CFG DFS.
+ SmallVector<BasicBlock *, 4> Exits;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
BasicBlock *BB = I;
if (BB->getTerminator()->getNumSuccessors() == 0)
- Stack.push_back(std::make_pair(BB, pred_begin(BB)));
+ Exits.push_back(BB);
}
- while (!Stack.empty()) {
- pred_iterator End = pred_end(Stack.back().first);
- while (Stack.back().second != End) {
- BasicBlock *BB = *Stack.back().second++;
- if (Visited.insert(BB))
- Stack.push_back(std::make_pair(BB, pred_begin(BB)));
- }
- Order.push_back(Stack.pop_back_val().first);
+
+ // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
+ SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> PredStack;
+ for (SmallVectorImpl<BasicBlock *>::iterator I = Exits.begin(), E = Exits.end();
+ I != E; ++I) {
+ BasicBlock *ExitBB = *I;
+ PredStack.push_back(std::make_pair(ExitBB, pred_begin(ExitBB)));
+ Visited.insert(ExitBB);
+ while (!PredStack.empty()) {
+ reverse_dfs_next_succ:
+ pred_iterator End = pred_end(PredStack.back().first);
+ while (PredStack.back().second != End) {
+ BasicBlock *BB = *PredStack.back().second++;
+ // Skip backedges detected in the forward-CFG DFS.
+ if (Backedges.count(std::make_pair(BB, PredStack.back().first)))
+ continue;
+ if (Visited.insert(BB)) {
+ PredStack.push_back(std::make_pair(BB, pred_begin(BB)));
+ goto reverse_dfs_next_succ;
+ }
+ }
+ ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
+ }
}
+}
+
+// Visit - Visit the function both top-down and bottom-up.
+bool
+ObjCARCOpt::Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases) {
+
+ // Use reverse-postorder traversals, because we magically know that loops
+ // will be well behaved, i.e. they won't repeatedly call retain on a single
+ // pointer without doing a release. We can't use the ReversePostOrderTraversal
+ // class here because we want the reverse-CFG postorder to consider each
+ // function exit point, and we want to ignore selected cycle edges.
+ SmallVector<BasicBlock *, 16> PostOrder;
+ SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
+ ComputePostOrders(F, PostOrder, ReverseCFGPostOrder);
+
+ // Use reverse-postorder on the reverse CFG for bottom-up.
bool BottomUpNestingDetected = false;
for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
- Order.rbegin(), E = Order.rend(); I != E; ++I) {
- BasicBlock *BB = *I;
- BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
- }
+ ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend();
+ I != E; ++I)
+ BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains);
- // Use regular reverse-postorder for top-down.
+ // Use reverse-postorder for top-down.
bool TopDownNestingDetected = false;
- typedef ReversePostOrderTraversal<Function *> RPOTType;
- RPOTType RPOT(&F);
- for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
- BasicBlock *BB = *I;
- TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases);
- }
+ for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+ PostOrder.rbegin(), E = PostOrder.rend();
+ I != E; ++I)
+ TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases);
return TopDownNestingDetected && BottomUpNestingDetected;
}
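ComputePostOrders walks the CFG with an explicit stack of (block, next-successor) pairs: a block is emitted only once its successor cursor is exhausted, which is precisely a postorder, and Visit then reads the vectors back-to-front to get reverse-postorder. The same stack discipline on a plain adjacency list, minus the backedge bookkeeping (a sketch, not the LLVM code):

#include <cstddef>
#include <utility>
#include <vector>

// Iterative DFS postorder: a node is emitted when its successor cursor
// reaches the end, mirroring the SuccStack walk above.
std::vector<int> postOrder(const std::vector<std::vector<int> > &Succs,
                           int Entry) {
  std::vector<bool> Visited(Succs.size(), false);
  std::vector<std::pair<int, std::size_t> > Stack;
  std::vector<int> Order;
  Stack.push_back(std::make_pair(Entry, 0));
  Visited[Entry] = true;
  while (!Stack.empty()) {
    int BB = Stack.back().first;
    if (Stack.back().second < Succs[BB].size()) {
      int S = Succs[BB][Stack.back().second++];
      if (!Visited[S]) {
        Visited[S] = true;
        Stack.push_back(std::make_pair(S, 0));
      }
    } else {
      Order.push_back(BB); // all successors done: postorder position
      Stack.pop_back();
    }
  }
  return Order;            // iterate in reverse for reverse-postorder
}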
@@ -3139,7 +3195,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
UE = Alloca->use_end(); UI != UE; ) {
CallInst *UserInst = cast<CallInst>(*UI++);
if (!UserInst->use_empty())
- UserInst->replaceAllUsesWith(UserInst->getOperand(1));
+ UserInst->replaceAllUsesWith(UserInst->getArgOperand(0));
UserInst->eraseFromParent();
}
Alloca->eraseFromParent();
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index f6762ad..e4cb55c 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -156,6 +157,7 @@ namespace {
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
const TargetData *TD;
+ const TargetLibraryInfo *TLI;
SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable.
DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
@@ -206,7 +208,8 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
typedef std::pair<BasicBlock*, BasicBlock*> Edge;
DenseSet<Edge> KnownFeasibleEdges;
public:
- SCCPSolver(const TargetData *td) : TD(td) {}
+ SCCPSolver(const TargetData *td, const TargetLibraryInfo *tli)
+ : TD(td), TLI(tli) {}
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
@@ -1125,7 +1128,7 @@ CallOverdefined:
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(F, Operands))
+ if (Constant *C = ConstantFoldCall(F, Operands, TLI))
return markConstant(I, C);
}
@@ -1517,6 +1520,9 @@ namespace {
/// Sparse Conditional Constant Propagator.
///
struct SCCP : public FunctionPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
static char ID; // Pass identification, replacement for typeid
SCCP() : FunctionPass(ID) {
initializeSCCPPass(*PassRegistry::getPassRegistry());
@@ -1569,7 +1575,9 @@ static void DeleteInstructionInBlock(BasicBlock *BB) {
//
bool SCCP::runOnFunction(Function &F) {
DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
- SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ SCCPSolver Solver(TD, TLI);
// Mark the first block of the function as being executable.
Solver.MarkBlockExecutable(F.begin());
@@ -1641,6 +1649,9 @@ namespace {
/// Constant Propagation.
///
struct IPSCCP : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
static char ID;
IPSCCP() : ModulePass(ID) {
initializeIPSCCPPass(*PassRegistry::getPassRegistry());
@@ -1650,7 +1661,11 @@ namespace {
} // end anonymous namespace
char IPSCCP::ID = 0;
-INITIALIZE_PASS(IPSCCP, "ipsccp",
+INITIALIZE_PASS_BEGIN(IPSCCP, "ipsccp",
+ "Interprocedural Sparse Conditional Constant Propagation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(IPSCCP, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
false, false)
@@ -1689,7 +1704,9 @@ static bool AddressIsTaken(const GlobalValue *GV) {
}
bool IPSCCP::runOnModule(Module &M) {
- SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ SCCPSolver Solver(TD, TLI);
// AddressTakenFunctions - This set keeps track of the address-taken functions
// that are in the input. As IPSCCP runs through and simplifies code,
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 4b14efc..bc70c51 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -453,6 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ if (!GEP->getPointerOperandType()->isPointerTy())
+ return false;
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
Indices);
// See if all uses can be converted.
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 6e169de..f3184ec 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -999,7 +999,7 @@ struct FFSOpt : public LibCallOptimization {
Type *ArgType = Op->getType();
Value *F = Intrinsic::getDeclaration(Callee->getParent(),
Intrinsic::cttz, ArgType);
- Value *V = B.CreateCall(F, Op, "cttz");
+ Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
V = B.CreateIntCast(V, B.getInt32Ty(), false);
@@ -1293,7 +1293,8 @@ struct FWriteOpt : public LibCallOptimization {
return ConstantInt::get(CI->getType(), 0);
// If this is writing one byte, turn it into fputc.
- if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ // This optimisation is only valid if the return value is unused.
+ if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
EmitFPutC(Char, CI->getArgOperand(3), B, TD);
return ConstantInt::get(CI->getType(), 1);
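The added use_empty() check is required because the rewrite does not preserve the return value: fwrite(S,1,1,F) returns the number of items written (1 on success), while fputc returns the character written. The rewrite is therefore only sound when nothing reads the result, as a quick check shows:

#include <cstdio>

int main() {
  std::FILE *F = std::fopen("demo.txt", "w");
  if (!F) return 1;
  // fwrite returns the item count: 1 on success.
  std::size_t N = std::fwrite("x", 1, 1, F);
  // fputc returns the character written: 'y' is 121, not 1.
  int C = std::fputc('y', F);
  std::printf("fwrite -> %zu, fputc -> %d\n", N, C); // the values differ
  return std::fclose(F);
}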
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index c83f56c..ef65c0a 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CFG.h"
@@ -240,7 +241,7 @@ bool Sinking::SinkInstruction(Instruction *Inst,
if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
- if (!Inst->isSafeToSpeculativelyExecute()) {
+ if (!isSafeToSpeculativelyExecute(Inst)) {
DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n");
return false;
}
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index 8e5a1eb..d831452 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -473,14 +473,7 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
// Check to see if this value is already used in the memory instruction's
// block. If so, it's already live into the block at the very least, so we
// can reasonably fold it.
- BasicBlock *MemBB = MemoryInst->getParent();
- for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
- UI != E; ++UI)
- // We know that uses of arguments and instructions have to be instructions.
- if (cast<Instruction>(*UI)->getParent() == MemBB)
- return true;
-
- return false;
+ return Val->isUsedInBasicBlock(MemoryInst->getParent());
}
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index a7f9efd..ef4a473 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -453,9 +453,8 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
/// of the edges being split is an exit of a loop with other exits).
///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
- BasicBlock *const *Preds,
- unsigned NumPreds, const char *Suffix,
- Pass *P) {
+ ArrayRef<BasicBlock*> Preds,
+ const char *Suffix, Pass *P) {
// Create new basic block, insert right before the original block.
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
BB->getParent(), BB);
@@ -464,7 +463,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
BranchInst *BI = BranchInst::Create(BB, NewBB);
// Move the edges from Preds to point to NewBB instead of BB.
- for (unsigned i = 0; i != NumPreds; ++i) {
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
// This is slightly more strict than necessary; the minimum requirement
// is that there be no more than one indirectbr branching to BB. And
// all BlockAddress uses would need to be updated.
@@ -477,7 +476,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// node becomes an incoming value for BB's phi node. However, if the Preds
// list is empty, we need to insert dummy entries into the PHI nodes in BB to
// account for the newly created predecessor.
- if (NumPreds == 0) {
+ if (Preds.size() == 0) {
// Insert dummy values as the incoming value.
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
@@ -486,12 +485,10 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// Update DominatorTree, LoopInfo, and LCCSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds),
- P, HasLoopExit);
+ UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit);
// Update the PHI nodes in BB with the values coming from NewBB.
- UpdatePHINodes(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), BI,
- P, HasLoopExit);
+ UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit);
return NewBB;
}
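With the ArrayRef<BasicBlock*> signature, call sites no longer spell out a (pointer, length) pair; a SmallVector, or any contiguous container, binds implicitly, which is what the updated callers elsewhere in this patch rely on. A minimal standalone analogue using a homemade span-like view (ArrayRef itself is LLVM's type; everything below is illustrative):

#include <cstddef>
#include <cstdio>
#include <vector>

// A tiny ArrayRef-like view: bundles pointer and length into one value
// and converts implicitly from a contiguous container.
template <typename T> struct Span {
  const T *Data;
  std::size_t Length;
  Span(const std::vector<T> &V)
      : Data(V.empty() ? 0 : &V[0]), Length(V.size()) {}
  std::size_t size() const { return Length; }
  const T &operator[](std::size_t I) const { return Data[I]; }
};

// New-style signature: one Span parameter replaces (T *, unsigned).
void splitBlockPredecessors(Span<int> Preds) {
  for (std::size_t i = 0, e = Preds.size(); i != e; ++i)
    std::printf("pred %d\n", Preds[i]);
}

int main() {
  std::vector<int> Preds;
  Preds.push_back(1);
  Preds.push_back(2);
  splitBlockPredecessors(Preds); // the container converts implicitly
}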
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index c052910..f752d79 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -372,8 +372,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// form, which we're in the process of restoring!
if (!Preds.empty() && HasPredOutsideOfLoop) {
BasicBlock *NewExitBB =
- SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
- "split", P);
+ SplitBlockPredecessors(Exit, Preds, "split", P);
if (P->mustPreserveAnalysisID(LCSSAID))
CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
}
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 6d5432d..d96f59c 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_library(LLVMTransformUtils
Local.cpp
LoopSimplify.cpp
LoopUnroll.cpp
+ LoopUnrollRuntime.cpp
LowerExpectIntrinsic.cpp
LowerInvoke.cpp
LowerSwitch.cpp
@@ -28,11 +29,3 @@ add_llvm_library(LLVMTransformUtils
Utils.cpp
ValueMapper.cpp
)
-
-add_llvm_library_dependencies(LLVMTransformUtils
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTarget
- LLVMipa
- )
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index dd4a659..f89e1b1 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -924,40 +924,44 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
return false;
}
- // Find the personality function used by the landing pads of the caller. If it
- // exists, then check to see that it matches the personality function used in
- // the callee.
- for (Function::const_iterator
- I = Caller->begin(), E = Caller->end(); I != E; ++I)
+ // Get the personality function from the callee if it contains a landing pad.
+ Value *CalleePersonality = 0;
+ for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
+ I != E; ++I)
if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
const BasicBlock *BB = II->getUnwindDest();
- // FIXME: This 'isa' here should become go away once the new EH system is
- // in place.
- if (!isa<LandingPadInst>(BB->getFirstNonPHI()))
- continue;
- const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI());
- const Value *CallerPersFn = LP->getPersonalityFn();
-
- // If the personality functions match, then we can perform the
- // inlining. Otherwise, we can't inline.
- // TODO: This isn't 100% true. Some personality functions are proper
- // supersets of others and can be used in place of the other.
- for (Function::const_iterator
- I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I)
- if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
- const BasicBlock *BB = II->getUnwindDest();
- // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
- // the new EH system is in place.
- if (const LandingPadInst *LP =
- dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
- if (CallerPersFn != LP->getPersonalityFn())
- return false;
- break;
- }
-
+ // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
+ // the new EH system is in place.
+ if (const LandingPadInst *LP =
+ dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
+ CalleePersonality = LP->getPersonalityFn();
break;
}
+ // Find the personality function used by the landing pads of the caller. If it
+ // exists, then check to see that it matches the personality function used in
+ // the callee.
+ if (CalleePersonality)
+ for (Function::const_iterator I = Caller->begin(), E = Caller->end();
+ I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ const BasicBlock *BB = II->getUnwindDest();
+ // FIXME: This 'isa' here should go away once the new EH system
+ // is in place.
+ if (!isa<LandingPadInst>(BB->getFirstNonPHI()))
+ continue;
+ const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI());
+
+ // If the personality functions match, then we can perform the
+ // inlining. Otherwise, we can't inline.
+ // TODO: This isn't 100% true. Some personality functions are proper
+ // supersets of others and can be used in place of the other.
+ if (LP->getPersonalityFn() != CalleePersonality)
+ return false;
+
+ break;
+ }
+
// Get an iterator to the last basic block in the function, which will have
// the new function inlined after it.
//
diff --git a/lib/Transforms/Utils/LLVMBuild.txt b/lib/Transforms/Utils/LLVMBuild.txt
index dea7b02..88b2ffe 100644
--- a/lib/Transforms/Utils/LLVMBuild.txt
+++ b/lib/Transforms/Utils/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = TransformUtils
parent = Transforms
required_libraries = Analysis Core IPA Support Target
-
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 134ab71..4dd93cf 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -494,22 +494,8 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
if (Succ->getSinglePredecessor()) return true;
// Make a list of the predecessors of BB
- typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
- BlockSet BBPreds(pred_begin(BB), pred_end(BB));
-
- // Use that list to make another list of common predecessors of BB and Succ
- BlockSet CommonPreds;
- for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (BBPreds.count(P))
- CommonPreds.insert(P);
- }
+ SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
- // Shortcut, if there are no common predecessors, merging is always safe
- if (CommonPreds.empty())
- return true;
-
// Look at all the phi nodes in Succ, to see if they present a conflict when
// merging these blocks
for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
@@ -520,28 +506,28 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
// merge the phi nodes and then the blocks can still be merged
PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
if (BBPN && BBPN->getParent() == BB) {
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
- if (BBPN->getIncomingValueForBlock(*PI)
- != PN->getIncomingValueForBlock(*PI)) {
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) &&
+ BBPN->getIncomingValueForBlock(IBB) != PN->getIncomingValue(PI)) {
DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
<< Succ->getName() << " is conflicting with "
<< BBPN->getName() << " with regard to common predecessor "
- << (*PI)->getName() << "\n");
+ << IBB->getName() << "\n");
return false;
}
}
} else {
Value* Val = PN->getIncomingValueForBlock(BB);
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
// See if the incoming value for the common predecessor is equal to the
// one for BB, in which case this phi node will not prevent the merging
// of the block.
- if (Val != PN->getIncomingValueForBlock(*PI)) {
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) && Val != PN->getIncomingValue(PI)) {
DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
<< Succ->getName() << " is conflicting with regard to common "
- << "predecessor " << (*PI)->getName() << "\n");
+ << "predecessor " << IBB->getName() << "\n");
return false;
}
}
@@ -748,6 +734,10 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
// If there is a large requested alignment and we can, bump up the alignment
// of the global.
if (GV->isDeclaration()) return Align;
+ // If the memory we set aside for the global may not be the memory used by
+ // the final program then it is impossible for us to reliably enforce the
+ // preferred alignment.
+ if (GV->isWeakForLinker()) return Align;
if (GV->getAlignment() >= PrefAlign)
return GV->getAlignment();
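The isWeakForLinker() bail-out covers globals whose definition may be discarded at link time in favor of one from another object file, whose alignment this compilation never saw; raising the alignment locally and then emitting code that relies on it would be unsound. A hypothetical two-TU illustration using the GCC/Clang weak attribute:

// tu1.cpp: a weak definition. The optimizer must not bump its alignment
// and then assume it, because the linker may keep another definition.
__attribute__((weak)) char Buf[16];

// tu2.cpp (conceptually) provides a strong definition:
//   char Buf[16];
// When both objects are linked, tu2's Buf prevails, so any alignment
// assumption baked into tu1's code could be wrong.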
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index cbd54a8..4376265 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -99,7 +99,8 @@ namespace {
bool ProcessLoop(Loop *L, LPPassManager &LPM);
BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
BasicBlock *InsertPreheaderForLoop(Loop *L);
- Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM);
+ Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM,
+ BasicBlock *Preheader);
BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
void PlaceSplitBlockCarefully(BasicBlock *NewBB,
SmallVectorImpl<BasicBlock*> &SplitPreds,
@@ -240,7 +241,7 @@ ReprocessLoop:
// this for loops with a giant number of backedges, just factor them into a
// common backedge instead.
if (L->getNumBackEdges() < 8) {
- if (SeparateNestedLoop(L, LPM)) {
+ if (SeparateNestedLoop(L, LPM, Preheader)) {
++NumNested;
// This is a big restructuring change, reprocess the whole loop.
Changed = true;
@@ -265,7 +266,7 @@ ReprocessLoop:
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
if (AA) AA->deleteValue(PN);
if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
@@ -379,19 +380,27 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
}
// Split out the loop pre-header.
- BasicBlock *NewBB =
- SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
- ".preheader", this);
+ BasicBlock *PreheaderBB;
+ if (!Header->isLandingPad()) {
+ PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
+ this);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
+ ".split-lp", this, NewBBs);
+ PreheaderBB = NewBBs[0];
+ }
- NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
- DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
- << "\n");
+ PreheaderBB->getTerminator()->setDebugLoc(
+ Header->getFirstNonPHI()->getDebugLoc());
+ DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
+ << PreheaderBB->getName() << "\n");
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
- PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
+ PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
- return NewBB;
+ return PreheaderBB;
}
/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
@@ -420,9 +429,7 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
this, NewBBs);
NewExitBB = NewBBs[0];
} else {
- NewExitBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
- LoopBlocks.size(), ".loopexit",
- this);
+ NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this);
}
DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
@@ -456,7 +463,7 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
if (AA) AA->deleteValue(PN);
@@ -529,7 +536,17 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
/// If we are able to separate out a loop, return the new outer loop that was
/// created.
///
-Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
+Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
+ BasicBlock *Preheader) {
+ // Don't try to separate loops without a preheader (this excludes
+ // loop headers which are targeted by an indirectbr).
+ if (!Preheader)
+ return 0;
+
+ // The header is not a landing pad; preheader insertion should ensure this.
+ assert(!L->getHeader()->isLandingPad() &&
+ "Can't insert backedge to landing pad");
+
PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
if (PN == 0) return 0; // No known way to partition.
@@ -539,13 +556,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
SmallVector<BasicBlock*, 8> OuterLoopPreds;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) != PN ||
- !L->contains(PN->getIncomingBlock(i))) {
- // We can't split indirectbr edges.
- if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
- return 0;
-
+ !L->contains(PN->getIncomingBlock(i)))
OuterLoopPreds.push_back(PN->getIncomingBlock(i));
- }
DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
@@ -556,9 +568,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
SE->forgetLoop(L);
BasicBlock *Header = L->getHeader();
- BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
- OuterLoopPreds.size(),
- ".outer", this);
+ BasicBlock *NewBB =
+ SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -640,6 +651,9 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
if (!Preheader)
return 0;
+ // The header is not a landing pad; preheader insertion should ensure this.
+ assert(!Header->isLandingPad() && "Can't insert backedge to landing pad");
+
// Figure out which basic blocks contain back-edges to the loop header.
std::vector<BasicBlock*> BackedgeBlocks;
for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 62e4fa2..b96f14b 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -135,7 +135,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
- unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) {
+ bool AllowRuntime, unsigned TripMultiple,
+ LoopInfo *LI, LPPassManager *LPM) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -165,12 +166,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
return false;
}
- // Notify ScalarEvolution that the loop will be substantially changed,
- // if not outright eliminated.
- ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
- if (SE)
- SE->forgetLoop(L);
-
if (TripCount != 0)
DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
if (TripMultiple != 1)
@@ -188,6 +183,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// Are we eliminating the loop control altogether?
bool CompletelyUnroll = Count == TripCount;
+ // We assume a run-time trip count if the compiler cannot
+ // figure out the loop trip count and the unroll-runtime
+ // flag is specified.
+ bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
+
+ if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
+ return false;
+
+ // Notify ScalarEvolution that the loop will be substantially changed,
+ // if not outright eliminated.
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE)
+ SE->forgetLoop(L);
+
// If we know the trip count, we know the multiple...
unsigned BreakoutTrip = 0;
if (TripCount != 0) {
@@ -209,6 +218,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
} else if (TripMultiple != 1) {
DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ } else if (RuntimeTripCount) {
+ DEBUG(dbgs() << " with run-time trip count");
}
DEBUG(dbgs() << "!\n");
}
@@ -332,6 +343,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
BasicBlock *Dest = Headers[j];
bool NeedConditional = true;
+ if (RuntimeTripCount && j != 0) {
+ NeedConditional = false;
+ }
+
// For a complete unroll, make the last iteration end with a branch
// to the exit block.
if (CompletelyUnroll && j == 0) {
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
new file mode 100644
index 0000000..b351852
--- /dev/null
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -0,0 +1,374 @@
+//===-- UnrollLoopRuntime.cpp - Runtime Loop unrolling utilities ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities for loops with run-time
+// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
+// trip counts.
+//
+// The functions in this file are used to generate extra code when the
+// run-time trip count modulo the unroll factor is not 0. When this is the
+// case, we need to generate code to execute these 'left over' iterations.
+//
+// The current strategy generates an if-then-else sequence prior to the
+// unrolled loop to execute the 'left over' iterations. Other strategies
+// include generating a loop before or after the unrolled loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <algorithm>
+
+using namespace llvm;
+
+STATISTIC(NumRuntimeUnrolled,
+ "Number of loops unrolled with run-time trip counts");
+
+/// Connect the unrolling prolog code to the original loop.
+/// The unrolling prolog code contains code to execute the
+/// 'extra' iterations if the run-time trip count modulo the
+/// unroll count is non-zero.
+///
+/// This function performs the following:
+/// - Create PHI nodes at prolog end block to combine values
+/// that exit the prolog code and jump around the prolog.
+/// - Add a PHI operand to a PHI node at the loop exit block
+/// for values that exit the prolog and go around the loop.
+/// - Branch around the original loop if the trip count is less
+/// than the unroll factor.
+///
+static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
+ BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
+ BasicBlock *OrigPH, BasicBlock *NewPH,
+ ValueToValueMapTy &LVMap, Pass *P) {
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch != 0 && "Loop must have a latch");
+
+ // Create a PHI node for each outgoing value from the original loop
+ // (which means it is an outgoing value from the prolog code too).
+ // The new PHI node is inserted in the prolog end basic block.
+/// The new PHI node is added as an operand of a PHI node in either
+/// the loop header or the loop exit block.
+ for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
+ SBI != SBE; ++SBI) {
+ for (BasicBlock::iterator BBI = (*SBI)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+
+ // Add a new PHI node to the prolog end block and add the
+ // appropriate incoming values.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
+ PrologEnd->getTerminator());
+ // Adding a value to the new PHI node from the original loop preheader.
+ // This is the value that skips all the prolog code.
+ if (L->contains(PN)) {
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
+ } else {
+ NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH);
+ }
+ Value *OrigVal = PN->getIncomingValueForBlock(Latch);
+ Value *V = OrigVal;
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (L->contains(I)) {
+ V = LVMap[I];
+ }
+ }
+ // Adding a value to the new PHI node from the last prolog block
+ // that was created.
+ NewPN->addIncoming(V, LastPrologBB);
+
+ // Update the existing PHI node operand with the value from the
+ // new PHI node. How this is done depends on whether the existing
+ // PHI node is in the original loop block or the exit block.
+ if (L->contains(PN)) {
+ PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
+ } else {
+ PN->addIncoming(NewPN, PrologEnd);
+ }
+ }
+ }
+
+ // Create a branch around the original loop, which is taken if the
+ // trip count is less than the unroll factor.
+ Instruction *InsertPt = PrologEnd->getTerminator();
+ Instruction *BrLoopExit =
+ new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
+ ConstantInt::get(TripCount->getType(), Count));
+ BasicBlock *Exit = L->getUniqueExitBlock();
+ assert(Exit != 0 && "Loop must have a unique exit block");
+ // Split the exit to maintain loop canonicalization guarantees
+ SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
+ if (!Exit->isLandingPad()) {
+ SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa",
+ P, NewBBs);
+ }
+ // Add the branch to the exit block (around the unrolled loop)
+ BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt);
+ InsertPt->eraseFromParent();
+}
+
+/// Create a clone of the blocks in a loop and connect them together.
+/// This function doesn't create a clone of the loop structure.
+///
+/// There are two value maps that are defined and used. VMap is
+/// for the values in the current loop instance. LVMap contains
+/// the values from the last loop instance. We need the LVMap values
+/// to update the initial values for the current loop instance.
+///
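+/// For example (purely illustrative): when the second copy is cloned, a
+/// header PHI whose latch value was %x in the original loop is rewired to
+/// LVMap[%x], i.e. the clone of %x produced for the previous copy.
+///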
+static void CloneLoopBlocks(Loop *L,
+ bool FirstCopy,
+ BasicBlock *InsertTop,
+ BasicBlock *InsertBot,
+ std::vector<BasicBlock *> &NewBlocks,
+ LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap,
+ ValueToValueMapTy &LVMap,
+ LoopInfo *LI) {
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ Function *F = Header->getParent();
+ LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
+ // For each block in the original loop, create a new copy,
+ // and update the value map with the newly created values.
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F);
+ NewBlocks.push_back(NewBB);
+
+ if (Loop *ParentLoop = L->getParentLoop())
+ ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+
+ VMap[*BB] = NewBB;
+ if (Header == *BB) {
+ // For the first block, add a CFG connection to this newly
+ // created block
+ InsertTop->getTerminator()->setSuccessor(0, NewBB);
+
+ // Change the incoming values to the ones defined in the
+ // previously cloned loop.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *NewPHI = cast<PHINode>(VMap[I]);
+ if (FirstCopy) {
+ // We replace the first phi node with the value from the preheader
+ VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
+ NewBB->getInstList().erase(NewPHI);
+ } else {
+ // Update VMap with values from the previous block
+ unsigned idx = NewPHI->getBasicBlockIndex(Latch);
+ Value *InVal = NewPHI->getIncomingValue(idx);
+ if (Instruction *I = dyn_cast<Instruction>(InVal))
+ if (L->contains(I))
+ InVal = LVMap[InVal];
+ NewPHI->setIncomingValue(idx, InVal);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ }
+ }
+ }
+
+ if (Latch == *BB) {
+ VMap.erase((*BB)->getTerminator());
+ NewBB->getTerminator()->eraseFromParent();
+ BranchInst::Create(InsertBot, NewBB);
+ }
+ }
+ // LVMap is updated with the values for the current loop, which are
+ // used the next time this function is called.
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI) {
+ LVMap[VI->first] = VI->second;
+ }
+}
+
+/// Insert prolog code before the unrolled loop when unrolling a loop
+/// with a run-time trip count.
+///
+/// This method assumes that the loop unroll factor is the total number
+/// of loop bodies in the loop after unrolling. (Some folks refer
+/// to the unroll factor as the number of *extra* copies added).
+/// We also assume that the loop unroll factor is a power of two. So, after
+/// unrolling the loop, the number of loop bodies executed is 2,
+/// 4, 8, etc. Note - LLVM converts the if-then-else sequence to a switch
+/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
+/// the switch instruction is generated.
+///
+/// extraiters = tripcount % loopfactor
+/// if (extraiters == 0) jump Loop:
+/// if (extraiters == loopfactor) jump L1
+/// if (extraiters == loopfactor-1) jump L2
+/// ...
+/// L1: LoopBody;
+/// L2: LoopBody;
+/// ...
+/// if tripcount < loopfactor jump End
+/// Loop:
+/// ...
+/// End:
+///
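+/// For a hypothetical unroll factor of 4, the prolog thus contains three
+/// copies of the loop body; a remainder of r (1, 2, or 3) falls through
+/// r of them before reaching the unrolled loop.
+///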
+bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+ LPPassManager *LPM) {
+ // For now, only unroll loops that contain a single exiting block.
+ SmallVector<BasicBlock*, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ if (ExitingBlocks.size() > 1)
+ return false;
+
+ // Make sure the loop is in canonical form and has a unique exit block.
+ if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
+ return false;
+
+ // Use Scalar Evolution to compute the trip count. This allows more
+ // loops to be unrolled than relying on induction variable simplification.
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE == 0)
+ return false;
+
+ // Only unroll loops with a computable trip count, and the trip count must
+ // be an integer value (allowing a pointer type is a TODO item).
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
+ return false;
+
+ // Add 1 since the backedge count doesn't include the first loop iteration
+ const SCEV *TripCountSC =
+ SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
+ if (isa<SCEVCouldNotCompute>(TripCountSC))
+ return false;
+
+ // We only handle cases when the unroll factor is a power of 2.
+ // Count is the loop unroll factor, the number of extra copies added + 1.
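+ // (For illustration: Count == 8 passes, since 8 & 7 == 0, while Count == 6
+ // is rejected, since 6 & 5 == 4.)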
+ if ((Count & (Count-1)) != 0)
+ return false;
+
+ // If this loop is nested, then the loop unroller changes the code in the
+ // parent loop, so the Scalar Evolution pass needs to be run again.
+ if (Loop *ParentLoop = L->getParentLoop())
+ SE->forgetLoop(ParentLoop);
+
+ BasicBlock *PH = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ // It helps to split the original preheader twice, once for the end of the
+ // prolog code and once for a new loop preheader.
+ BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
+ BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
+ BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+
+ // Compute the number of extra iterations required, which is:
+ // extra iterations = run-time trip count % loop unroll factor
+ SCEVExpander Expander(*SE, "loop-unroll");
+ Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
+ PreHeaderBR);
+ Type *CountTy = TripCount->getType();
+ BinaryOperator *ModVal =
+ BinaryOperator::CreateURem(TripCount,
+ ConstantInt::get(CountTy, Count),
+ "xtraiter");
+ ModVal->insertBefore(PreHeaderBR);
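+ // (For illustration: with a run-time trip count of 10 and Count == 4,
+ // "xtraiter" is 10 % 4 == 2, so two prolog iterations execute.)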
+
+ // Check whether there are extra iterations; if there are none, jump
+ // straight to the unrolled loop.
+ Value *BranchVal = new ICmpInst(PreHeaderBR,
+ ICmpInst::ICMP_NE, ModVal,
+ ConstantInt::get(CountTy, 0), "lcmp");
+ // Branch to either the extra iterations or the unrolled loop
+ // We will fix up the true branch label when adding loop body copies
+ BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
+ assert(PreHeaderBR->isUnconditional() &&
+ PreHeaderBR->getSuccessor(0) == PEnd &&
+ "CFG edges in Preheader are not correct");
+ PreHeaderBR->eraseFromParent();
+
+ ValueToValueMapTy LVMap;
+ Function *F = Header->getParent();
+ // These variables are used to update the CFG links in each iteration
+ BasicBlock *CompareBB = 0;
+ BasicBlock *LastLoopBB = PH;
+ // Get an ordered list of blocks in the loop to help with the ordering of the
+ // cloned blocks in the prolog code
+ LoopBlocksDFS LoopBlocks(L);
+ LoopBlocks.perform(LI);
+
+ //
+ // For each extra loop iteration, create a copy of the loop's basic blocks
+ // and generate a condition that branches to the copy depending on the
+ // number of 'left over' iterations.
+ //
+ for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) {
+ std::vector<BasicBlock*> NewBlocks;
+ ValueToValueMapTy VMap;
+
+ // Clone all the basic blocks in the loop, but not the loop structure.
+ // This function adds the appropriate CFG connections.
+ CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks,
+ LoopBlocks, VMap, LVMap, LI);
+ LastLoopBB = cast<BasicBlock>(VMap[Latch]);
+
+ // Insert the cloned blocks into the function just before the original loop.
+ F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(),
+ NewBlocks[0], F->end());
+
+ // Generate the code for the comparison which determines whether the
+ // prolog code needs to be executed.
+ if (leftOverIters == Count-1) {
+ // There is no compare block for the fall-through case of the last
+ // left-over iteration.
+ CompareBB = NewBlocks[0];
+ } else {
+ // Create a new block for the comparison
+ BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp",
+ F, CompareBB);
+ if (Loop *ParentLoop = L->getParentLoop()) {
+ // Add the new block to the parent loop, if needed
+ ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ }
+
+ // The comparison with the extra-iteration value, and the branch
+ Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
+ ConstantInt::get(CountTy, leftOverIters),
+ "un.tmp");
+ // Branch to either the extra iterations or the unrolled loop
+ BranchInst::Create(NewBlocks[0], CompareBB,
+ BranchVal, NewBB);
+ CompareBB = NewBB;
+ PH->getTerminator()->setSuccessor(0, NewBB);
+ VMap[NewPH] = CompareBB;
+ }
+
+ // Rewrite the cloned instruction operands to use the values
+ // created when the blocks were cloned.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I) {
+ RemapInstruction(I, VMap,
+ RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+ }
+ }
+ }
+
+ // Connect the prolog code to the original loop and update the
+ // PHI nodes.
+ ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap,
+ LPM->getAsPass());
+ NumRuntimeUnrolled++;
+ return true;
+}
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 5e294a3..8491c55 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -19,7 +19,8 @@
using namespace llvm;
-void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
+static void appendToGlobalArray(const char *Array,
+ Module &M, Function *F, int Priority) {
IRBuilder<> IRB(M.getContext());
FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
StructType *Ty = StructType::get(
@@ -31,7 +32,7 @@ void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
// Get the current set of static global constructors and add the new ctor
// to the list.
SmallVector<Constant *, 16> CurrentCtors;
- if (GlobalVariable * GVCtor = M.getNamedGlobal("llvm.global_ctors")) {
+ if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) {
if (Constant *Init = GVCtor->getInitializer()) {
unsigned n = Init->getNumOperands();
CurrentCtors.reserve(n + 1);
@@ -51,6 +52,13 @@ void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
// Create the new global variable and replace all uses of
// the old global variable with the new one.
(void)new GlobalVariable(M, NewInit->getType(), false,
- GlobalValue::AppendingLinkage, NewInit,
- "llvm.global_ctors");
+ GlobalValue::AppendingLinkage, NewInit, Array);
+}
+
+void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
+ appendToGlobalArray("llvm.global_ctors", M, F, Priority);
+}
+
+void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) {
+ appendToGlobalArray("llvm.global_dtors", M, F, Priority);
}
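+
+// Example usage (hypothetical): a pass that needs a module teardown hook
+// could register it with appendToGlobalDtors(M, FiniFn, /*Priority=*/0).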
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index db3e942..e8f4285 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -590,7 +590,7 @@ void PromoteMem2Reg::run() {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, 0, &DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index b8c3ab4..bf2cb49 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -257,7 +257,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// Okay, it looks like the instruction IS in the "condition". Check to
// see if it's a cheap instruction to unconditionally compute, and if it
// only uses stuff defined outside of the condition. If so, hoist it out.
- if (!I->isSafeToSpeculativelyExecute())
+ if (!isSafeToSpeculativelyExecute(I))
return false;
unsigned Cost = 0;
@@ -1487,7 +1487,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
Instruction *BonusInst = 0;
if (&*FrontIt != Cond &&
FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
- FrontIt->isSafeToSpeculativelyExecute()) {
+ isSafeToSpeculativelyExecute(FrontIt)) {
BonusInst = &*FrontIt;
++FrontIt;
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index ac005f9..81eb9e0 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -39,12 +40,14 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
/// runOnFunction - Remove instructions that simplify.
bool runOnFunction(Function &F) {
const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
@@ -60,7 +63,7 @@ namespace {
continue;
// Don't waste time simplifying unused instructions.
if (!I->use_empty())
- if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
// Mark all uses for resimplification next time round the loop.
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI)
@@ -84,8 +87,11 @@ namespace {
}
char InstSimplifier::ID = 0;
-INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions",
- false, false)
+INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
char &llvm::InstructionSimplifierID = InstSimplifier::ID;
// Public interface to the simplify instructions pass.
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index d7863f5..4fb5fd3 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -2110,3 +2110,6 @@ void Type::dump() const { print(dbgs()); }
// Module::dump() - Allow printing of Modules from the debugger.
void Module::dump() const { print(dbgs(), 0); }
+
+// NamedMDNode::dump() - Allow printing of NamedMDNodes from the debugger.
+void NamedMDNode::dump() const { print(dbgs(), 0); }
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index b849d3e..59424f9 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -38,105 +38,21 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
return false;
Name = Name.substr(5); // Strip off "llvm."
- FunctionType *FTy = F->getFunctionType();
- Module *M = F->getParent();
-
switch (Name[0]) {
default: break;
- case 'a':
- if (Name.startswith("atomic.cmp.swap") ||
- Name.startswith("atomic.swap") ||
- Name.startswith("atomic.load.add") ||
- Name.startswith("atomic.load.sub") ||
- Name.startswith("atomic.load.and") ||
- Name.startswith("atomic.load.nand") ||
- Name.startswith("atomic.load.or") ||
- Name.startswith("atomic.load.xor") ||
- Name.startswith("atomic.load.max") ||
- Name.startswith("atomic.load.min") ||
- Name.startswith("atomic.load.umax") ||
- Name.startswith("atomic.load.umin"))
- return true;
- case 'i':
- // This upgrades the old llvm.init.trampoline to the new
- // llvm.init.trampoline and llvm.adjust.trampoline pair.
- if (Name == "init.trampoline") {
- // The new llvm.init.trampoline returns nothing.
- if (FTy->getReturnType()->isVoidTy())
- break;
-
- assert(FTy->getNumParams() == 3 && "old init.trampoline takes 3 args!");
-
- // Change the name of the old intrinsic so that we can play with its type.
- std::string NameTmp = F->getName();
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(
- NameTmp,
- Type::getVoidTy(M->getContext()),
- FTy->getParamType(0), FTy->getParamType(1),
- FTy->getParamType(2), (Type *)0));
+ case 'c': {
+ if (Name.startswith("ctlz.") && F->arg_size() == 1) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ F->arg_begin()->getType());
return true;
}
- case 'm':
- if (Name == "memory.barrier")
- return true;
- case 'p':
- // This upgrades the llvm.prefetch intrinsic to accept one more parameter,
- // which is a instruction / data cache identifier. The old version only
- // implicitly accepted the data version.
- if (Name == "prefetch") {
- // Don't do anything if it has the correct number of arguments already
- if (FTy->getNumParams() == 4)
- break;
-
- assert(FTy->getNumParams() == 3 && "old prefetch takes 3 args!");
- // We first need to change the name of the old (bad) intrinsic, because
- // its type is incorrect, but we cannot overload that name. We
- // arbitrarily unique it here allowing us to construct a correctly named
- // and typed function below.
- std::string NameTmp = F->getName();
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(NameTmp,
- FTy->getReturnType(),
- FTy->getParamType(0),
- FTy->getParamType(1),
- FTy->getParamType(2),
- FTy->getParamType(2),
- (Type*)0));
+ if (Name.startswith("cttz.") && F->arg_size() == 1) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
+ F->arg_begin()->getType());
return true;
}
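+ // (Illustration: an old declaration like i16 @llvm.cttz.i16(i16) is
+ // re-declared above with the new two-argument signature; each call is
+ // then rewritten by UpgradeIntrinsicCall to pass i1 false for the new
+ // is_zero_undef argument.)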
-
- break;
- case 'x': {
- const char *NewFnName = NULL;
- // This fixes the poorly named crc32 intrinsics.
- if (Name == "x86.sse42.crc32.8")
- NewFnName = "llvm.x86.sse42.crc32.32.8";
- else if (Name == "x86.sse42.crc32.16")
- NewFnName = "llvm.x86.sse42.crc32.32.16";
- else if (Name == "x86.sse42.crc32.32")
- NewFnName = "llvm.x86.sse42.crc32.32.32";
- else if (Name == "x86.sse42.crc64.8")
- NewFnName = "llvm.x86.sse42.crc32.64.8";
- else if (Name == "x86.sse42.crc64.64")
- NewFnName = "llvm.x86.sse42.crc32.64.64";
-
- if (NewFnName) {
- F->setName(NewFnName);
- NewFn = F;
- return true;
- }
-
- // Calls to these instructions are transformed into unaligned loads.
- if (Name == "x86.sse.loadu.ps" || Name == "x86.sse2.loadu.dq" ||
- Name == "x86.sse2.loadu.pd")
- return true;
-
- // Calls to these instructions are transformed into nontemporal stores.
- if (Name == "x86.sse.movnt.ps" || Name == "x86.sse2.movnt.dq" ||
- Name == "x86.sse2.movnt.pd" || Name == "x86.sse2.movnt.i")
- return true;
-
break;
}
}
@@ -169,190 +85,27 @@ bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
// upgraded intrinsic. All argument and return casting must be provided in
// order to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
- Function *F = CI->getCalledFunction();
- LLVMContext &C = CI->getContext();
- ImmutableCallSite CS(CI);
-
- assert(F && "CallInst has no function associated with it.");
-
- if (!NewFn) {
- if (F->getName() == "llvm.x86.sse.loadu.ps" ||
- F->getName() == "llvm.x86.sse2.loadu.dq" ||
- F->getName() == "llvm.x86.sse2.loadu.pd") {
- // Convert to a native, unaligned load.
- Type *VecTy = CI->getType();
- Type *IntTy = IntegerType::get(C, 128);
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- Value *BC = Builder.CreateBitCast(CI->getArgOperand(0),
- PointerType::getUnqual(IntTy),
- "cast");
- LoadInst *LI = Builder.CreateLoad(BC, CI->getName());
- LI->setAlignment(1); // Unaligned load.
- BC = Builder.CreateBitCast(LI, VecTy, "new.cast");
-
- // Fix up all the uses with our new load.
- if (!CI->use_empty())
- CI->replaceAllUsesWith(BC);
-
- // Remove intrinsic.
- CI->eraseFromParent();
- } else if (F->getName() == "llvm.x86.sse.movnt.ps" ||
- F->getName() == "llvm.x86.sse2.movnt.dq" ||
- F->getName() == "llvm.x86.sse2.movnt.pd" ||
- F->getName() == "llvm.x86.sse2.movnt.i") {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- Module *M = F->getParent();
- SmallVector<Value *, 1> Elts;
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
- MDNode *Node = MDNode::get(C, Elts);
-
- Value *Arg0 = CI->getArgOperand(0);
- Value *Arg1 = CI->getArgOperand(1);
-
- // Convert the type of the pointer to a pointer to the stored type.
- Value *BC = Builder.CreateBitCast(Arg0,
- PointerType::getUnqual(Arg1->getType()),
- "cast");
- StoreInst *SI = Builder.CreateStore(Arg1, BC);
- SI->setMetadata(M->getMDKindID("nontemporal"), Node);
- SI->setAlignment(16);
-
- // Remove intrinsic.
- CI->eraseFromParent();
- } else if (F->getName().startswith("llvm.atomic.cmp.swap")) {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
- Value *Val = Builder.CreateAtomicCmpXchg(CI->getArgOperand(0),
- CI->getArgOperand(1),
- CI->getArgOperand(2),
- Monotonic);
-
- // Replace intrinsic.
- Val->takeName(CI);
- if (!CI->use_empty())
- CI->replaceAllUsesWith(Val);
- CI->eraseFromParent();
- } else if (F->getName().startswith("llvm.atomic")) {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- AtomicRMWInst::BinOp Op;
- if (F->getName().startswith("llvm.atomic.swap"))
- Op = AtomicRMWInst::Xchg;
- else if (F->getName().startswith("llvm.atomic.load.add"))
- Op = AtomicRMWInst::Add;
- else if (F->getName().startswith("llvm.atomic.load.sub"))
- Op = AtomicRMWInst::Sub;
- else if (F->getName().startswith("llvm.atomic.load.and"))
- Op = AtomicRMWInst::And;
- else if (F->getName().startswith("llvm.atomic.load.nand"))
- Op = AtomicRMWInst::Nand;
- else if (F->getName().startswith("llvm.atomic.load.or"))
- Op = AtomicRMWInst::Or;
- else if (F->getName().startswith("llvm.atomic.load.xor"))
- Op = AtomicRMWInst::Xor;
- else if (F->getName().startswith("llvm.atomic.load.max"))
- Op = AtomicRMWInst::Max;
- else if (F->getName().startswith("llvm.atomic.load.min"))
- Op = AtomicRMWInst::Min;
- else if (F->getName().startswith("llvm.atomic.load.umax"))
- Op = AtomicRMWInst::UMax;
- else if (F->getName().startswith("llvm.atomic.load.umin"))
- Op = AtomicRMWInst::UMin;
- else
- llvm_unreachable("Unknown atomic");
-
- Value *Val = Builder.CreateAtomicRMW(Op, CI->getArgOperand(0),
- CI->getArgOperand(1),
- Monotonic);
-
- // Replace intrinsic.
- Val->takeName(CI);
- if (!CI->use_empty())
- CI->replaceAllUsesWith(Val);
- CI->eraseFromParent();
- } else if (F->getName() == "llvm.memory.barrier") {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- // Note that this conversion ignores the "device" bit; it was not really
- // well-defined, and got abused because nobody paid enough attention to
- // get it right. In practice, this probably doesn't matter; application
- // code generally doesn't need anything stronger than
- // SequentiallyConsistent (and realistically, SequentiallyConsistent
- // is lowered to a strong enough barrier for almost anything).
-
- if (cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue())
- Builder.CreateFence(SequentiallyConsistent);
- else if (!cast<ConstantInt>(CI->getArgOperand(0))->getZExtValue())
- Builder.CreateFence(Release);
- else if (!cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue())
- Builder.CreateFence(Acquire);
- else
- Builder.CreateFence(AcquireRelease);
+ assert(CI->getCalledFunction() && "Intrinsic call is not direct?");
+ if (!NewFn) return;
- // Remove intrinsic.
- CI->eraseFromParent();
- } else {
- llvm_unreachable("Unknown function for CallInst upgrade.");
- }
- return;
- }
+ LLVMContext &C = CI->getContext();
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
switch (NewFn->getIntrinsicID()) {
- case Intrinsic::prefetch: {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
- llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext());
-
- // Add the extra "data cache" argument
- Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2),
- llvm::ConstantInt::get(I32Ty, 1) };
- CallInst *NewCI = CallInst::Create(NewFn, Operands,
- CI->getName(), CI);
- NewCI->setTailCall(CI->isTailCall());
- NewCI->setCallingConv(CI->getCallingConv());
- // Handle any uses of the old CallInst.
- if (!CI->use_empty())
- // Replace all uses of the old call with the new cast which has the
- // correct type.
- CI->replaceAllUsesWith(NewCI);
-
- // Clean up the old call now that it has been completely upgraded.
- CI->eraseFromParent();
- break;
- }
- case Intrinsic::init_trampoline: {
-
- // Transform
- // %tramp = call i8* llvm.init.trampoline (i8* x, i8* y, i8* z)
- // to
- // call void llvm.init.trampoline (i8* %x, i8* %y, i8* %z)
- // %tramp = call i8* llvm.adjust.trampoline (i8* %x)
-
- Function *AdjustTrampolineFn =
- cast<Function>(Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::adjust_trampoline));
-
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI);
-
- Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2));
-
- CallInst *AdjustCall = Builder.CreateCall(AdjustTrampolineFn,
- CI->getArgOperand(0),
- CI->getName());
- if (!CI->use_empty())
- CI->replaceAllUsesWith(AdjustCall);
+ default:
+ llvm_unreachable("Unknown function for CallInst upgrade.");
+
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ assert(CI->getNumArgOperands() == 1 &&
+ "Mismatch between function args and call args");
+ StringRef Name = CI->getName();
+ CI->setName(Name + ".old");
+ CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
+ Builder.getFalse(), Name));
CI->eraseFromParent();
- break;
- }
+ return;
}
}
@@ -378,291 +131,3 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
}
}
-/// This function strips all debug info intrinsics, except for llvm.dbg.declare.
-/// If an llvm.dbg.declare intrinsic is invalid, then this function simply
-/// strips that use.
-void llvm::CheckDebugInfoIntrinsics(Module *M) {
- if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) {
- while (!FuncStart->use_empty())
- cast<CallInst>(FuncStart->use_back())->eraseFromParent();
- FuncStart->eraseFromParent();
- }
-
- if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) {
- while (!StopPoint->use_empty())
- cast<CallInst>(StopPoint->use_back())->eraseFromParent();
- StopPoint->eraseFromParent();
- }
-
- if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) {
- while (!RegionStart->use_empty())
- cast<CallInst>(RegionStart->use_back())->eraseFromParent();
- RegionStart->eraseFromParent();
- }
-
- if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) {
- while (!RegionEnd->use_empty())
- cast<CallInst>(RegionEnd->use_back())->eraseFromParent();
- RegionEnd->eraseFromParent();
- }
-
- if (Function *Declare = M->getFunction("llvm.dbg.declare")) {
- if (!Declare->use_empty()) {
- DbgDeclareInst *DDI = cast<DbgDeclareInst>(Declare->use_back());
- if (!isa<MDNode>(DDI->getArgOperand(0)) ||
- !isa<MDNode>(DDI->getArgOperand(1))) {
- while (!Declare->use_empty()) {
- CallInst *CI = cast<CallInst>(Declare->use_back());
- CI->eraseFromParent();
- }
- Declare->eraseFromParent();
- }
- }
- }
-}
-
-/// FindExnAndSelIntrinsics - Find the eh_exception and eh_selector intrinsic
-/// calls reachable from the unwind basic block.
-static void FindExnAndSelIntrinsics(BasicBlock *BB, CallInst *&Exn,
- CallInst *&Sel,
- SmallPtrSet<BasicBlock*, 8> &Visited) {
- if (!Visited.insert(BB)) return;
-
- for (BasicBlock::iterator
- I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- switch (CI->getCalledFunction()->getIntrinsicID()) {
- default: break;
- case Intrinsic::eh_exception:
- assert(!Exn && "Found more than one eh.exception call!");
- Exn = CI;
- break;
- case Intrinsic::eh_selector:
- assert(!Sel && "Found more than one eh.selector call!");
- Sel = CI;
- break;
- }
-
- if (Exn && Sel) return;
- }
- }
-
- if (Exn && Sel) return;
-
- for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- FindExnAndSelIntrinsics(*I, Exn, Sel, Visited);
- if (Exn && Sel) return;
- }
-}
-
-/// TransferClausesToLandingPadInst - Transfer the exception handling clauses
-/// from the eh_selector call to the new landingpad instruction.
-static void TransferClausesToLandingPadInst(LandingPadInst *LPI,
- CallInst *EHSel) {
- LLVMContext &Context = LPI->getContext();
- unsigned N = EHSel->getNumArgOperands();
-
- for (unsigned i = N - 1; i > 1; --i) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(EHSel->getArgOperand(i))){
- unsigned FilterLength = CI->getZExtValue();
- unsigned FirstCatch = i + FilterLength + !FilterLength;
- assert(FirstCatch <= N && "Invalid filter length");
-
- if (FirstCatch < N)
- for (unsigned j = FirstCatch; j < N; ++j) {
- Value *Val = EHSel->getArgOperand(j);
- if (!Val->hasName() || Val->getName() != "llvm.eh.catch.all.value") {
- LPI->addClause(EHSel->getArgOperand(j));
- } else {
- GlobalVariable *GV = cast<GlobalVariable>(Val);
- LPI->addClause(GV->getInitializer());
- }
- }
-
- if (!FilterLength) {
- // Cleanup.
- LPI->setCleanup(true);
- } else {
- // Filter.
- SmallVector<Constant *, 4> TyInfo;
- TyInfo.reserve(FilterLength - 1);
- for (unsigned j = i + 1; j < FirstCatch; ++j)
- TyInfo.push_back(cast<Constant>(EHSel->getArgOperand(j)));
- ArrayType *AType =
- ArrayType::get(!TyInfo.empty() ? TyInfo[0]->getType() :
- PointerType::getUnqual(Type::getInt8Ty(Context)),
- TyInfo.size());
- LPI->addClause(ConstantArray::get(AType, TyInfo));
- }
-
- N = i;
- }
- }
-
- if (N > 2)
- for (unsigned j = 2; j < N; ++j) {
- Value *Val = EHSel->getArgOperand(j);
- if (!Val->hasName() || Val->getName() != "llvm.eh.catch.all.value") {
- LPI->addClause(EHSel->getArgOperand(j));
- } else {
- GlobalVariable *GV = cast<GlobalVariable>(Val);
- LPI->addClause(GV->getInitializer());
- }
- }
-}
-
-/// This function upgrades the old pre-3.0 exception handling system to the new
-/// one. N.B. This will be removed in 3.1.
-void llvm::UpgradeExceptionHandling(Module *M) {
- Function *EHException = M->getFunction("llvm.eh.exception");
- Function *EHSelector = M->getFunction("llvm.eh.selector");
- if (!EHException || !EHSelector)
- return;
-
- LLVMContext &Context = M->getContext();
- Type *ExnTy = PointerType::getUnqual(Type::getInt8Ty(Context));
- Type *SelTy = Type::getInt32Ty(Context);
- Type *LPadSlotTy = StructType::get(ExnTy, SelTy, NULL);
-
- // This map links the invoke instruction with the eh.exception and eh.selector
- // calls associated with it.
- DenseMap<InvokeInst*, std::pair<Value*, Value*> > InvokeToIntrinsicsMap;
- for (Module::iterator
- I = M->begin(), E = M->end(); I != E; ++I) {
- Function &F = *I;
-
- for (Function::iterator
- II = F.begin(), IE = F.end(); II != IE; ++II) {
- BasicBlock *BB = &*II;
- InvokeInst *Inst = dyn_cast<InvokeInst>(BB->getTerminator());
- if (!Inst) continue;
- BasicBlock *UnwindDest = Inst->getUnwindDest();
- if (UnwindDest->isLandingPad()) continue; // Already converted.
-
- SmallPtrSet<BasicBlock*, 8> Visited;
- CallInst *Exn = 0;
- CallInst *Sel = 0;
- FindExnAndSelIntrinsics(UnwindDest, Exn, Sel, Visited);
- assert(Exn && Sel && "Cannot find eh.exception and eh.selector calls!");
- InvokeToIntrinsicsMap[Inst] = std::make_pair(Exn, Sel);
- }
- }
-
- // This map stores the slots where the exception object and selector value are
- // stored within a function.
- DenseMap<Function*, std::pair<Value*, Value*> > FnToLPadSlotMap;
- SmallPtrSet<Instruction*, 32> DeadInsts;
- for (DenseMap<InvokeInst*, std::pair<Value*, Value*> >::iterator
- I = InvokeToIntrinsicsMap.begin(), E = InvokeToIntrinsicsMap.end();
- I != E; ++I) {
- InvokeInst *Invoke = I->first;
- BasicBlock *UnwindDest = Invoke->getUnwindDest();
- Function *F = UnwindDest->getParent();
- std::pair<Value*, Value*> EHIntrinsics = I->second;
- CallInst *Exn = cast<CallInst>(EHIntrinsics.first);
- CallInst *Sel = cast<CallInst>(EHIntrinsics.second);
-
- // Store the exception object and selector value in the entry block.
- Value *ExnSlot = 0;
- Value *SelSlot = 0;
- if (!FnToLPadSlotMap[F].first) {
- BasicBlock *Entry = &F->front();
- ExnSlot = new AllocaInst(ExnTy, "exn", Entry->getTerminator());
- SelSlot = new AllocaInst(SelTy, "sel", Entry->getTerminator());
- FnToLPadSlotMap[F] = std::make_pair(ExnSlot, SelSlot);
- } else {
- ExnSlot = FnToLPadSlotMap[F].first;
- SelSlot = FnToLPadSlotMap[F].second;
- }
-
- if (!UnwindDest->getSinglePredecessor()) {
- // The unwind destination doesn't have a single predecessor. Create an
- // unwind destination which has only one predecessor.
- BasicBlock *NewBB = BasicBlock::Create(Context, "new.lpad",
- UnwindDest->getParent());
- BranchInst::Create(UnwindDest, NewBB);
- Invoke->setUnwindDest(NewBB);
-
- // Fix up any PHIs in the original unwind destination block.
- for (BasicBlock::iterator
- II = UnwindDest->begin(); isa<PHINode>(II); ++II) {
- PHINode *PN = cast<PHINode>(II);
- int Idx = PN->getBasicBlockIndex(Invoke->getParent());
- if (Idx == -1) continue;
- PN->setIncomingBlock(Idx, NewBB);
- }
-
- UnwindDest = NewBB;
- }
-
- IRBuilder<> Builder(Context);
- Builder.SetInsertPoint(UnwindDest, UnwindDest->getFirstInsertionPt());
-
- Value *PersFn = Sel->getArgOperand(1);
- LandingPadInst *LPI = Builder.CreateLandingPad(LPadSlotTy, PersFn, 0);
- Value *LPExn = Builder.CreateExtractValue(LPI, 0);
- Value *LPSel = Builder.CreateExtractValue(LPI, 1);
- Builder.CreateStore(LPExn, ExnSlot);
- Builder.CreateStore(LPSel, SelSlot);
-
- TransferClausesToLandingPadInst(LPI, Sel);
-
- DeadInsts.insert(Exn);
- DeadInsts.insert(Sel);
- }
-
- // Replace the old intrinsic calls with the values from the landingpad
- // instruction(s). These values were stored in allocas for us to use here.
- for (DenseMap<InvokeInst*, std::pair<Value*, Value*> >::iterator
- I = InvokeToIntrinsicsMap.begin(), E = InvokeToIntrinsicsMap.end();
- I != E; ++I) {
- std::pair<Value*, Value*> EHIntrinsics = I->second;
- CallInst *Exn = cast<CallInst>(EHIntrinsics.first);
- CallInst *Sel = cast<CallInst>(EHIntrinsics.second);
- BasicBlock *Parent = Exn->getParent();
-
- std::pair<Value*,Value*> ExnSelSlots = FnToLPadSlotMap[Parent->getParent()];
-
- IRBuilder<> Builder(Context);
- Builder.SetInsertPoint(Parent, Exn);
- LoadInst *LPExn = Builder.CreateLoad(ExnSelSlots.first, "exn.load");
- LoadInst *LPSel = Builder.CreateLoad(ExnSelSlots.second, "sel.load");
-
- Exn->replaceAllUsesWith(LPExn);
- Sel->replaceAllUsesWith(LPSel);
- }
-
- // Remove the dead instructions.
- for (SmallPtrSet<Instruction*, 32>::iterator
- I = DeadInsts.begin(), E = DeadInsts.end(); I != E; ++I) {
- Instruction *Inst = *I;
- Inst->eraseFromParent();
- }
-
- // Replace calls to "llvm.eh.resume" with the 'resume' instruction. Load the
- // exception and selector values from the stored place.
- Function *EHResume = M->getFunction("llvm.eh.resume");
- if (!EHResume) return;
-
- while (!EHResume->use_empty()) {
- CallInst *Resume = cast<CallInst>(EHResume->use_back());
- BasicBlock *BB = Resume->getParent();
-
- IRBuilder<> Builder(Context);
- Builder.SetInsertPoint(BB, Resume);
-
- Value *LPadVal =
- Builder.CreateInsertValue(UndefValue::get(LPadSlotTy),
- Resume->getArgOperand(0), 0, "lpad.val");
- LPadVal = Builder.CreateInsertValue(LPadVal, Resume->getArgOperand(1),
- 1, "lpad.val");
- Builder.CreateResume(LPadVal);
-
- // Remove all instructions after the 'resume.'
- BasicBlock::iterator I = Resume;
- while (I != BB->end()) {
- Instruction *Inst = &*I++;
- Inst->eraseFromParent();
- }
- }
-}
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index 0404297..99eeba1 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -37,5 +37,3 @@ add_llvm_library(LLVMCore
ValueTypes.cpp
Verifier.cpp
)
-
-add_llvm_library_dependencies(LLVMCore LLVMSupport)
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 30bae71..d1a9e7a 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -2209,7 +2209,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
I != E; ++I)
LastTy = *I;
- if ((LastTy && LastTy->isArrayTy()) || Idx0->isNullValue()) {
+ if ((LastTy && isa<SequentialType>(LastTy)) || Idx0->isNullValue()) {
SmallVector<Value*, 16> NewIndices;
NewIndices.reserve(Idxs.size() + CE->getNumOperands());
for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i)
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index cd94da1..a148912 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -1398,14 +1398,22 @@ Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty) {
}
Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy) {
- assert(C->getType()->isPointerTy() && "PtrToInt source must be pointer");
- assert(DstTy->isIntegerTy() && "PtrToInt destination must be integral");
+ assert(C->getType()->getScalarType()->isPointerTy() &&
+ "PtrToInt source must be pointer or pointer vector");
+ assert(DstTy->getScalarType()->isIntegerTy() &&
+ "PtrToInt destination must be integer or integer vector");
+ assert(C->getType()->getNumElements() == DstTy->getNumElements() &&
+ "Invalid cast between a different number of vector elements");
return getFoldedCast(Instruction::PtrToInt, C, DstTy);
}
Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy) {
- assert(C->getType()->isIntegerTy() && "IntToPtr source must be integral");
- assert(DstTy->isPointerTy() && "IntToPtr destination must be a pointer");
+ assert(C->getType()->getScalarType()->isIntegerTy() &&
+ "IntToPtr source must be integer or integer vector");
+ assert(DstTy->getScalarType()->isPointerTy() &&
+ "IntToPtr destination must be a pointer or pointer vector");
+ assert(C->getType()->getNumElements() == DstTy->getNumElements() &&
+ "Invalid cast between a different number of vector elements");
return getFoldedCast(Instruction::IntToPtr, C, DstTy);
}
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index 73191c1..8c8fbf9 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -391,59 +391,6 @@ bool Instruction::isCommutative(unsigned op) {
}
}
-bool Instruction::isSafeToSpeculativelyExecute() const {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (Constant *C = dyn_cast<Constant>(getOperand(i)))
- if (C->canTrap())
- return false;
-
- switch (getOpcode()) {
- default:
- return true;
- case UDiv:
- case URem: {
- // x / y is undefined if y == 0, but calcuations like x / 3 are safe.
- ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1));
- return Op && !Op->isNullValue();
- }
- case SDiv:
- case SRem: {
- // x / y is undefined if y == 0, and might be undefined if y == -1,
- // but calcuations like x / 3 are safe.
- ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1));
- return Op && !Op->isNullValue() && !Op->isAllOnesValue();
- }
- case Load: {
- const LoadInst *LI = cast<LoadInst>(this);
- if (!LI->isUnordered())
- return false;
- return LI->getPointerOperand()->isDereferenceablePointer();
- }
- case Call:
- return false; // The called function could have undefined behavior or
- // side-effects.
- // FIXME: We should special-case some intrinsics (bswap,
- // overflow-checking arithmetic, etc.)
- case VAArg:
- case Alloca:
- case Invoke:
- case PHI:
- case Store:
- case Ret:
- case Br:
- case IndirectBr:
- case Switch:
- case Unwind:
- case Unreachable:
- case Fence:
- case LandingPad:
- case AtomicRMW:
- case AtomicCmpXchg:
- case Resume:
- return false; // Misc instructions which have effects
- }
-}
-
Instruction *Instruction::clone() const {
Instruction *New = clone_impl();
New->SubclassOptionalData = SubclassOptionalData;
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index c8dcdc8..4784f0c 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -1359,6 +1359,15 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
///
template <typename IndexTy>
static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
+ if (Ptr->isVectorTy()) {
+ assert(IdxList.size() == 1 &&
+ "GEP with vector pointers must have a single index");
+ PointerType *PTy = dyn_cast<PointerType>(
+ cast<VectorType>(Ptr)->getElementType());
+ assert(PTy && "GEP with invalid vector pointer found");
+ return PTy->getElementType();
+ }
+
PointerType *PTy = dyn_cast<PointerType>(Ptr);
if (!PTy) return 0; // Type isn't a pointer type!
Type *Agg = PTy->getElementType();
@@ -1366,7 +1375,7 @@ static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
// Handle the special case of the empty set index set, which is always valid.
if (IdxList.empty())
return Agg;
-
+
// If there is at least one index, the top level type must be sized, otherwise
// it cannot be 'stepped over'.
if (!Agg->isSized())
@@ -1396,6 +1405,19 @@ Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList) {
return getIndexedTypeInternal(Ptr, IdxList);
}
+unsigned GetElementPtrInst::getAddressSpace(Value *Ptr) {
+ Type *Ty = Ptr->getType();
+
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ Ty = VTy->getElementType();
+
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ return PTy->getAddressSpace();
+
+ assert(false && "Invalid GEP pointer type");
+ return 0;
+}
+
/// hasAllZeroIndices - Return true if all of the indices of this GEP are
/// zeros. If so, the result pointer and the first operand have the same
/// value, just potentially different types.
@@ -2005,6 +2027,8 @@ bool BinaryOperator::isExact() const {
// CastInst Class
//===----------------------------------------------------------------------===//
+void CastInst::anchor() {}
+
// Just determine if this cast only deals with integral->integral conversion.
bool CastInst::isIntegerCast() const {
switch (getOpcode()) {
@@ -2652,9 +2676,15 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy() &&
SrcLength == DstLength;
case Instruction::PtrToInt:
- return SrcTy->isPointerTy() && DstTy->isIntegerTy();
+ if (SrcTy->getNumElements() != DstTy->getNumElements())
+ return false;
+ return SrcTy->getScalarType()->isPointerTy() &&
+ DstTy->getScalarType()->isIntegerTy();
case Instruction::IntToPtr:
- return SrcTy->isIntegerTy() && DstTy->isPointerTy();
+ if (SrcTy->getNumElements() != DstTy->getNumElements())
+ return false;
+ return SrcTy->getScalarType()->isIntegerTy() &&
+ DstTy->getScalarType()->isPointerTy();
case Instruction::BitCast:
// BitCast implies a no-op cast of type only. No bits change.
// However, you can't cast pointers to anything but pointers.
diff --git a/lib/VMCore/LLVMBuild.txt b/lib/VMCore/LLVMBuild.txt
index 45f528e..bca8b2c 100644
--- a/lib/VMCore/LLVMBuild.txt
+++ b/lib/VMCore/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Library
name = Core
parent = Libraries
required_libraries = Support
-
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index ace4dc2..8debd7c 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -425,12 +425,12 @@ StringRef NamedMDNode::getName() const {
// Instruction Metadata method implementations.
//
-void Instruction::setMetadata(const char *Kind, MDNode *Node) {
+void Instruction::setMetadata(StringRef Kind, MDNode *Node) {
if (Node == 0 && !hasMetadata()) return;
setMetadata(getContext().getMDKindID(Kind), Node);
}
-MDNode *Instruction::getMetadataImpl(const char *Kind) const {
+MDNode *Instruction::getMetadataImpl(StringRef Kind) const {
return getMetadataImpl(getContext().getMDKindID(Kind));
}
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 10184bc..469defd 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -46,6 +46,14 @@ Type *Type::getScalarType() {
return this;
}
+/// getNumElements - If this is a vector type, return the number of elements,
+/// otherwise return zero.
+unsigned Type::getNumElements() {
+ if (VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getNumElements();
+ return 0;
+}
+
/// isIntegerTy - Return true if this is an IntegerType of the specified width.
bool Type::isIntegerTy(unsigned Bitwidth) const {
return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth;
@@ -664,6 +672,8 @@ VectorType *VectorType::get(Type *elementType, unsigned NumElements) {
}
bool VectorType::isValidElementType(Type *ElemTy) {
+ if (PointerType *PTy = dyn_cast<PointerType>(ElemTy))
+ ElemTy = PTy->getElementType();
return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy();
}
diff --git a/lib/VMCore/User.cpp b/lib/VMCore/User.cpp
index f01fa34..5f35ce4 100644
--- a/lib/VMCore/User.cpp
+++ b/lib/VMCore/User.cpp
@@ -17,6 +17,8 @@ namespace llvm {
// User Class
//===----------------------------------------------------------------------===//
+void User::anchor() {}
+
// replaceUsesOfWith - Replaces all references to the "From" definition with
// references to the "To" definition.
//
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 291df91..a5f1918 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -108,6 +108,19 @@ bool Value::hasNUsesOrMore(unsigned N) const {
/// isUsedInBasicBlock - Return true if this value is used in the specified
/// basic block.
bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
+ // Start by scanning over the instructions looking for a use before we start
+ // the expensive use iteration.
+ unsigned MaxBlockSize = 3;
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (std::find(I->op_begin(), I->op_end(), this) != I->op_end())
+ return true;
+ if (MaxBlockSize == 0) // If the block is larger, fall back to use_iterator.
+ break;
+ --MaxBlockSize; // Decrement after the check so the budget cannot wrap around.
+ }
+
+ if (MaxBlockSize != 0) // We scanned the entire block and found no use.
+ return false;
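+ // (Illustration: for a two-instruction block the scan above is exhaustive
+ // and the early return fires; for larger blocks we fall back to walking
+ // the use list below.)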
+
for (const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
const Instruction *User = dyn_cast<Instruction>(*I);
if (User && User->getParent() == BB)
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 9564b7d..003de44 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -1035,8 +1035,19 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert1(SrcTy->isPointerTy(), "PtrToInt source must be pointer", &I);
- Assert1(DestTy->isIntegerTy(), "PtrToInt result must be integral", &I);
+ Assert1(SrcTy->getScalarType()->isPointerTy(),
+ "PtrToInt source must be pointer", &I);
+ Assert1(DestTy->getScalarType()->isIntegerTy(),
+ "PtrToInt result must be integral", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "PtrToInt type mismatch", &I);
+
+ if (SrcTy->isVectorTy()) {
+ VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
+ VectorType *VDest = dyn_cast<VectorType>(DestTy);
+ Assert1(VSrc->getNumElements() == VDest->getNumElements(),
+ "PtrToInt Vector width mismatch", &I);
+ }
visitInstruction(I);
}
@@ -1046,9 +1057,18 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert1(SrcTy->isIntegerTy(), "IntToPtr source must be an integral", &I);
- Assert1(DestTy->isPointerTy(), "IntToPtr result must be a pointer",&I);
-
+ Assert1(SrcTy->getScalarType()->isIntegerTy(),
+ "IntToPtr source must be an integral", &I);
+ Assert1(DestTy->getScalarType()->isPointerTy(),
+ "IntToPtr result must be a pointer",&I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "IntToPtr type mismatch", &I);
+ if (SrcTy->isVectorTy()) {
+ VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
+ VectorType *VDest = dyn_cast<VectorType>(DestTy);
+ Assert1(VSrc->getNumElements() == VDest->getNumElements(),
+ "IntToPtr Vector width mismatch", &I);
+ }
visitInstruction(I);
}
@@ -1245,7 +1265,7 @@ void Verifier::visitICmpInst(ICmpInst &IC) {
Assert1(Op0Ty == Op1Ty,
"Both operands to ICmp instruction are not of the same type!", &IC);
// Check that the operands are the right type
- Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPointerTy(),
+ Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(),
"Invalid operand types for ICmp instruction", &IC);
// Check that the predicate is valid.
Assert1(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
@@ -1295,17 +1315,43 @@ void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
}
void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
- Assert1(cast<PointerType>(GEP.getOperand(0)->getType())
- ->getElementType()->isSized(),
+ Type *TargetTy = GEP.getPointerOperandType();
+ if (VectorType *VTy = dyn_cast<VectorType>(TargetTy))
+ TargetTy = VTy->getElementType();
+
+ Assert1(dyn_cast<PointerType>(TargetTy),
+ "GEP base pointer is not a vector or a vector of pointers", &GEP);
+ Assert1(cast<PointerType>(TargetTy)->getElementType()->isSized(),
"GEP into unsized type!", &GEP);
-
+
SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
Type *ElTy =
- GetElementPtrInst::getIndexedType(GEP.getOperand(0)->getType(), Idxs);
+ GetElementPtrInst::getIndexedType(GEP.getPointerOperandType(), Idxs);
Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
- Assert2(GEP.getType()->isPointerTy() &&
- cast<PointerType>(GEP.getType())->getElementType() == ElTy,
- "GEP is not of right type for indices!", &GEP, ElTy);
+
+ if (GEP.getPointerOperandType()->isPointerTy()) {
+ // Validate GEPs with scalar indices.
+ Assert2(GEP.getType()->isPointerTy() &&
+ cast<PointerType>(GEP.getType())->getElementType() == ElTy,
+ "GEP is not of right type for indices!", &GEP, ElTy);
+ } else {
+ // Validate GEPs with a vector index.
+ Assert1(Idxs.size() == 1, "Invalid number of indices!", &GEP);
+ Value *Index = Idxs[0];
+ Type *IndexTy = Index->getType();
+ Assert1(IndexTy->isVectorTy(),
+ "Vector GEP must have vector indices!", &GEP);
+ Assert1(GEP.getType()->isVectorTy(),
+ "Vector GEP must return a vector value", &GEP);
+ Type *ElemPtr = cast<VectorType>(GEP.getType())->getElementType();
+ Assert1(ElemPtr->isPointerTy(),
+ "Vector GEP pointer operand is not a pointer!", &GEP);
+ unsigned IndexWidth = cast<VectorType>(IndexTy)->getNumElements();
+ unsigned GepWidth = cast<VectorType>(GEP.getType())->getNumElements();
+ Assert1(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
+ Assert1(ElTy == cast<PointerType>(ElemPtr)->getElementType(),
+ "Vector GEP type does not match pointer type!", &GEP);
+ }
visitInstruction(GEP);
}
@@ -1642,6 +1688,12 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
switch (ID) {
default:
break;
+ case Intrinsic::ctlz: // llvm.ctlz
+ case Intrinsic::cttz: // llvm.cttz
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
+ "is_zero_undef argument of bit counting intrinsics must be a "
+ "constant int", &CI);
+ break;
case Intrinsic::dbg_declare: { // llvm.dbg.declare
Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
"invalid llvm.dbg.declare intrinsic call 1", &CI);
diff --git a/projects/LLVMBuild.txt b/projects/LLVMBuild.txt
index 2d62f25..3c24d1a 100644
--- a/projects/LLVMBuild.txt
+++ b/projects/LLVMBuild.txt
@@ -19,4 +19,3 @@
type = Group
name = Projects
parent = $ROOT
-
diff --git a/projects/sample/autoconf/configure.ac b/projects/sample/autoconf/configure.ac
index ab15892..a19e87d 100644
--- a/projects/sample/autoconf/configure.ac
+++ b/projects/sample/autoconf/configure.ac
@@ -300,6 +300,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
mips-*) llvm_cv_target_arch="Mips" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
msp430-*) llvm_cv_target_arch="MSP430" ;;
+ hexagon-*) llvm_cv_target_arch="Hexagon" ;;
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
ptx-*) llvm_cv_target_arch="PTX" ;;
*) llvm_cv_target_arch="Unknown" ;;
@@ -434,6 +435,7 @@ else
Mips) AC_SUBST(TARGET_HAS_JIT,1) ;;
XCore) AC_SUBST(TARGET_HAS_JIT,0) ;;
MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ Hexagon) AC_SUBST(TARGET_HAS_JIT,0) ;;
MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;;
PTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
*) AC_SUBST(TARGET_HAS_JIT,0) ;;
@@ -545,14 +547,14 @@ dnl Allow specific targets to be specified for building (or not)
TARGETS_TO_BUILD=""
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
[Build specific host targets: all or target1,target2,... Valid targets are:
- host, x86, x86_64, sparc, powerpc, arm, mips, spu,
+ host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
xcore, msp430, ptx, cbe, and cpp (default=all)]),,
enableval=all)
if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CBackend CppBackend MBlaze PTX" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CBackend CppBackend MBlaze PTX" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -564,6 +566,7 @@ case "$enableval" in
spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
+ hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;;
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
@@ -579,6 +582,7 @@ case "$enableval" in
CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
+ Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
*) AC_MSG_ERROR([Can not set target to build]) ;;
esac ;;
diff --git a/projects/sample/configure b/projects/sample/configure
index dd4305d..860faa2 100755
--- a/projects/sample/configure
+++ b/projects/sample/configure
@@ -1401,9 +1401,8 @@ Optional Features:
(default is YES)
--enable-targets Build specific host targets: all or
target1,target2,... Valid targets are: host, x86,
- x86_64, sparc, powerpc, arm, mips, spu,
- xcore, msp430, ptx, cbe, and cpp
- (default=all)
+ x86_64, sparc, powerpc, arm, mips, spu, hexagon,
+ xcore, msp430, ptx, cbe, and cpp (default=all)
--enable-cbe-printf-a Enable C Backend output with hex floating point via
%a (default is YES)
--enable-bindings Build specific language bindings:
@@ -3844,6 +3843,7 @@ else
mips-*) llvm_cv_target_arch="Mips" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
msp430-*) llvm_cv_target_arch="MSP430" ;;
+ hexagon-*) llvm_cv_target_arch="Hexagon" ;;
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
ptx-*) llvm_cv_target_arch="PTX" ;;
*) llvm_cv_target_arch="Unknown" ;;
@@ -5045,6 +5045,8 @@ else
;;
MSP430) TARGET_HAS_JIT=0
;;
+ Hexagon) TARGET_HAS_JIT=0
+ ;;
MBlaze) TARGET_HAS_JIT=0
;;
PTX) TARGET_HAS_JIT=0
@@ -5233,7 +5235,7 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CBackend CppBackend MBlaze PTX" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CBackend CppBackend MBlaze PTX" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -5245,6 +5247,7 @@ case "$enableval" in
spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
+ hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;;
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
@@ -5260,6 +5263,7 @@ case "$enableval" in
CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
+ Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
*) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
echo "$as_me: error: Can not set target to build" >&2;}
diff --git a/runtime/LLVMBuild.txt b/runtime/LLVMBuild.txt
index ab7836b..05334fd 100644
--- a/runtime/LLVMBuild.txt
+++ b/runtime/LLVMBuild.txt
@@ -19,4 +19,3 @@
type = Group
name = Runtime
parent = $ROOT
-
diff --git a/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll b/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
index 035299e..5b81c17 100644
--- a/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
+++ b/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
@@ -3,12 +3,12 @@
; RUN: grep {ret i32 0}
; END.
-declare i16 @llvm.cttz.i16(i16)
+declare i16 @llvm.cttz.i16(i16, i1)
define i32 @test(i32* %P, i16* %Q) {
%A = load i16* %Q ; <i16> [#uses=1]
%x = load i32* %P ; <i32> [#uses=1]
- %B = call i16 @llvm.cttz.i16( i16 %A ) ; <i16> [#uses=1]
+ %B = call i16 @llvm.cttz.i16( i16 %A, i1 true ) ; <i16> [#uses=1]
%y = load i32* %P ; <i32> [#uses=1]
store i16 %B, i16* %Q
%z = sub i32 %x, %y ; <i32> [#uses=1]
diff --git a/test/Analysis/BasicAA/constant-over-index.ll b/test/Analysis/BasicAA/constant-over-index.ll
index 8a8ac4f..48ef259 100644
--- a/test/Analysis/BasicAA/constant-over-index.ll
+++ b/test/Analysis/BasicAA/constant-over-index.ll
@@ -16,8 +16,8 @@ loop:
%p.0.i.0 = getelementptr [3 x [3 x double]]* %p, i64 0, i64 %i, i64 0
- volatile store double 0.0, double* %p3
- volatile store double 0.1, double* %p.0.i.0
+ store volatile double 0.0, double* %p3
+ store volatile double 0.1, double* %p.0.i.0
%i.next = add i64 %i, 1
%cmp = icmp slt i64 %i.next, 3
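The volatile churn in this and the following tests is one mechanical rename: the IR assembler now spells volatile as a modifier after the load/store opcode rather than as a prefix before it. A before/after sketch with an illustrative pointer %p:

    ; old prefix spelling (deprecated)
    %v = volatile load i32* %p
    volatile store i32 %v, i32* %p

    ; new suffix spelling
    %v = load volatile i32* %p
    store volatile i32 %v, i32* %p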
diff --git a/test/Analysis/BasicAA/phi-and-select.ll b/test/Analysis/BasicAA/phi-and-select.ll
index 9bc47ae..0ed4a2c 100644
--- a/test/Analysis/BasicAA/phi-and-select.ll
+++ b/test/Analysis/BasicAA/phi-and-select.ll
@@ -17,8 +17,8 @@ false:
exit:
%a = phi double* [ %x, %true ], [ %y, %false ]
%b = phi double* [ %x, %false ], [ %y, %true ]
- volatile store double 0.0, double* %a
- volatile store double 1.0, double* %b
+ store volatile double 0.0, double* %a
+ store volatile double 1.0, double* %b
ret void
}
@@ -27,8 +27,8 @@ define void @bar(i1 %m, double* noalias %x, double* noalias %y) {
entry:
%a = select i1 %m, double* %x, double* %y
%b = select i1 %m, double* %y, double* %x
- volatile store double 0.000000e+00, double* %a
- volatile store double 1.000000e+00, double* %b
+ store volatile double 0.000000e+00, double* %a
+ store volatile double 1.000000e+00, double* %b
ret void
}
@@ -56,8 +56,8 @@ nfalse:
nexit:
%b = phi double* [ %v, %ntrue ], [ %w, %nfalse ]
- volatile store double 0.0, double* %a
- volatile store double 1.0, double* %b
+ store volatile double 0.0, double* %a
+ store volatile double 1.0, double* %b
ret void
}
@@ -67,7 +67,7 @@ define void @fin(i1 %m, double* noalias %x, double* noalias %y,
entry:
%a = select i1 %m, double* %x, double* %y
%b = select i1 %n, double* %v, double* %w
- volatile store double 0.000000e+00, double* %a
- volatile store double 1.000000e+00, double* %b
+ store volatile double 0.000000e+00, double* %a
+ store volatile double 1.000000e+00, double* %b
ret void
}
diff --git a/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll b/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
index 4f14a0d..ce0329d 100644
--- a/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
+++ b/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
@@ -1,6 +1,7 @@
-; RUN: opt < %s -scalar-evolution -analyze | grep {Loop %loop: backedge-taken count is (100 + (-100 smax %n))}
+; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s
; PR2002
+; CHECK: Loop %loop: backedge-taken count is (100 + (-100 smax %n))
define void @foo(i8 %n) {
entry:
br label %loop
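Several RUN-line updates below follow the same migration from grep to FileCheck: the expected output moves out of the RUN line into a CHECK comment, which avoids Tcl-style brace quoting and produces a real diagnostic when the match fails. The shape of the conversion, shown on a hypothetical test (the pass name is a placeholder):

    ; Before:
    ; RUN: opt < %s -some-pass -analyze | grep {expected text}

    ; After:
    ; RUN: opt < %s -some-pass -analyze | FileCheck %s
    ; CHECK: expected text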
diff --git a/test/Analysis/ScalarEvolution/nsw.ll b/test/Analysis/ScalarEvolution/nsw.ll
index da35a6c..659cf4f 100644
--- a/test/Analysis/ScalarEvolution/nsw.ll
+++ b/test/Analysis/ScalarEvolution/nsw.ll
@@ -103,4 +103,22 @@ for.body.i.i: ; preds = %entry, %for.body.i.
; CHECK: Loop %for.body.i.i: max backedge-taken count is ((-4 + (-1 * %begin) + %end) /u 4)
_ZSt4fillIPiiEvT_S1_RKT0_.exit: ; preds = %for.body.i.i, %entry
ret void
-}
\ No newline at end of file
+}
+
+; A single AddExpr exists for (%a + %b), which is not always <nsw>.
+; CHECK: @addnsw
+; CHECK-NOT: --> (%a + %b)<nsw>
+define i32 @addnsw(i32 %a, i32 %b) nounwind ssp {
+entry:
+ %tmp = add i32 %a, %b
+ %cmp = icmp sgt i32 %tmp, 0
+ br i1 %cmp, label %greater, label %exit
+
+greater:
+ %tmp2 = add nsw i32 %a, %b
+ br label %exit
+
+exit:
+ %result = phi i32 [ %a, %entry ], [ %tmp2, %greater ]
+ ret i32 %result
+}
diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll
deleted file mode 100644
index daffa3d..0000000
--- a/test/Assembler/AutoUpgradeIntrinsics.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; Tests to make sure intrinsics are automatically upgraded.
-; RUN: llvm-as < %s | llvm-dis | FileCheck %s
-
-
-declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readnone
-declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readnone
-declare <2 x double> @llvm.x86.sse2.loadu.pd(double*) nounwind readnone
-define void @test_loadu(i8* %a, double* %b) {
- %v0 = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a)
- %v1 = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a)
- %v2 = call <2 x double> @llvm.x86.sse2.loadu.pd(double* %b)
-
-; CHECK: load i128* {{.*}}, align 1
-; CHECK: load i128* {{.*}}, align 1
-; CHECK: load i128* {{.*}}, align 1
- ret void
-}
-
-declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind readnone
-declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x double>) nounwind readnone
-declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind readnone
-declare void @llvm.x86.sse2.movnt.i(i8*, i32) nounwind readnone
-
-define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D) {
-; CHECK: store{{.*}}nontemporal
- call void @llvm.x86.sse.movnt.ps(i8* %B, <4 x float> %A)
-; CHECK: store{{.*}}nontemporal
- call void @llvm.x86.sse2.movnt.dq(i8* %B, <2 x double> %C)
-; CHECK: store{{.*}}nontemporal
- call void @llvm.x86.sse2.movnt.pd(i8* %B, <2 x double> %C)
-; CHECK: store{{.*}}nontemporal
- call void @llvm.x86.sse2.movnt.i(i8* %B, i32 %D)
- ret void
-}
-
-declare void @llvm.prefetch(i8*, i32, i32) nounwind
-
-define void @p(i8* %ptr) {
-; CHECK: llvm.prefetch(i8* %ptr, i32 0, i32 1, i32 1)
- tail call void @llvm.prefetch(i8* %ptr, i32 0, i32 1)
- ret void
-}
-
-declare i32 @nest_f(i8* nest, i32)
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*)
-
-define void @test_trampolines() {
-; CHECK: call void @llvm.init.trampoline(i8* null, i8* bitcast (i32 (i8*, i32)* @nest_f to i8*), i8* null)
-; CHECK: call i8* @llvm.adjust.trampoline(i8* null)
-
- call i8* @llvm.init.trampoline(i8* null,
- i8* bitcast (i32 (i8*, i32)* @nest_f to i8*),
- i8* null)
- ret void
-}
diff --git a/test/Assembler/auto_upgrade_intrinsics.ll b/test/Assembler/auto_upgrade_intrinsics.ll
new file mode 100644
index 0000000..7ad5cc3
--- /dev/null
+++ b/test/Assembler/auto_upgrade_intrinsics.ll
@@ -0,0 +1,44 @@
+; Test to make sure intrinsics are automatically upgraded.
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+declare i8 @llvm.ctlz.i8(i8)
+declare i16 @llvm.ctlz.i16(i16)
+declare i32 @llvm.ctlz.i32(i32)
+declare i42 @llvm.ctlz.i42(i42) ; Not a power-of-2
+
+define void @test.ctlz(i8 %a, i16 %b, i32 %c, i42 %d) {
+; CHECK: @test.ctlz
+
+entry:
+ ; CHECK: call i8 @llvm.ctlz.i8(i8 %a, i1 false)
+ call i8 @llvm.ctlz.i8(i8 %a)
+ ; CHECK: call i16 @llvm.ctlz.i16(i16 %b, i1 false)
+ call i16 @llvm.ctlz.i16(i16 %b)
+ ; CHECK: call i32 @llvm.ctlz.i32(i32 %c, i1 false)
+ call i32 @llvm.ctlz.i32(i32 %c)
+ ; CHECK: call i42 @llvm.ctlz.i42(i42 %d, i1 false)
+ call i42 @llvm.ctlz.i42(i42 %d)
+
+ ret void
+}
+
+declare i8 @llvm.cttz.i8(i8)
+declare i16 @llvm.cttz.i16(i16)
+declare i32 @llvm.cttz.i32(i32)
+declare i42 @llvm.cttz.i42(i42) ; Not a power-of-2
+
+define void @test.cttz(i8 %a, i16 %b, i32 %c, i42 %d) {
+; CHECK: @test.cttz
+
+entry:
+ ; CHECK: call i8 @llvm.cttz.i8(i8 %a, i1 false)
+ call i8 @llvm.cttz.i8(i8 %a)
+ ; CHECK: call i16 @llvm.cttz.i16(i16 %b, i1 false)
+ call i16 @llvm.cttz.i16(i16 %b)
+ ; CHECK: call i32 @llvm.cttz.i32(i32 %c, i1 false)
+ call i32 @llvm.cttz.i32(i32 %c)
+ ; CHECK: call i42 @llvm.cttz.i42(i42 %d, i1 false)
+ call i42 @llvm.cttz.i42(i42 %d)
+
+ ret void
+}
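Note that the upgrader deliberately supplies i1 false, preserving the old fully-defined semantics for existing IR. Passing i1 true is only safe when a zero input either cannot occur or is guarded out, as in this illustrative sketch (not from the patch):

    declare i32 @llvm.cttz.i32(i32, i1)

    define i32 @guarded_cttz(i32 %x) {
      ; an undefined result at zero is harmless here: the select
      ; discards it whenever %x is 0
      %ct = call i32 @llvm.cttz.i32(i32 %x, i1 true)
      %nz = icmp ne i32 %x, 0
      %r = select i1 %nz, i32 %ct, i32 32
      ret i32 %r
    }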
diff --git a/test/Bitcode/AutoUpgradeGlobals.ll b/test/Bitcode/AutoUpgradeGlobals.ll
deleted file mode 100644
index a5af2b8..0000000
--- a/test/Bitcode/AutoUpgradeGlobals.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; This isn't really an assembly file. It just runs test on bitcode to ensure
-; it is auto-upgraded.
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @\\.llvm\\.eh}
diff --git a/test/Bitcode/AutoUpgradeGlobals.ll.bc b/test/Bitcode/AutoUpgradeGlobals.ll.bc
deleted file mode 100644
index 1abe968..0000000
--- a/test/Bitcode/AutoUpgradeGlobals.ll.bc
+++ /dev/null
Binary files differ
diff --git a/test/Bitcode/fbench-llvm-2.9.ll b/test/Bitcode/fbench-llvm-2.9.ll
deleted file mode 100644
index 5887691..0000000
--- a/test/Bitcode/fbench-llvm-2.9.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; This isn't really an assembly file. It just runs the test on the bitcode to
-; ensure bitcode file backward compatibility. No need for FileCheck as the
-; BitcodeReader will fail with an assert if broken. This test case was
-; generated using a clang binary, based on LLVM 2.9, downloaded from llvm.org.
-; RUN: llvm-dis < %s.bc > /dev/null
diff --git a/test/Bitcode/fbench-llvm-2.9.ll.bc b/test/Bitcode/fbench-llvm-2.9.ll.bc
deleted file mode 100644
index d2ebbe5..0000000
--- a/test/Bitcode/fbench-llvm-2.9.ll.bc
+++ /dev/null
Binary files differ
diff --git a/test/Bitcode/spirit-llvm-2.9.ll b/test/Bitcode/spirit-llvm-2.9.ll
deleted file mode 100644
index 5887691..0000000
--- a/test/Bitcode/spirit-llvm-2.9.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; This isn't really an assembly file. It just runs the test on the bitcode to
-; ensure bitcode file backward compatibility. No need for FileCheck as the
-; BitcodeReader will fail with an assert if broken. This test case was
-; generated using a clang binary, based on LLVM 2.9, downloaded from llvm.org.
-; RUN: llvm-dis < %s.bc > /dev/null
diff --git a/test/Bitcode/spirit-llvm-2.9.ll.bc b/test/Bitcode/spirit-llvm-2.9.ll.bc
deleted file mode 100644
index bf8cf49..0000000
--- a/test/Bitcode/spirit-llvm-2.9.ll.bc
+++ /dev/null
Binary files differ
diff --git a/test/Bitcode/sse42_crc32.ll b/test/Bitcode/sse42_crc32.ll
deleted file mode 100644
index ddb108d..0000000
--- a/test/Bitcode/sse42_crc32.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; Check to make sure old CRC32 intrinsics are auto-upgraded correctly.
-; Auto-upgrade happens when parsing a .bc or a .ll file. Thus, leave the test
-; case as a .ll file so we can see what's going on.
-;
-; Rdar: 9472944
-;
-; RUN: opt < %s -S | FileCheck %s
-
-; crc32.8 should upgrade to crc32.32.8
-; CHECK: i32 @llvm.x86.sse42.crc32.32.8(
-; CHECK-NOT: i32 @llvm.x86.sse42.crc32.8(
-
-; crc32.16 should upgrade to crc32.32.16
-; CHECK: i32 @llvm.x86.sse42.crc32.32.16(
-; CHECK-NOT: i32 @llvm.x86.sse42.crc32.16(
-
-; crc32.32 should upgrade to crc32.32.32
-; CHECK: i32 @llvm.x86.sse42.crc32.32.32(
-; CHECK-NOT: i32 @llvm.x86.sse42.crc32.32(
-
-; crc64.8 should upgrade to crc32.64.8
-; CHECK: i64 @llvm.x86.sse42.crc32.64.8(
-; CHECK-NOT: i64 @llvm.x86.sse42.crc64.8(
-
-; crc64.64 should upgrade to crc32.64.64
-; CHECK: i64 @llvm.x86.sse42.crc32.64.64(
-; CHECK-NOT: i64 @llvm.x86.sse42.crc64.64(
-
-
-define void @foo() nounwind readnone ssp {
-entry:
- %0 = call i32 @llvm.x86.sse42.crc32.8(i32 0, i8 0)
- %1 = call i32 @llvm.x86.sse42.crc32.16(i32 0, i16 0)
- %2 = call i32 @llvm.x86.sse42.crc32.32(i32 0, i32 0)
- %3 = call i64 @llvm.x86.sse42.crc64.8(i64 0, i8 0)
- %4 = call i64 @llvm.x86.sse42.crc64.64(i64 0, i64 0)
- ret void
-}
-
-declare i32 @llvm.x86.sse42.crc32.8(i32, i8) nounwind readnone
-declare i32 @llvm.x86.sse42.crc32.16(i32, i16) nounwind readnone
-declare i32 @llvm.x86.sse42.crc32.32(i32, i32) nounwind readnone
-declare i64 @llvm.x86.sse42.crc64.8(i64, i8) nounwind readnone
-declare i64 @llvm.x86.sse42.crc64.64(i64, i64) nounwind readnone
diff --git a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
index 78c6222..94c562b 100644
--- a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
@@ -11,7 +11,7 @@ bb74.i: ; preds = %bb88.i, %bb74.i, %entry
bb88.i: ; preds = %bb74.i
br i1 false, label %mandel.exit, label %bb74.i
mandel.exit: ; preds = %bb88.i
- %tmp2 = volatile load double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8 ; <double> [#uses=1]
+ %tmp2 = load volatile double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8 ; <double> [#uses=1]
%tmp23 = fptosi double %tmp2 to i32 ; <i32> [#uses=1]
%tmp5 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %tmp23 ) ; <i32> [#uses=0]
ret i32 0
diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
index 8b29c15..ca88eed 100644
--- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -3,11 +3,11 @@
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
target triple = "thumbv7-apple-darwin10"
-@x1 = internal global i8 1
-@x2 = internal global i8 1
-@x3 = internal global i8 1
-@x4 = internal global i8 1
-@x5 = global i8 1
+@x1 = internal global i8 1, align 1
+@x2 = internal global i8 1, align 1
+@x3 = internal global i8 1, align 1
+@x4 = internal global i8 1, align 1
+@x5 = global i8 1, align 1
; Check debug info output for merged global.
; DW_AT_location
diff --git a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
index 1b5b8a9..091d037 100644
--- a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
+++ b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
@@ -1,12 +1,10 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s
; CHECK: .zerofill __DATA,__bss,__MergedGlobals,16,2
-%struct.config = type { i16, i16, i16, i16 }
-
@prev = external global [0 x i16]
@max_lazy_match = internal unnamed_addr global i32 0, align 4
@read_buf = external global i32 (i8*, i32)*
@window = external global [0 x i8]
@lookahead = internal unnamed_addr global i32 0, align 4
-@eofile.b = internal unnamed_addr global i1 false
+@eofile.b = internal unnamed_addr global i32 0
@ins_h = internal unnamed_addr global i32 0, align 4
diff --git a/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll b/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
new file mode 100644
index 0000000..5409f8c
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0.0 | FileCheck %s
+; rdar://10464621
+
+; DAG combine increases loads from packed types. The ARM load / store optimizer
+; then combines them into an ldm, which causes a runtime exception.
+
+%struct.InformationBlock = type <{ i32, %struct.FlagBits, %struct.FlagBits }>
+%struct.FlagBits = type <{ [4 x i32] }>
+
+@infoBlock = external global %struct.InformationBlock
+
+define hidden void @foo() {
+; CHECK: foo:
+; CHECK: ldr.w
+; CHECK: ldr.w
+; CHECK-NOT: ldm
+entry:
+ %tmp13 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 0), align 1
+ %tmp15 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 1), align 1
+ %tmp17 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 2), align 1
+ %tmp19 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 3), align 1
+ %tmp = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 0), align 1
+ %tmp3 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 1), align 1
+ %tmp4 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 2), align 1
+ %tmp5 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 3), align 1
+ %insert21 = insertvalue [4 x i32] undef, i32 %tmp13, 0
+ %insert23 = insertvalue [4 x i32] %insert21, i32 %tmp15, 1
+ %insert25 = insertvalue [4 x i32] %insert23, i32 %tmp17, 2
+ %insert27 = insertvalue [4 x i32] %insert25, i32 %tmp19, 3
+ %insert = insertvalue [4 x i32] undef, i32 %tmp, 0
+ %insert7 = insertvalue [4 x i32] %insert, i32 %tmp3, 1
+ %insert9 = insertvalue [4 x i32] %insert7, i32 %tmp4, 2
+ %insert11 = insertvalue [4 x i32] %insert9, i32 %tmp5, 3
+ tail call void @bar([4 x i32] %insert27, [4 x i32] %insert11)
+ ret void
+}
+
+declare void @bar([4 x i32], [4 x i32])
diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
new file mode 100644
index 0000000..86e8712
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -0,0 +1,302 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+
+@A = global <4 x float> <float 0., float 1., float 2., float 3.>
+
+define void @test_sqrt(<4 x float>* %X) nounwind {
+
+; CHECK: test_sqrt:
+
+; CHECK: movw r1, :lower16:{{.*}}
+; CHECK: movt r1, :upper16:{{.*}}
+; CHECK: vldmia r1, {[[short0:s[0-9]+]], [[short1:s[0-9]+]], [[short2:s[0-9]+]], [[short3:s[0-9]+]]}
+; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short3]]
+; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short2]]
+; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short1]]
+; CHECK: vsqrt.f32 {{s[0-9]+}}, [[short0]]
+; CHECK: vstmia {{.*}}
+
+L.entry:
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %0)
+ store <4 x float> %1, <4 x float>* %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readonly
+
+
+define void @test_cos(<4 x float>* %X) nounwind {
+
+; CHECK: test_cos:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}cosf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}cosf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}cosf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}cosf
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.cos.v4f32(<4 x float> %0)
+ store <4 x float> %1, <4 x float>* %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.cos.v4f32(<4 x float>) nounwind readonly
+
+define void @test_exp(<4 x float>* %X) nounwind {
+
+; CHECK: test_exp:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}expf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}expf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}expf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}expf
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.exp.v4f32(<4 x float> %0)
+ store <4 x float> %1, <4 x float>* %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.exp.v4f32(<4 x float>) nounwind readonly
+
+define void @test_exp2(<4 x float>* %X) nounwind {
+
+; CHECK: test_exp2:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}exp2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}exp2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}exp2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}exp2f
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %0)
+ store <4 x float> %1, <4 x float>* %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.exp2.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log10(<4 x float>* %X) nounwind {
+
+; CHECK: test_log10:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log10f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log10f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log10f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log10f
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.log10.v4f32(<4 x float> %0)
+ store <4 x float> %1, <4 x float>* %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.log10.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log(<4 x float>* %X) nounwind {
+
+; CHECK: test_log:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}logf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}logf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}logf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}logf
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.log.v4f32(<4 x float> %0)
+ store <4 x float> %1, <4 x float>* %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.log.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log2(<4 x float>* %X) nounwind {
+
+; CHECK: test_log2:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log2f
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.log2.v4f32(<4 x float> %0)
+ store <4 x float> %1, <4 x float>* %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.log2.v4f32(<4 x float>) nounwind readonly
+
+
+define void @test_pow(<4 x float>* %X) nounwind {
+
+; CHECK: test_pow:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}powf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}powf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}powf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}powf
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.pow.v4f32(<4 x float> %0, <4 x float> <float 2., float 2., float 2., float 2.>)
+
+ store <4 x float> %1, <4 x float>* %X, align 16
+
+ ret void
+}
+
+declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) nounwind readonly
+
+define void @test_powi(<4 x float>* %X) nounwind {
+
+; CHECK: test_powi:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia [[reg0]], {{.*}}
+; CHECK: vmul.f32 {{.*}}
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.powi.v4f32(<4 x float> %0, i32 2)
+
+ store <4 x float> %1, <4 x float>* %X, align 16
+
+ ret void
+}
+
+declare <4 x float> @llvm.powi.v4f32(<4 x float>, i32) nounwind readonly
+
+define void @test_sin(<4 x float>* %X) nounwind {
+
+; CHECK: test_sin:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}sinf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}sinf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}sinf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}sinf
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.sin.v4f32(<4 x float> %0)
+ store <4 x float> %1, <4 x float>* %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.sin.v4f32(<4 x float>) nounwind readonly
+
diff --git a/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll b/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll
new file mode 100644
index 0000000..0c90f4c
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s | FileCheck %s
+; <rdar://problem/10497732>
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+@x1 = internal global i32 1
+@x2 = internal global i64 12
+
+define i64 @f() {
+ %ax = load i32* @x1
+ %a = zext i32 %ax to i64
+ %b = load i64* @x2
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
+; We can global-merge the i64 in theory, but the current code doesn't handle
+; the alignment correctly; for the moment, just check that we don't do it.
+; See also
+
+; CHECK-NOT: MergedGlobals
+; CHECK: _x2
+; CHECK-NOT: MergedGlobals
diff --git a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
new file mode 100644
index 0000000..5ce600d
--- /dev/null
+++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -o /dev/null -stats |& FileCheck %s -check-prefix=STATS
+; Radar 10266272
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios4.0.0"
+; STATS-NOT: machine-sink
+
+define i32 @foo(i32 %h) nounwind readonly ssp {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %cmp = icmp slt i32 0, %h
+ br i1 %cmp, label %for.body, label %if.end299
+
+for.body: ; preds = %for.cond
+ %v.5 = select i1 undef, i32 undef, i32 0
+ %0 = load i8* undef, align 1, !tbaa !0
+ %conv88 = zext i8 %0 to i32
+ %sub89 = sub nsw i32 0, %conv88
+ %v.8 = select i1 undef, i32 undef, i32 %sub89
+ %1 = load i8* null, align 1, !tbaa !0
+ %conv108 = zext i8 %1 to i32
+ %2 = load i8* undef, align 1, !tbaa !0
+ %conv110 = zext i8 %2 to i32
+ %sub111 = sub nsw i32 %conv108, %conv110
+ %cmp112 = icmp slt i32 %sub111, 0
+ %sub115 = sub nsw i32 0, %sub111
+ %v.10 = select i1 %cmp112, i32 %sub115, i32 %sub111
+ %add62 = add i32 0, %v.5
+ %add73 = add i32 %add62, 0
+ %add84 = add i32 %add73, 0
+ %add95 = add i32 %add84, %v.8
+ %add106 = add i32 %add95, 0
+ %add117 = add i32 %add106, %v.10
+ %add128 = add i32 %add117, 0
+ %add139 = add i32 %add128, 0
+ %add150 = add i32 %add139, 0
+ %add161 = add i32 %add150, 0
+ %add172 = add i32 %add161, 0
+ br i1 undef, label %for.cond, label %if.end299
+
+if.end299: ; preds = %for.body, %for.cond
+ %s.10 = phi i32 [ %add172, %for.body ], [ 0, %for.cond ]
+ ret i32 %s.10
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll
index 0f9543f..107e79a 100644
--- a/test/CodeGen/ARM/call.ll
+++ b/test/CodeGen/ARM/call.ll
@@ -26,7 +26,7 @@ define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind {
; CHECKV4: bx r{{.*}}
BB0:
%5 = inttoptr i32 %0 to i32* ; <i32*> [#uses=1]
- %t35 = volatile load i32* %5 ; <i32> [#uses=1]
+ %t35 = load volatile i32* %5 ; <i32> [#uses=1]
%6 = inttoptr i32 %t35 to i32** ; <i32**> [#uses=1]
%7 = getelementptr i32** %6, i32 86 ; <i32**> [#uses=1]
%8 = load i32** %7 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/ARM/clz.ll b/test/CodeGen/ARM/clz.ll
index e381e00..5b6a584 100644
--- a/test/CodeGen/ARM/clz.ll
+++ b/test/CodeGen/ARM/clz.ll
@@ -1,10 +1,10 @@
; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s
-declare i32 @llvm.ctlz.i32(i32)
+declare i32 @llvm.ctlz.i32(i32, i1)
define i32 @test(i32 %x) {
; CHECK: test
; CHECK: clz r0, r0
- %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x )
+ %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
ret i32 %tmp.1
}
diff --git a/test/CodeGen/ARM/ctor_order.ll b/test/CodeGen/ARM/ctor_order.ll
new file mode 100644
index 0000000..7f00eb3
--- /dev/null
+++ b/test/CodeGen/ARM/ctor_order.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=ELF
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=GNUEABI
+
+; DARWIN: .section __DATA,__mod_init_func,mod_init_funcs
+; DARWIN: .long _f151
+; DARWIN-NEXT: .long _f152
+
+; ELF: .section .ctors,"aw",%progbits
+; ELF: .long f152
+; ELF-NEXT: .long f151
+
+; GNUEABI: .section .init_array,"aw",%init_array
+; GNUEABI: .long f151
+; GNUEABI-NEXT: .long f152
+
+
+@llvm.global_ctors = appending global [2 x { i32, void ()* }] [ { i32, void ()* } { i32 151, void ()* @f151 }, { i32, void ()* } { i32 152, void ()* @f152 } ]
+
+define void @f151() {
+entry:
+ ret void
+}
+
+define void @f152() {
+entry:
+ ret void
+}
diff --git a/test/CodeGen/ARM/ctz.ll b/test/CodeGen/ARM/ctz.ll
index 1d2ced3..5ebca53 100644
--- a/test/CodeGen/ARM/ctz.ll
+++ b/test/CodeGen/ARM/ctz.ll
@@ -1,11 +1,11 @@
; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
define i32 @f1(i32 %a) {
; CHECK: f1:
; CHECK: rbit
; CHECK: clz
- %tmp = call i32 @llvm.cttz.i32( i32 %a )
+ %tmp = call i32 @llvm.cttz.i32( i32 %a, i1 true )
ret i32 %tmp
}
diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll
index 8c9095e..325eea0 100644
--- a/test/CodeGen/ARM/debug-info-d16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s - | FileCheck %s
+; RUN: llc < %s | FileCheck %s
; Radar 9309221
; Test dwarf reg no for d16
;CHECK: DW_OP_regx
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
index 00346bf..695dbba 100644
--- a/test/CodeGen/ARM/fast-isel-call.ll
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -80,3 +80,49 @@ declare zeroext i16 @t6();
declare signext i8 @t7();
declare zeroext i8 @t8();
declare zeroext i1 @t9();
+
+define i32 @t10(i32 %argc, i8** nocapture %argv) {
+entry:
+; ARM: @t10
+; ARM: movw r0, #0
+; ARM: movw r1, #248
+; ARM: movw r2, #187
+; ARM: movw r3, #28
+; ARM: movw r9, #40
+; ARM: movw r12, #186
+; ARM: uxtb r0, r0
+; ARM: uxtb r1, r1
+; ARM: uxtb r2, r2
+; ARM: uxtb r3, r3
+; ARM: uxtb r9, r9
+; ARM: str r9, [sp]
+; ARM: uxtb r9, r12
+; ARM: str r9, [sp, #4]
+; ARM: bl _bar
+; THUMB: @t10
+; THUMB: movs r0, #0
+; THUMB: movt r0, #0
+; THUMB: movs r1, #248
+; THUMB: movt r1, #0
+; THUMB: movs r2, #187
+; THUMB: movt r2, #0
+; THUMB: movs r3, #28
+; THUMB: movt r3, #0
+; THUMB: movw r9, #40
+; THUMB: movt r9, #0
+; THUMB: movw r12, #186
+; THUMB: movt r12, #0
+; THUMB: uxtb r0, r0
+; THUMB: uxtb r1, r1
+; THUMB: uxtb r2, r2
+; THUMB: uxtb r3, r3
+; THUMB: uxtb.w r9, r9
+; THUMB: str.w r9, [sp]
+; THUMB: uxtb.w r9, r12
+; THUMB: str.w r9, [sp, #4]
+; THUMB: bl _bar
+ %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
+ ret i32 0
+}
+
+declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext)
diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll
new file mode 100644
index 0000000..028d940
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-deadcode.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+
+; The target-specific selector can't properly handle the double because it
+; isn't being passed via a register, so the materialized arguments become
+; dead code.
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+; THUMB: main
+ call void @printArgsNoRet(i32 1, float 0x4000CCCCC0000000, i8 signext 99, double 4.100000e+00)
+; THUMB: blx _printArgsNoRet
+; THUMB-NOT: ldr
+; THUMB-NOT: vldr
+; THUMB-NOT: vmov
+; THUMB-NOT: ldr
+; THUMB-NOT: sxtb
+; THUMB: movs r0, #0
+; THUMB: movt r0, #0
+; THUMB: add sp, #32
+; THUMB: pop {r7, pc}
+ ret i32 0
+}
+
+declare void @printArgsNoRet(i32 %a1, float %a2, i8 signext %a3, double %a4)
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 465e85f..648d711 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -158,3 +158,64 @@ define void @test4() {
; ARM: ldr r1, [r1]
; ARM: str r0, [r1]
}
+
+; Check unaligned stores
+%struct.anon = type <{ float }>
+
+@a = common global %struct.anon* null, align 4
+
+define void @unaligned_store(float %x, float %y) nounwind {
+entry:
+; ARM: @unaligned_store
+; ARM: vmov r1, s0
+; ARM: str r1, [r0]
+
+; THUMB: @unaligned_store
+; THUMB: vmov r1, s0
+; THUMB: str r1, [r0]
+
+ %add = fadd float %x, %y
+ %0 = load %struct.anon** @a, align 4
+ %x1 = getelementptr inbounds %struct.anon* %0, i32 0, i32 0
+ store float %add, float* %x1, align 1
+ ret void
+}
+
+; Doublewords require only word-alignment.
+; rdar://10528060
+%struct.anon.0 = type { double }
+
+@foo_unpacked = common global %struct.anon.0 zeroinitializer, align 4
+
+define void @test5(double %a, double %b) nounwind {
+entry:
+; ARM: @test5
+; THUMB: @test5
+ %add = fadd double %a, %b
+ store double %add, double* getelementptr inbounds (%struct.anon.0* @foo_unpacked, i32 0, i32 0), align 4
+; ARM: vstr d16, [r0]
+; THUMB: vstr d16, [r0]
+ ret void
+}
+
+; Check unaligned loads of floats
+%class.TAlignTest = type <{ i16, float }>
+
+define zeroext i1 @test6(%class.TAlignTest* %this) nounwind align 2 {
+entry:
+; ARM: @test6
+; THUMB: @test6
+ %0 = alloca %class.TAlignTest*, align 4
+ store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4
+ %1 = load %class.TAlignTest** %0
+ %2 = getelementptr inbounds %class.TAlignTest* %1, i32 0, i32 1
+ %3 = load float* %2, align 1
+ %4 = fcmp une float %3, 0.000000e+00
+; ARM: ldr r0, [r0, #2]
+; ARM: vmov s0, r0
+; ARM: vcmpe.f32 s0, #0
+; THUMB: ldr.w r0, [r0, #2]
+; THUMB: vmov s0, r0
+; THUMB: vcmpe.f32 s0, #0
+ ret i1 %4
+}
diff --git a/test/CodeGen/ARM/fold-const.ll b/test/CodeGen/ARM/fold-const.ll
index 227e4e8..1ba561d 100644
--- a/test/CodeGen/ARM/fold-const.ll
+++ b/test/CodeGen/ARM/fold-const.ll
@@ -3,7 +3,7 @@
define i32 @f(i32 %a) nounwind readnone optsize ssp {
entry:
%conv = zext i32 %a to i64
- %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %conv)
+ %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %conv, i1 true)
; CHECK: clz
; CHECK-NOT: adds
%cast = trunc i64 %tmp1 to i32
@@ -11,4 +11,4 @@ entry:
ret i32 %sub
}
-declare i64 @llvm.ctlz.i64(i64) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
diff --git a/test/CodeGen/ARM/global-merge.ll b/test/CodeGen/ARM/global-merge.ll
index 28bf221..1732df3 100644
--- a/test/CodeGen/ARM/global-merge.ll
+++ b/test/CodeGen/ARM/global-merge.ll
@@ -14,7 +14,7 @@
; offset. Having the starting offset in range is not sufficient.
; When this works properly, @g3 is placed in a separate chunk of merged globals.
; CHECK: _MergedGlobals1:
-@g3 = internal global [30 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ]
+@g3 = internal global [30 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ], align 4
; Global variables that can be placed in BSS should be kept together in a
; separate pool of merged globals.
diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll
index 5e7e3f2..eb71149 100644
--- a/test/CodeGen/ARM/globals.ll
+++ b/test/CodeGen/ARM/globals.ll
@@ -70,6 +70,5 @@ define i32 @test1() {
; LinuxPIC: .align 2
; LinuxPIC: .LCPI0_0:
; LinuxPIC: .long _GLOBAL_OFFSET_TABLE_-(.LPC0_0+8)
-; LinuxPIC: .align 2
; LinuxPIC: .LCPI0_1:
; LinuxPIC: .long G(GOT)
diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll
index cb5243c..2fcc45f 100644
--- a/test/CodeGen/ARM/inlineasm3.ll
+++ b/test/CodeGen/ARM/inlineasm3.ll
@@ -110,3 +110,13 @@ entry:
call void asm "str $1, $0", "=*Q,r"(i8** %f.addr, i32 %g) nounwind
ret void
}
+
+; Radar 10551006
+
+define <4 x i32> @t11(i32* %p) nounwind {
+entry:
+; CHECK: t11
+; CHECK: vld1.s32 {d16[], d17[]}, [r0]
+ %0 = tail call <4 x i32> asm "vld1.s32 {${0:e}[], ${0:f}[]}, [$1]", "=w,r"(i32* %p) nounwind
+ ret <4 x i32> %0
+}
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index d5aac2e..a99a7ec 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -25,8 +25,8 @@ define i32 @f2(i64 %x, i64 %y) {
; CHECK: lsr{{.*}}r2
; CHECK-NEXT: rsb r3, r2, #32
; CHECK-NEXT: sub r2, r2, #32
-; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: orr r0, r0, r1, lsl r3
+; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: asrge r0, r1, r2
%a = ashr i64 %x, %y
%b = trunc i64 %a to i32
@@ -38,8 +38,8 @@ define i32 @f3(i64 %x, i64 %y) {
; CHECK: lsr{{.*}}r2
; CHECK-NEXT: rsb r3, r2, #32
; CHECK-NEXT: sub r2, r2, #32
-; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: orr r0, r0, r1, lsl r3
+; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: lsrge r0, r1, r2
%a = lshr i64 %x, %y
%b = trunc i64 %a to i32
diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll
index c218ff5..b892d2d 100644
--- a/test/CodeGen/ARM/neon_ld1.ll
+++ b/test/CodeGen/ARM/neon_ld1.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=arm -mattr=+neon | grep vldr | count 4
-; RUN: llc < %s -march=arm -mattr=+neon | grep vstr
-; RUN: llc < %s -march=arm -mattr=+neon | grep vmov
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; CHECK: t1
+; CHECK: vldr d
+; CHECK: vldr d
+; CHECK: vadd.i16 d
+; CHECK: vstr d
define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind {
entry:
%0 = load <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1]
@@ -12,6 +15,11 @@ entry:
ret void
}
+; CHECK: t2
+; CHECK: vldr d
+; CHECK: vldr d
+; CHECK: vsub.i16 d
+; CHECK: vmov r0, r1, d
define <2 x i32> @t2(<4 x i16>* %a, <4 x i16>* %b) nounwind readonly {
entry:
%0 = load <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1]
diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll
index 130277b..944bfe0 100644
--- a/test/CodeGen/ARM/neon_ld2.ll
+++ b/test/CodeGen/ARM/neon_ld2.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=arm -mattr=+neon | grep vldmia | count 4
-; RUN: llc < %s -march=arm -mattr=+neon | grep vstmia | count 1
-; RUN: llc < %s -march=arm -mattr=+neon | grep vmov | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; CHECK: t1
+; CHECK: vldmia
+; CHECK: vldmia
+; CHECK: vadd.i64 q
+; CHECK: vstmia
define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
entry:
%0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1]
@@ -12,6 +15,12 @@ entry:
ret void
}
+; CHECK: t2
+; CHECK: vldmia
+; CHECK: vldmia
+; CHECK: vsub.i64 q
+; CHECK: vmov r0, r1, d
+; CHECK: vmov r2, r3, d
define <4 x i32> @t2(<2 x i64>* %a, <2 x i64>* %b) nounwind readonly {
entry:
%0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1]
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index 0396a41..be95657 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -371,3 +371,13 @@ entry:
store <4 x float> <float 3.100000e+01, float 3.100000e+01, float 3.100000e+01, float 3.100000e+01>, <4 x float>* %p, align 4
ret void
}
+
+define void @v_mov_v4f32_undef(<4 x float> * nocapture %p) nounwind {
+entry:
+;CHECK: v_mov_v4f32_undef:
+;CHECK: vmov.f32 q{{.*}}, #1.000000e+00
+ %a = load <4 x float> *%p
+ %b = fadd <4 x float> %a, <float undef, float 1.0, float 1.0, float 1.0>
+ store <4 x float> %b, <4 x float> *%p
+ ret void
+}
diff --git a/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll b/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
deleted file mode 100644
index dd505af..0000000
--- a/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c | grep volatile
-
-define void @test(i32* %P) {
- %X = volatile load i32* %P ; <i32> [#uses=1]
- volatile store i32 %X, i32* %P
- ret void
-}
-
diff --git a/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll b/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
deleted file mode 100644
index 99de837..0000000
--- a/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c | grep {\\* *volatile *\\*}
-
-@G = external global void ()* ; <void ()**> [#uses=2]
-
-define void @test() {
- volatile store void ()* @test, void ()** @G
- volatile load void ()** @G ; <void ()*>:1 [#uses=0]
- ret void
-}
-
diff --git a/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll b/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
index 6e0cf68..e6eeba3 100644
--- a/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
+++ b/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
@@ -8,8 +8,8 @@ define void @test(i32* %P) {
}
define void @test2(i32* %P) {
- %X = volatile load i32* %P, align 2
- volatile store i32 %X, i32* %P, align 2
+ %X = load volatile i32* %P, align 2
+ store volatile i32 %X, i32* %P, align 2
ret void
}
diff --git a/test/CodeGen/Generic/2008-02-04-Ctlz.ll b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
index 288bfd2..9f10206 100644
--- a/test/CodeGen/Generic/2008-02-04-Ctlz.ll
+++ b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
@@ -4,8 +4,8 @@
define i32 @main(i64 %arg) nounwind {
entry:
- %tmp37 = tail call i64 @llvm.ctlz.i64( i64 %arg ) ; <i64> [#uses=1]
- %tmp47 = tail call i64 @llvm.cttz.i64( i64 %arg ) ; <i64> [#uses=1]
+ %tmp37 = tail call i64 @llvm.ctlz.i64( i64 %arg, i1 true ) ; <i64> [#uses=1]
+ %tmp47 = tail call i64 @llvm.cttz.i64( i64 %arg, i1 true ) ; <i64> [#uses=1]
%tmp57 = tail call i64 @llvm.ctpop.i64( i64 %arg ) ; <i64> [#uses=1]
%tmp38 = trunc i64 %tmp37 to i32 ; <i32>:0 [#uses=1]
%tmp48 = trunc i64 %tmp47 to i32 ; <i32>:0 [#uses=1]
@@ -16,6 +16,6 @@ entry:
declare i32 @printf(i8* noalias , ...) nounwind
-declare i64 @llvm.ctlz.i64(i64) nounwind readnone
-declare i64 @llvm.cttz.i64(i64) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/test/CodeGen/Generic/llvm-ct-intrinsics.ll b/test/CodeGen/Generic/llvm-ct-intrinsics.ll
index 1db7549..abcdb9b 100644
--- a/test/CodeGen/Generic/llvm-ct-intrinsics.ll
+++ b/test/CodeGen/Generic/llvm-ct-intrinsics.ll
@@ -21,19 +21,19 @@ define void @ctpoptest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %C
ret void
}
-declare i64 @llvm.ctlz.i64(i64)
+declare i64 @llvm.ctlz.i64(i64, i1)
-declare i32 @llvm.ctlz.i32(i32)
+declare i32 @llvm.ctlz.i32(i32, i1)
-declare i16 @llvm.ctlz.i16(i16)
+declare i16 @llvm.ctlz.i16(i16, i1)
-declare i8 @llvm.ctlz.i8(i8)
+declare i8 @llvm.ctlz.i8(i8, i1)
define void @ctlztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) {
- %a = call i8 @llvm.ctlz.i8( i8 %A ) ; <i8> [#uses=1]
- %b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1]
- %c = call i32 @llvm.ctlz.i32( i32 %C ) ; <i32> [#uses=1]
- %d = call i64 @llvm.ctlz.i64( i64 %D ) ; <i64> [#uses=1]
+ %a = call i8 @llvm.ctlz.i8( i8 %A, i1 true ) ; <i8> [#uses=1]
+ %b = call i16 @llvm.ctlz.i16( i16 %B, i1 true ) ; <i16> [#uses=1]
+ %c = call i32 @llvm.ctlz.i32( i32 %C, i1 true ) ; <i32> [#uses=1]
+ %d = call i64 @llvm.ctlz.i64( i64 %D, i1 true ) ; <i64> [#uses=1]
store i8 %a, i8* %AP
store i16 %b, i16* %BP
store i32 %c, i32* %CP
@@ -41,19 +41,19 @@ define void @ctlztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP
ret void
}
-declare i64 @llvm.cttz.i64(i64)
+declare i64 @llvm.cttz.i64(i64, i1)
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
-declare i16 @llvm.cttz.i16(i16)
+declare i16 @llvm.cttz.i16(i16, i1)
-declare i8 @llvm.cttz.i8(i8)
+declare i8 @llvm.cttz.i8(i8, i1)
define void @cttztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) {
- %a = call i8 @llvm.cttz.i8( i8 %A ) ; <i8> [#uses=1]
- %b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1]
- %c = call i32 @llvm.cttz.i32( i32 %C ) ; <i32> [#uses=1]
- %d = call i64 @llvm.cttz.i64( i64 %D ) ; <i64> [#uses=1]
+ %a = call i8 @llvm.cttz.i8( i8 %A, i1 true ) ; <i8> [#uses=1]
+ %b = call i16 @llvm.cttz.i16( i16 %B, i1 true ) ; <i16> [#uses=1]
+ %c = call i32 @llvm.cttz.i32( i32 %C, i1 true ) ; <i32> [#uses=1]
+ %d = call i64 @llvm.cttz.i64( i64 %D, i1 true ) ; <i64> [#uses=1]
store i8 %a, i8* %AP
store i16 %b, i16* %BP
store i32 %c, i32* %CP
diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll
new file mode 100644
index 0000000..b882cf7
--- /dev/null
+++ b/test/CodeGen/Hexagon/args.ll
@@ -0,0 +1,19 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: r[[T0:[0-9]+]] = #7
+; CHECK: memw(r29 + #0) = r[[T0]]
+; CHECK: r0 = #1
+; CHECK: r1 = #2
+; CHECK: r2 = #3
+; CHECK: r3 = #4
+; CHECK: r4 = #5
+; CHECK: r5 = #6
+
+
+define void @foo() nounwind {
+entry:
+ call void @bar(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7)
+ ret void
+}
+
+declare void @bar(i32, i32, i32, i32, i32, i32, i32)
diff --git a/test/CodeGen/Hexagon/combine.ll b/test/CodeGen/Hexagon/combine.ll
new file mode 100644
index 0000000..36abd74
--- /dev/null
+++ b/test/CodeGen/Hexagon/combine.ll
@@ -0,0 +1,18 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: combine(r{{[0-9]+}}, r{{[0-9]+}})
+
+@j = external global i32
+@k = external global i64
+
+define void @foo() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %1 = load i64* @k, align 8
+ %conv = trunc i64 %1 to i32
+ %2 = call i64 @llvm.hexagon.A2.combinew(i32 %0, i32 %conv)
+ store i64 %2, i64* @k, align 8
+ ret void
+}
+
+declare i64 @llvm.hexagon.A2.combinew(i32, i32) nounwind readnone
diff --git a/test/CodeGen/Hexagon/dg.exp b/test/CodeGen/Hexagon/dg.exp
new file mode 100644
index 0000000..89f45e6
--- /dev/null
+++ b/test/CodeGen/Hexagon/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target Hexagon] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/Hexagon/double.ll b/test/CodeGen/Hexagon/double.ll
new file mode 100644
index 0000000..04c2ec1
--- /dev/null
+++ b/test/CodeGen/Hexagon/double.ll
@@ -0,0 +1,23 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: __hexagon_adddf3
+; CHECK: __hexagon_subdf3
+
+define void @foo(double* %acc, double %num, double %num2) nounwind {
+entry:
+ %acc.addr = alloca double*, align 4
+ %num.addr = alloca double, align 8
+ %num2.addr = alloca double, align 8
+ store double* %acc, double** %acc.addr, align 4
+ store double %num, double* %num.addr, align 8
+ store double %num2, double* %num2.addr, align 8
+ %0 = load double** %acc.addr, align 4
+ %1 = load double* %0
+ %2 = load double* %num.addr, align 8
+ %add = fadd double %1, %2
+ %3 = load double* %num2.addr, align 8
+ %sub = fsub double %add, %3
+ %4 = load double** %acc.addr, align 4
+ store double %sub, double* %4
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/float.ll b/test/CodeGen/Hexagon/float.ll
new file mode 100644
index 0000000..51acf2e
--- /dev/null
+++ b/test/CodeGen/Hexagon/float.ll
@@ -0,0 +1,23 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: __hexagon_addsf3
+; CHECK: __hexagon_subsf3
+
+define void @foo(float* %acc, float %num, float %num2) nounwind {
+entry:
+ %acc.addr = alloca float*, align 4
+ %num.addr = alloca float, align 4
+ %num2.addr = alloca float, align 4
+ store float* %acc, float** %acc.addr, align 4
+ store float %num, float* %num.addr, align 4
+ store float %num2, float* %num2.addr, align 4
+ %0 = load float** %acc.addr, align 4
+ %1 = load float* %0
+ %2 = load float* %num.addr, align 4
+ %add = fadd float %1, %2
+ %3 = load float* %num2.addr, align 4
+ %sub = fsub float %add, %3
+ %4 = load float** %acc.addr, align 4
+ store float %sub, float* %4
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/frame.ll b/test/CodeGen/Hexagon/frame.ll
new file mode 100644
index 0000000..c0a9fda
--- /dev/null
+++ b/test/CodeGen/Hexagon/frame.ll
@@ -0,0 +1,24 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+@num = external global i32
+@acc = external global i32
+@num2 = external global i32
+
+; CHECK: allocframe
+; CHECK: dealloc_return
+
+define i32 @foo() nounwind {
+entry:
+ %i = alloca i32, align 4
+ %0 = load i32* @num, align 4
+ store i32 %0, i32* %i, align 4
+ %1 = load i32* %i, align 4
+ %2 = load i32* @acc, align 4
+ %mul = mul nsw i32 %1, %2
+ %3 = load i32* @num2, align 4
+ %add = add nsw i32 %mul, %3
+ store i32 %add, i32* %i, align 4
+ %4 = load i32* %i, align 4
+ ret i32 %4
+}
diff --git a/test/CodeGen/Hexagon/mpy.ll b/test/CodeGen/Hexagon/mpy.ll
new file mode 100644
index 0000000..afd6fc6
--- /dev/null
+++ b/test/CodeGen/Hexagon/mpy.ll
@@ -0,0 +1,20 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: += mpyi
+
+define void @foo(i32 %acc, i32 %num, i32 %num2) nounwind {
+entry:
+ %acc.addr = alloca i32, align 4
+ %num.addr = alloca i32, align 4
+ %num2.addr = alloca i32, align 4
+ store i32 %acc, i32* %acc.addr, align 4
+ store i32 %num, i32* %num.addr, align 4
+ store i32 %num2, i32* %num2.addr, align 4
+ %0 = load i32* %num.addr, align 4
+ %1 = load i32* %acc.addr, align 4
+ %mul = mul nsw i32 %0, %1
+ %2 = load i32* %num2.addr, align 4
+ %add = add nsw i32 %mul, %2
+ store i32 %add, i32* %num.addr, align 4
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/static.ll b/test/CodeGen/Hexagon/static.ll
new file mode 100644
index 0000000..c251bd4
--- /dev/null
+++ b/test/CodeGen/Hexagon/static.ll
@@ -0,0 +1,21 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+@num = external global i32
+@acc = external global i32
+@val = external global i32
+
+; CHECK: CONST32(#acc)
+; CHECK: CONST32(#val)
+; CHECK: CONST32(#num)
+
+define void @foo() nounwind {
+entry:
+ %0 = load i32* @num, align 4
+ %1 = load i32* @acc, align 4
+ %mul = mul nsw i32 %0, %1
+ %2 = load i32* @val, align 4
+ %add = add nsw i32 %mul, %2
+ store i32 %add, i32* @num, align 4
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/struct_args.ll b/test/CodeGen/Hexagon/struct_args.ll
new file mode 100644
index 0000000..2c962d0
--- /dev/null
+++ b/test/CodeGen/Hexagon/struct_args.ll
@@ -0,0 +1,16 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: r1:0 = or(r{{[0-9]}}:{{[0-9]}}, r{{[0-9]}}:{{[0-9]}})
+
+%struct.small = type { i32, i32 }
+
+@s1 = common global %struct.small zeroinitializer, align 4
+
+define void @foo() nounwind {
+entry:
+ %0 = load i64* bitcast (%struct.small* @s1 to i64*), align 1
+ call void @bar(i64 %0)
+ ret void
+}
+
+declare void @bar(i64)
diff --git a/test/CodeGen/Hexagon/struct_args_large.ll b/test/CodeGen/Hexagon/struct_args_large.ll
new file mode 100644
index 0000000..69de4f6
--- /dev/null
+++ b/test/CodeGen/Hexagon/struct_args_large.ll
@@ -0,0 +1,17 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: r[[T0:[0-9]+]] = CONST32(#s2)
+; CHECK: r[[T1:[0-9]+]] = memw(r[[T0]] + #0)
+; CHECK: memw(r29 + #0) = r[[T1]]
+
+%struct.large = type { i64, i64 }
+
+@s2 = common global %struct.large zeroinitializer, align 8
+
+define void @foo() nounwind {
+entry:
+ call void @bar(%struct.large* byval @s2)
+ ret void
+}
+
+declare void @bar(%struct.large* byval)
diff --git a/test/CodeGen/Hexagon/vaddh.ll b/test/CodeGen/Hexagon/vaddh.ll
new file mode 100644
index 0000000..788e474
--- /dev/null
+++ b/test/CodeGen/Hexagon/vaddh.ll
@@ -0,0 +1,17 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: vaddh(r{{[0-9]+}}, r{{[0-9]+}})
+
+@j = external global i32
+@k = external global i32
+
+define void @foo() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %1 = load i32* @k, align 4
+ %2 = call i32 @llvm.hexagon.A2.svaddh(i32 %0, i32 %1)
+ store i32 %2, i32* @k, align 4
+ ret void
+}
+
+declare i32 @llvm.hexagon.A2.svaddh(i32, i32) nounwind readnone
diff --git a/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll b/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
index f339373..4c7d2d0 100644
--- a/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
+++ b/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
@@ -7,9 +7,9 @@ target triple = "msp430-unknown-linux-gnu"
define void @uip_arp_arpin() nounwind {
entry:
- %tmp = volatile load i16* @uip_len ; <i16> [#uses=1]
+ %tmp = load volatile i16* @uip_len ; <i16> [#uses=1]
%cmp = icmp ult i16 %tmp, 42 ; <i1> [#uses=1]
- volatile store i16 0, i16* @uip_len
+ store volatile i16 0, i16* @uip_len
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
diff --git a/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll b/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
index 088d3e1..e8c0d14 100644
--- a/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
+++ b/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
@@ -6,8 +6,8 @@ target triple = "msp430-generic-generic"
define i16 @foo() nounwind readnone {
entry:
%result = alloca i16, align 1 ; <i16*> [#uses=2]
- volatile store i16 0, i16* %result
- %tmp = volatile load i16* %result ; <i16> [#uses=1]
+ store volatile i16 0, i16* %result
+ %tmp = load volatile i16* %result ; <i16> [#uses=1]
ret i16 %tmp
}
@@ -22,8 +22,8 @@ while.cond: ; preds = %while.cond, %entry
while.end: ; preds = %while.cond
%result.i = alloca i16, align 1 ; <i16*> [#uses=2]
- volatile store i16 0, i16* %result.i
- %tmp.i = volatile load i16* %result.i ; <i16> [#uses=0]
+ store volatile i16 0, i16* %result.i
+ %tmp.i = load volatile i16* %result.i ; <i16> [#uses=0]
ret i16 0
}
diff --git a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
index 4d7d9b9..9fab482 100644
--- a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
+++ b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
@@ -11,10 +11,10 @@ entry:
%x.addr = alloca i8 ; <i8*> [#uses=2]
%tmp = alloca i8, align 1 ; <i8*> [#uses=2]
store i8 %x, i8* %x.addr
- %tmp1 = volatile load i8* @"\010x0021" ; <i8> [#uses=1]
+ %tmp1 = load volatile i8* @"\010x0021" ; <i8> [#uses=1]
store i8 %tmp1, i8* %tmp
%tmp2 = load i8* %x.addr ; <i8> [#uses=1]
- volatile store i8 %tmp2, i8* @"\010x0021"
+ store volatile i8 %tmp2, i8* @"\010x0021"
%tmp3 = load i8* %tmp ; <i8> [#uses=1]
store i8 %tmp3, i8* %retval
%0 = load i8* %retval ; <i8> [#uses=1]
diff --git a/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll b/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
index 856eb9d..c1a186a 100644
--- a/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
+++ b/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
@@ -4,9 +4,9 @@ define void @foo() nounwind {
entry:
%r = alloca i8 ; <i8*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- volatile load i8* %r, align 1 ; <i8>:0 [#uses=1]
+ load volatile i8* %r, align 1 ; <i8>:0 [#uses=1]
or i8 %0, 1 ; <i8>:1 [#uses=1]
- volatile store i8 %1, i8* %r, align 1
+ store volatile i8 %1, i8* %r, align 1
br label %return
return: ; preds = %entry
diff --git a/test/CodeGen/MSP430/AddrMode-bis-rx.ll b/test/CodeGen/MSP430/AddrMode-bis-rx.ll
index 4f9a724..c7ecb5a 100644
--- a/test/CodeGen/MSP430/AddrMode-bis-rx.ll
+++ b/test/CodeGen/MSP430/AddrMode-bis-rx.ll
@@ -32,7 +32,7 @@ define i8 @am3(i8 %x, i16 %n) nounwind {
; CHECK: bis.b bar(r14), r15
define i16 @am4(i16 %x) nounwind {
- %1 = volatile load i16* inttoptr(i16 32 to i16*)
+ %1 = load volatile i16* inttoptr(i16 32 to i16*)
%2 = or i16 %1,%x
ret i16 %2
}
diff --git a/test/CodeGen/MSP430/AddrMode-bis-xr.ll b/test/CodeGen/MSP430/AddrMode-bis-xr.ll
index 17ebd87..727c29f 100644
--- a/test/CodeGen/MSP430/AddrMode-bis-xr.ll
+++ b/test/CodeGen/MSP430/AddrMode-bis-xr.ll
@@ -35,9 +35,9 @@ define void @am3(i16 %i, i8 %x) nounwind {
; CHECK: bis.b r14, bar(r15)
define void @am4(i16 %x) nounwind {
- %1 = volatile load i16* inttoptr(i16 32 to i16*)
+ %1 = load volatile i16* inttoptr(i16 32 to i16*)
%2 = or i16 %x, %1
- volatile store i16 %2, i16* inttoptr(i16 32 to i16*)
+ store volatile i16 %2, i16* inttoptr(i16 32 to i16*)
ret void
}
; CHECK: am4:
diff --git a/test/CodeGen/MSP430/AddrMode-mov-rx.ll b/test/CodeGen/MSP430/AddrMode-mov-rx.ll
index 6676b88..7cd345b 100644
--- a/test/CodeGen/MSP430/AddrMode-mov-rx.ll
+++ b/test/CodeGen/MSP430/AddrMode-mov-rx.ll
@@ -29,7 +29,7 @@ define i8 @am3(i16 %n) nounwind {
; CHECK: mov.b bar(r15), r15
define i16 @am4() nounwind {
- %1 = volatile load i16* inttoptr(i16 32 to i16*)
+ %1 = load volatile i16* inttoptr(i16 32 to i16*)
ret i16 %1
}
; CHECK: am4:
diff --git a/test/CodeGen/MSP430/AddrMode-mov-xr.ll b/test/CodeGen/MSP430/AddrMode-mov-xr.ll
index 4b327b0..5eeb02f 100644
--- a/test/CodeGen/MSP430/AddrMode-mov-xr.ll
+++ b/test/CodeGen/MSP430/AddrMode-mov-xr.ll
@@ -29,7 +29,7 @@ define void @am3(i16 %i, i8 %a) nounwind {
; CHECK: mov.b r14, bar(r15)
define void @am4(i16 %a) nounwind {
- volatile store i16 %a, i16* inttoptr(i16 32 to i16*)
+ store volatile i16 %a, i16* inttoptr(i16 32 to i16*)
ret void
}
; CHECK: am4:
diff --git a/test/CodeGen/Mips/2008-06-05-Carry.ll b/test/CodeGen/Mips/2008-06-05-Carry.ll
index 9d8e391..c61e1cd 100644
--- a/test/CodeGen/Mips/2008-06-05-Carry.ll
+++ b/test/CodeGen/Mips/2008-06-05-Carry.ll
@@ -1,19 +1,22 @@
-; RUN: llc < %s -march=mips -o %t
-; RUN: grep subu %t | count 2
-; RUN: grep addu %t | count 4
-
-target datalayout =
-"e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
define i64 @add64(i64 %u, i64 %v) nounwind {
entry:
- %tmp2 = add i64 %u, %v
+; CHECK: addu
+; CHECK: sltu
+; CHECK: addu
+; CHECK: addu
+ %tmp2 = add i64 %u, %v
ret i64 %tmp2
}
define i64 @sub64(i64 %u, i64 %v) nounwind {
entry:
+; CHECK: sub64
+; CHECK: subu
+; CHECK: sltu
+; CHECK: addu
+; CHECK: subu
%tmp2 = sub i64 %u, %v
ret i64 %tmp2
}
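; A 64-bit add on 32-bit MIPS lowers to a carry sequence: addu on the low
; words, sltu to recover the carry (carry = (lo_sum < lo_lhs)), then two addu
; instructions to fold the carry into the high word. sub64 mirrors this with
; subu/sltu/addu/subu, which is exactly what the CHECK lines above pin down.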
diff --git a/test/CodeGen/Mips/2008-07-03-SRet.ll b/test/CodeGen/Mips/2008-07-03-SRet.ll
index b1d20d9..afec7f6 100644
--- a/test/CodeGen/Mips/2008-07-03-SRet.ll
+++ b/test/CodeGen/Mips/2008-07-03-SRet.ll
@@ -1,17 +1,18 @@
-; RUN: llc < %s -march=mips | grep {sw.*(\$4)} | count 3
+; RUN: llc -march=mips < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
- %struct.sret0 = type { i32, i32, i32 }
+%struct.sret0 = type { i32, i32, i32 }
define void @test0(%struct.sret0* noalias sret %agg.result, i32 %dummy) nounwind {
entry:
- getelementptr %struct.sret0* %agg.result, i32 0, i32 0 ; <i32*>:0 [#uses=1]
- store i32 %dummy, i32* %0, align 4
- getelementptr %struct.sret0* %agg.result, i32 0, i32 1 ; <i32*>:1 [#uses=1]
- store i32 %dummy, i32* %1, align 4
- getelementptr %struct.sret0* %agg.result, i32 0, i32 2 ; <i32*>:2 [#uses=1]
- store i32 %dummy, i32* %2, align 4
- ret void
+; CHECK: sw ${{[0-9]+}}, {{[0-9]+}}($4)
+; CHECK: sw ${{[0-9]+}}, {{[0-9]+}}($4)
+; CHECK: sw ${{[0-9]+}}, {{[0-9]+}}($4)
+ getelementptr %struct.sret0* %agg.result, i32 0, i32 0 ; <i32*>:0 [#uses=1]
+ store i32 %dummy, i32* %0, align 4
+ getelementptr %struct.sret0* %agg.result, i32 0, i32 1 ; <i32*>:1 [#uses=1]
+ store i32 %dummy, i32* %1, align 4
+ getelementptr %struct.sret0* %agg.result, i32 0, i32 2 ; <i32*>:2 [#uses=1]
+ store i32 %dummy, i32* %2, align 4
+ ret void
}
diff --git a/test/CodeGen/Mips/2008-07-07-Float2Int.ll b/test/CodeGen/Mips/2008-07-07-Float2Int.ll
index d804c7d..4c55236 100644
--- a/test/CodeGen/Mips/2008-07-07-Float2Int.ll
+++ b/test/CodeGen/Mips/2008-07-07-Float2Int.ll
@@ -1,16 +1,17 @@
-; RUN: llc < %s -march=mips | grep trunc.w.s | count 3
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
define i32 @fptoint(float %a) nounwind {
entry:
- fptosi float %a to i32 ; <i32>:0 [#uses=1]
- ret i32 %0
+; CHECK: trunc.w.s
+ fptosi float %a to i32 ; <i32>:0 [#uses=1]
+ ret i32 %0
}
define i32 @fptouint(float %a) nounwind {
entry:
- fptoui float %a to i32 ; <i32>:0 [#uses=1]
- ret i32 %0
+; CHECK: fptouint
+; CHECK: trunc.w.s
+; CHECK: trunc.w.s
+ fptoui float %a to i32 ; <i32>:0 [#uses=1]
+ ret i32 %0
}
diff --git a/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
index 94dfe35..a8e5470 100644
--- a/test/CodeGen/Mips/2008-07-22-Cstpool.ll
+++ b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
@@ -1,12 +1,13 @@
-; RUN: llc < %s -march=mips -o %t
-; RUN: grep {CPI\[01\]_\[01\]:} %t | count 2
-; RUN: grep {.rodata.cst4,"aM",@progbits} %t | count 1
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
define float @F(float %a) nounwind {
+; CHECK: .rodata.cst4,"aM",@progbits
entry:
- fadd float %a, 0x4011333340000000 ; <float>:0 [#uses=1]
- fadd float %0, 0x4010666660000000 ; <float>:1 [#uses=1]
- ret float %1
+; CHECK: ($CPI0_{{[0-1]}})
+; CHECK: ($CPI0_{{[0-1]}})
+; CHECK: ($CPI0_{{[0-1]}})
+; CHECK: ($CPI0_{{[0-1]}})
+ fadd float %a, 0x4011333340000000 ; <float>:0 [#uses=1]
+ fadd float %0, 0x4010666660000000 ; <float>:1 [#uses=1]
+ ret float %1
}
diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
index 23ed64a..f701bf1 100644
--- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll
+++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
@@ -1,17 +1,15 @@
-; RUN: llc < %s -march=mips -o %t
-; RUN: grep mfhi %t | count 1
-; RUN: grep mflo %t | count 1
-; RUN: grep multu %t | count 1
+; RUN: llc -march=mips < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
- %struct.DWstruct = type { i32, i32 }
+%struct.DWstruct = type { i32, i32 }
define i32 @A0(i32 %u, i32 %v) nounwind {
entry:
- %asmtmp = tail call %struct.DWstruct asm "multu $2,$3", "={lo},={hi},d,d"( i32 %u, i32 %v ) nounwind
- %asmresult = extractvalue %struct.DWstruct %asmtmp, 0
- %asmresult1 = extractvalue %struct.DWstruct %asmtmp, 1 ; <i32> [#uses=1]
+; CHECK: multu
+; CHECK: mflo
+; CHECK: mfhi
+ %asmtmp = tail call %struct.DWstruct asm "multu $2,$3", "={lo},={hi},d,d"( i32 %u, i32 %v ) nounwind
+ %asmresult = extractvalue %struct.DWstruct %asmtmp, 0
+ %asmresult1 = extractvalue %struct.DWstruct %asmtmp, 1 ; <i32> [#uses=1]
%res = add i32 %asmresult, %asmresult1
- ret i32 %res
+ ret i32 %res
}
diff --git a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
index f8eb028..78a49ff 100644
--- a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
+++ b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
@@ -1,18 +1,15 @@
-; RUN: llc < %s -march=mips -o %t
-; RUN: grep mtc1 %t | count 1
-; RUN: grep mfc1 %t | count 1
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
define float @A(i32 %u) nounwind {
entry:
- bitcast i32 %u to float
- ret float %0
+; CHECK: mtc1
+ bitcast i32 %u to float
+ ret float %0
}
define i32 @B(float %u) nounwind {
entry:
- bitcast float %u to i32
- ret i32 %0
+; CHECK: mfc1
+ bitcast float %u to i32
+ ret i32 %0
}
diff --git a/test/CodeGen/Mips/2008-08-06-Alloca.ll b/test/CodeGen/Mips/2008-08-06-Alloca.ll
index 6dd4af1..0d94b19 100644
--- a/test/CodeGen/Mips/2008-08-06-Alloca.ll
+++ b/test/CodeGen/Mips/2008-08-06-Alloca.ll
@@ -1,17 +1,15 @@
-; RUN: llc < %s -march=mips | grep {subu.*sp} | count 2
-; RUN: llc < %s -march=mips -regalloc=basic | grep {subu.*sp} | count 2
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
define i32 @twoalloca(i32 %size) nounwind {
entry:
- alloca i8, i32 %size ; <i8*>:0 [#uses=1]
- alloca i8, i32 %size ; <i8*>:1 [#uses=1]
- call i32 @foo( i8* %0 ) nounwind ; <i32>:2 [#uses=1]
- call i32 @foo( i8* %1 ) nounwind ; <i32>:3 [#uses=1]
- add i32 %3, %2 ; <i32>:4 [#uses=1]
- ret i32 %4
+; CHECK: subu ${{[0-9]+}}, $sp
+; CHECK: subu ${{[0-9]+}}, $sp
+ alloca i8, i32 %size ; <i8*>:0 [#uses=1]
+ alloca i8, i32 %size ; <i8*>:1 [#uses=1]
+ call i32 @foo( i8* %0 ) nounwind ; <i32>:2 [#uses=1]
+ call i32 @foo( i8* %1 ) nounwind ; <i32>:3 [#uses=1]
+ add i32 %3, %2 ; <i32>:4 [#uses=1]
+ ret i32 %4
}
declare i32 @foo(i8*)
diff --git a/test/CodeGen/Mips/2008-08-08-ctlz.ll b/test/CodeGen/Mips/2008-08-08-ctlz.ll
index fb33323..abd61de 100644
--- a/test/CodeGen/Mips/2008-08-08-ctlz.ll
+++ b/test/CodeGen/Mips/2008-08-08-ctlz.ll
@@ -1,12 +1,10 @@
-; RUN: llc < %s -march=mips | grep clz | count 1
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
define i32 @A0(i32 %u) nounwind {
entry:
- call i32 @llvm.ctlz.i32( i32 %u )
+; CHECK: clz
+ call i32 @llvm.ctlz.i32( i32 %u, i1 true )
ret i32 %0
}
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
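; The new i1 operand on llvm.ctlz/llvm.cttz is the is_zero_undef flag: i1 true
; asserts the input is never zero (so the bare clz instruction can be used and
; ctlz(0) is undefined), while i1 false requests the well-defined result equal
; to the bit width, e.g.:
;   %n = call i32 @llvm.ctlz.i32(i32 %x, i1 false)   ; ctlz(0) == 32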
diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll
index 5425bdf..23b5349 100644
--- a/test/CodeGen/Mips/2010-07-20-Switch.ll
+++ b/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -1,13 +1,21 @@
-; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s
+; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s -check-prefix=STATIC-O32
+; RUN: llc < %s -march=mips -relocation-model=pic | FileCheck %s -check-prefix=PIC-O32
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=PIC-N64
define i32 @main() nounwind readnone {
entry:
%x = alloca i32, align 4 ; <i32*> [#uses=2]
- volatile store i32 2, i32* %x, align 4
- %0 = volatile load i32* %x, align 4 ; <i32> [#uses=1]
-; CHECK: lui $3, %hi($JTI0_0)
-; CHECK: addiu $3, $3, %lo($JTI0_0)
-; CHECK: sll $2, $2, 2
+ store volatile i32 2, i32* %x, align 4
+ %0 = load volatile i32* %x, align 4 ; <i32> [#uses=1]
+; STATIC-O32: lui $[[R0:[0-9]+]], %hi($JTI0_0)
+; STATIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($JTI0_0)
+; STATIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
+; PIC-O32: lw $[[R0:[0-9]+]], %got($JTI0_0)
+; PIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($JTI0_0)
+; PIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
+; PIC-N64: ld $[[R0:[0-9]+]], %got_page($JTI0_0)
+; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst($JTI0_0)
+; PIC-N64: dsll ${{[0-9]+}}, ${{[0-9]+}}, 2
switch i32 %0, label %bb4 [
i32 0, label %bb5
i32 1, label %bb1
@@ -18,7 +26,7 @@ entry:
bb1: ; preds = %entry
ret i32 2
-; CHECK: $BB0_2
+; STATIC-O32: $BB0_2
bb2: ; preds = %entry
ret i32 0
diff --git a/test/CodeGen/Mips/2010-11-09-CountLeading.ll b/test/CodeGen/Mips/2010-11-09-CountLeading.ll
index c592b31..6174500 100644
--- a/test/CodeGen/Mips/2010-11-09-CountLeading.ll
+++ b/test/CodeGen/Mips/2010-11-09-CountLeading.ll
@@ -3,16 +3,16 @@
; CHECK: clz $2, $4
define i32 @t1(i32 %X) nounwind readnone {
entry:
- %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X)
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X, i1 true)
ret i32 %tmp1
}
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
; CHECK: clz $2, $4
define i32 @t2(i32 %X) nounwind readnone {
entry:
- %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X)
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X, i1 true)
ret i32 %tmp1
}
@@ -20,7 +20,7 @@ entry:
define i32 @t3(i32 %X) nounwind readnone {
entry:
%neg = xor i32 %X, -1
- %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg)
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg, i1 true)
ret i32 %tmp1
}
@@ -28,6 +28,6 @@ entry:
define i32 @t4(i32 %X) nounwind readnone {
entry:
%neg = xor i32 %X, -1
- %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg)
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg, i1 true)
ret i32 %tmp1
}
diff --git a/test/CodeGen/Mips/br-jmp.ll b/test/CodeGen/Mips/br-jmp.ll
new file mode 100644
index 0000000..1b5513a
--- /dev/null
+++ b/test/CodeGen/Mips/br-jmp.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC
+
+define void @count(i32 %x, i32 %y, i32 %z) noreturn nounwind readnone {
+entry:
+ br label %bosco
+
+bosco: ; preds = %bosco, %entry
+ br label %bosco
+}
+
+; CHECK-PIC: b $BB0_1
+; CHECK-STATIC: j $BB0_1
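; Under PIC the branch must remain position-independent, so the backend emits
; the PC-relative b; the static relocation model permits the absolute-target j.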
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index acd55b2..03254a9 100755
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -1,11 +1,14 @@
-; RUN: llc -march=mips < %s | FileCheck %s
-; RUN: llc -march=mips -regalloc=basic < %s | FileCheck %s
+; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips -regalloc=basic < %s | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck %s -check-prefix=N64
@i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
@i3 = common global i32* null, align 4
-; CHECK: lw ${{[0-9]+}}, %got(i3)($gp)
-; CHECK: addiu ${{[0-9]+}}, $gp, %got(i1)
+; O32: lw ${{[0-9]+}}, %got(i3)($gp)
+; O32: addiu ${{[0-9]+}}, $gp, %got(i1)
+; N64: ld ${{[0-9]+}}, %got_disp(i3)($gp)
+; N64: daddiu ${{[0-9]+}}, $gp, %got_disp(i1)
define i32* @cmov1(i32 %s) nounwind readonly {
entry:
%tobool = icmp ne i32 %s, 0
@@ -17,10 +20,14 @@ entry:
@c = global i32 1, align 4
@d = global i32 0, align 4
-; CHECK: cmov2:
-; CHECK: addiu $[[R1:[0-9]+]], $gp, %got(d)
-; CHECK: addiu $[[R0:[0-9]+]], $gp, %got(c)
-; CHECK: movn $[[R1]], $[[R0]], ${{[0-9]+}}
+; O32: cmov2:
+; O32: addiu $[[R1:[0-9]+]], $gp, %got(d)
+; O32: addiu $[[R0:[0-9]+]], $gp, %got(c)
+; O32: movn $[[R1]], $[[R0]], ${{[0-9]+}}
+; N64: cmov2:
+; N64: daddiu $[[R1:[0-9]+]], $gp, %got_disp(d)
+; N64: daddiu $[[R0:[0-9]+]], $gp, %got_disp(c)
+; N64: movn $[[R1]], $[[R0]], ${{[0-9]+}}
define i32 @cmov2(i32 %s) nounwind readonly {
entry:
%tobool = icmp ne i32 %s, 0
diff --git a/test/CodeGen/Mips/extins.ll b/test/CodeGen/Mips/extins.ll
index 69f53e5..a164f70 100644
--- a/test/CodeGen/Mips/extins.ll
+++ b/test/CodeGen/Mips/extins.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s
define i32 @ext0_5_9(i32 %s, i32 %pos, i32 %sz) nounwind readnone {
entry:
diff --git a/test/CodeGen/Mips/fcopysign.ll b/test/CodeGen/Mips/fcopysign.ll
index ae49e70..950c437 100644
--- a/test/CodeGen/Mips/fcopysign.ll
+++ b/test/CodeGen/Mips/fcopysign.ll
@@ -1,34 +1,42 @@
-; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-EL
-; RUN: llc < %s -march=mips | FileCheck %s -check-prefix=CHECK-EB
+; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=MIPS32-EL
+; RUN: llc < %s -march=mips | FileCheck %s -check-prefix=MIPS32-EB
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=MIPS64
define double @func0(double %d0, double %d1) nounwind readnone {
entry:
-; CHECK-EL: func0:
-; CHECK-EL: lui $[[T1:[0-9]+]], 32768
-; CHECK-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
-; CHECK-EL: mfc1 $[[HI0:[0-9]+]], $f15
-; CHECK-EL: and $[[AND1:[0-9]+]], $[[HI0]], $[[MSK1]]
-; CHECK-EL: lui $[[T0:[0-9]+]], 32767
-; CHECK-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; CHECK-EL: mfc1 $[[HI1:[0-9]+]], $f13
-; CHECK-EL: and $[[AND0:[0-9]+]], $[[HI1]], $[[MSK0]]
-; CHECK-EL: or $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
-; CHECK-EL: mfc1 $[[LO0:[0-9]+]], $f12
-; CHECK-EL: mtc1 $[[LO0]], $f0
-; CHECK-EL: mtc1 $[[OR]], $f1
+; MIPS32-EL: func0:
+; MIPS32-EL: lui $[[T1:[0-9]+]], 32768
+; MIPS32-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
+; MIPS32-EL: mfc1 $[[HI0:[0-9]+]], $f15
+; MIPS32-EL: and $[[AND1:[0-9]+]], $[[HI0]], $[[MSK1]]
+; MIPS32-EL: lui $[[T0:[0-9]+]], 32767
+; MIPS32-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
+; MIPS32-EL: mfc1 $[[HI1:[0-9]+]], $f13
+; MIPS32-EL: and $[[AND0:[0-9]+]], $[[HI1]], $[[MSK0]]
+; MIPS32-EL: or $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; MIPS32-EL: mfc1 $[[LO0:[0-9]+]], $f12
+; MIPS32-EL: mtc1 $[[LO0]], $f0
+; MIPS32-EL: mtc1 $[[OR]], $f1
;
-; CHECK-EB: lui $[[T1:[0-9]+]], 32768
-; CHECK-EB: ori $[[MSK1:[0-9]+]], $[[T1]], 0
-; CHECK-EB: mfc1 $[[HI1:[0-9]+]], $f14
-; CHECK-EB: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]]
-; CHECK-EB: lui $[[T0:[0-9]+]], 32767
-; CHECK-EB: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; CHECK-EB: mfc1 $[[HI0:[0-9]+]], $f12
-; CHECK-EB: and $[[AND0:[0-9]+]], $[[HI0]], $[[MSK0]]
-; CHECK-EB: or $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
-; CHECK-EB: mfc1 $[[LO0:[0-9]+]], $f13
-; CHECK-EB: mtc1 $[[OR]], $f0
-; CHECK-EB: mtc1 $[[LO0]], $f1
+; MIPS32-EB: lui $[[T1:[0-9]+]], 32768
+; MIPS32-EB: ori $[[MSK1:[0-9]+]], $[[T1]], 0
+; MIPS32-EB: mfc1 $[[HI1:[0-9]+]], $f14
+; MIPS32-EB: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]]
+; MIPS32-EB: lui $[[T0:[0-9]+]], 32767
+; MIPS32-EB: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
+; MIPS32-EB: mfc1 $[[HI0:[0-9]+]], $f12
+; MIPS32-EB: and $[[AND0:[0-9]+]], $[[HI0]], $[[MSK0]]
+; MIPS32-EB: or $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; MIPS32-EB: mfc1 $[[LO0:[0-9]+]], $f13
+; MIPS32-EB: mtc1 $[[OR]], $f0
+; MIPS32-EB: mtc1 $[[LO0]], $f1
+
+; MIPS64: dmfc1 $[[R0:[0-9]+]], $f13
+; MIPS64: and $[[R1:[0-9]+]], $[[R0]], ${{[0-9]+}}
+; MIPS64: dmfc1 $[[R2:[0-9]+]], $f12
+; MIPS64: and $[[R3:[0-9]+]], $[[R2]], ${{[0-9]+}}
+; MIPS64: or $[[R4:[0-9]+]], $[[R3]], $[[R1]]
+; MIPS64: dmtc1 $[[R4]], $f0
%call = tail call double @copysign(double %d0, double %d1) nounwind readnone
ret double %call
}
@@ -37,17 +45,17 @@ declare double @copysign(double, double) nounwind readnone
define float @func1(float %f0, float %f1) nounwind readnone {
entry:
-; CHECK-EL: func1:
-; CHECK-EL: lui $[[T1:[0-9]+]], 32768
-; CHECK-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
-; CHECK-EL: mfc1 $[[ARG1:[0-9]+]], $f14
-; CHECK-EL: and $[[T3:[0-9]+]], $[[ARG1]], $[[MSK1]]
-; CHECK-EL: lui $[[T0:[0-9]+]], 32767
-; CHECK-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; CHECK-EL: mfc1 $[[ARG0:[0-9]+]], $f12
-; CHECK-EL: and $[[T2:[0-9]+]], $[[ARG0]], $[[MSK0]]
-; CHECK-EL: or $[[T4:[0-9]+]], $[[T2]], $[[T3]]
-; CHECK-EL: mtc1 $[[T4]], $f0
+; MIPS32-EL: func1:
+; MIPS32-EL: lui $[[T1:[0-9]+]], 32768
+; MIPS32-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
+; MIPS32-EL: mfc1 $[[ARG1:[0-9]+]], $f14
+; MIPS32-EL: and $[[T3:[0-9]+]], $[[ARG1]], $[[MSK1]]
+; MIPS32-EL: lui $[[T0:[0-9]+]], 32767
+; MIPS32-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
+; MIPS32-EL: mfc1 $[[ARG0:[0-9]+]], $f12
+; MIPS32-EL: and $[[T2:[0-9]+]], $[[ARG0]], $[[MSK0]]
+; MIPS32-EL: or $[[T4:[0-9]+]], $[[T2]], $[[T3]]
+; MIPS32-EL: mtc1 $[[T4]], $f0
%call = tail call float @copysignf(float %f0, float %f1) nounwind readnone
ret float %call
}
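; The masks checked above implement copysign bitwise: lui 32768 builds the
; sign mask 0x80000000, and lui 32767 / ori 65535 builds 0x7fffffff, giving
; (mag & 0x7fffffff) | (sgn & 0x80000000). On MIPS64 the same idea is applied
; to the whole 64-bit pattern via dmfc1/and/or/dmtc1.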
diff --git a/test/CodeGen/Mips/indirectcall.ll b/test/CodeGen/Mips/indirectcall.ll
new file mode 100644
index 0000000..ac565d6
--- /dev/null
+++ b/test/CodeGen/Mips/indirectcall.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=mipsel -relocation-model=static | FileCheck %s
+
+define void @foo0(void (i32)* nocapture %f1) nounwind {
+entry:
+; CHECK: jalr $25
+ tail call void %f1(i32 13) nounwind
+ ret void
+}
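; MIPS calling conventions route indirect calls through $t9 ($25), so the test
; checks for jalr $25 rather than a jalr through an arbitrary register.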
diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll
index fcc20f7..2333f07 100644
--- a/test/CodeGen/Mips/largeimmprinting.ll
+++ b/test/CodeGen/Mips/largeimmprinting.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mipsel -mcpu=4ke < %s | FileCheck %s
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s
%struct.S1 = type { [65536 x i8] }
diff --git a/test/CodeGen/Mips/mips64ext.ll b/test/CodeGen/Mips/mips64ext.ll
new file mode 100644
index 0000000..33af0d8
--- /dev/null
+++ b/test/CodeGen/Mips/mips64ext.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
+
+define i64 @zext64_32(i32 %a) nounwind readnone {
+entry:
+; CHECK: addiu $[[R0:[0-9]+]], ${{[0-9]+}}, 2
+; CHECK: dsll32 $[[R1:[0-9]+]], $[[R0]], 0
+; CHECK: dsrl32 ${{[0-9]+}}, $[[R1]], 0
+ %add = add i32 %a, 2
+ %conv = zext i32 %add to i64
+ ret i64 %conv
+}
diff --git a/test/CodeGen/Mips/mips64extins.ll b/test/CodeGen/Mips/mips64extins.ll
new file mode 100644
index 0000000..14f92ca
--- /dev/null
+++ b/test/CodeGen/Mips/mips64extins.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s
+
+define i64 @dext(i64 %i) nounwind readnone {
+entry:
+; CHECK: dext ${{[0-9]+}}, ${{[0-9]+}}, 5, 10
+ %shr = lshr i64 %i, 5
+ %and = and i64 %shr, 1023
+ ret i64 %and
+}
+
+define i64 @dextm(i64 %i) nounwind readnone {
+entry:
+; CHECK: dext ${{[0-9]+}}, ${{[0-9]+}}, 5, 34
+ %shr = lshr i64 %i, 5
+ %and = and i64 %shr, 17179869183
+ ret i64 %and
+}
+
+define i64 @dextu(i64 %i) nounwind readnone {
+entry:
+; CHECK: dext ${{[0-9]+}}, ${{[0-9]+}}, 34, 6
+ %shr = lshr i64 %i, 34
+ %and = and i64 %shr, 63
+ ret i64 %and
+}
+
+define i64 @dins(i64 %i, i64 %j) nounwind readnone {
+entry:
+; CHECK: dins ${{[0-9]+}}, ${{[0-9]+}}, 8, 10
+ %shl2 = shl i64 %j, 8
+ %and = and i64 %shl2, 261888
+ %and3 = and i64 %i, -261889
+ %or = or i64 %and3, %and
+ ret i64 %or
+}
+
+define i64 @dinsm(i64 %i, i64 %j) nounwind readnone {
+entry:
+; CHECK: dins ${{[0-9]+}}, ${{[0-9]+}}, 10, 33
+ %shl4 = shl i64 %j, 10
+ %and = and i64 %shl4, 8796093021184
+ %and5 = and i64 %i, -8796093021185
+ %or = or i64 %and5, %and
+ ret i64 %or
+}
+
+define i64 @dinsu(i64 %i, i64 %j) nounwind readnone {
+entry:
+; CHECK: dins ${{[0-9]+}}, ${{[0-9]+}}, 40, 13
+ %shl4 = shl i64 %j, 40
+ %and = and i64 %shl4, 9006099743113216
+ %and5 = and i64 %i, -9006099743113217
+ %or = or i64 %and5, %and
+ ret i64 %or
+}
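; Each case maps a (shift, mask) pair onto the (pos, size) operands of
; dext/dins: a mask of 2^size - 1 together with a shift by pos selects size
; bits at position pos. In @dext, lshr by 5 with mask 1023 = 2^10 - 1 becomes
; "dext ..., 5, 10"; the m/u variants exercise the encodings for size > 32
; (dextm/dinsm) and pos >= 32 (dextu/dinsu).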
diff --git a/test/CodeGen/Mips/mips64fpldst.ll b/test/CodeGen/Mips/mips64fpldst.ll
index b8f3ca9..abeff09 100644
--- a/test/CodeGen/Mips/mips64fpldst.ll
+++ b/test/CodeGen/Mips/mips64fpldst.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=mips64el -mcpu=mips64r1 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc < %s -march=mips64el -mcpu=mips64r1 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
@f0 = common global float 0.000000e+00, align 4
@d0 = common global double 0.000000e+00, align 8
diff --git a/test/CodeGen/Mips/mips64imm.ll b/test/CodeGen/Mips/mips64imm.ll
new file mode 100644
index 0000000..dca656c
--- /dev/null
+++ b/test/CodeGen/Mips/mips64imm.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+define i64 @foo3() nounwind readnone {
+entry:
+; CHECK: foo3
+; CHECK: lui $[[R0:[0-9]+]], 4660
+; CHECK: ori ${{[0-9]+}}, $[[R0]], 22136
+ ret i64 305419896
+}
+
+define i64 @foo6() nounwind readnone {
+entry:
+; CHECK: foo6
+; CHECK: ori ${{[0-9]+}}, $zero, 33332
+ ret i64 33332
+}
+
+define i64 @foo7() nounwind readnone {
+entry:
+; CHECK: foo7
+; CHECK: daddiu ${{[0-9]+}}, $zero, -32204
+ ret i64 -32204
+}
+
+define i64 @foo9() nounwind readnone {
+entry:
+; CHECK: foo9
+; CHECK: lui $[[R0:[0-9]+]], 4660
+; CHECK: ori $[[R1:[0-9]+]], $[[R0]], 22136
+; CHECK: dsll $[[R2:[0-9]+]], $[[R1]], 16
+; CHECK: ori $[[R3:[0-9]+]], $[[R2]], 36882
+; CHECK: dsll $[[R4:[0-9]+]], $[[R3]], 16
+; CHECK: ori ${{[0-9]+}}, $[[R4]], 13398
+ ret i64 1311768467284833366
+}
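; foo9 shows the general 64-bit materialization pattern, 16 bits at a time:
; 1311768467284833366 is 0x1234567890123456, built as lui 0x1234 (4660),
; ori 0x5678 (22136), dsll 16, ori 0x9012 (36882), dsll 16, ori 0x3456 (13398).
; Constants that fit in 16 bits (foo6, foo7) collapse to a single ori or
; daddiu against $zero.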
diff --git a/test/CodeGen/Mips/mips64instrs.ll b/test/CodeGen/Mips/mips64instrs.ll
index c9812a2..0418311 100644
--- a/test/CodeGen/Mips/mips64instrs.ll
+++ b/test/CodeGen/Mips/mips64instrs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips64el -mcpu=mips64r1 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
define i64 @f0(i64 %a0, i64 %a1) nounwind readnone {
entry:
@@ -116,12 +116,12 @@ entry:
ret i64 %rem
}
-declare i64 @llvm.ctlz.i64(i64) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
define i64 @f18(i64 %X) nounwind readnone {
entry:
; CHECK: dclz $2, $4
- %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X)
+ %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
ret i64 %tmp1
}
@@ -129,7 +129,7 @@ define i64 @f19(i64 %X) nounwind readnone {
entry:
; CHECK: dclo $2, $4
%neg = xor i64 %X, -1
- %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg)
+ %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
ret i64 %tmp1
}
diff --git a/test/CodeGen/Mips/mips64intldst.ll b/test/CodeGen/Mips/mips64intldst.ll
index fdf496b..af3a2f8 100644
--- a/test/CodeGen/Mips/mips64intldst.ll
+++ b/test/CodeGen/Mips/mips64intldst.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=mips64el -mcpu=mips64r1 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc < %s -march=mips64el -mcpu=mips64r1 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
@c = common global i8 0, align 4
@s = common global i16 0, align 4
diff --git a/test/CodeGen/Mips/mips64muldiv.ll b/test/CodeGen/Mips/mips64muldiv.ll
new file mode 100644
index 0000000..a89d074
--- /dev/null
+++ b/test/CodeGen/Mips/mips64muldiv.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
+define i64 @m0(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: dmult
+; CHECK: mflo
+ %mul = mul i64 %a1, %a0
+ ret i64 %mul
+}
+
+define i64 @d0(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: ddivu
+; CHECK: mflo
+ %div = udiv i64 %a0, %a1
+ ret i64 %div
+}
+
+define i64 @d1(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: ddiv
+; CHECK: mflo
+ %div = sdiv i64 %a0, %a1
+ ret i64 %div
+}
+
+define i64 @d2(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: ddivu
+; CHECK: mfhi
+ %rem = urem i64 %a0, %a1
+ ret i64 %rem
+}
+
+define i64 @d3(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: ddiv
+; CHECK: mfhi
+ %rem = srem i64 %a0, %a1
+ ret i64 %rem
+}
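; MIPS divide instructions write the quotient to LO and the remainder to HI,
; so udiv/sdiv are checked as ddivu/ddiv + mflo while urem/srem read mfhi;
; a 64x64 multiply likewise lands in HI:LO, and m0 reads the low half with
; mflo.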
diff --git a/test/CodeGen/Mips/mipslopat.ll b/test/CodeGen/Mips/mipslopat.ll
index 0279828..1f433b9 100644
--- a/test/CodeGen/Mips/mipslopat.ll
+++ b/test/CodeGen/Mips/mipslopat.ll
@@ -6,7 +6,7 @@
define void @simple_vol_file() nounwind {
entry:
- %tmp = volatile load i32** @stat_vol_ptr_int, align 4
+ %tmp = load volatile i32** @stat_vol_ptr_int, align 4
%0 = bitcast i32* %tmp to i8*
call void @llvm.prefetch(i8* %0, i32 0, i32 0, i32 1)
%tmp1 = load i32** @stat_ptr_vol_int, align 4
diff --git a/test/CodeGen/Mips/private.ll b/test/CodeGen/Mips/private.ll
index 4cc48f0..d1a67fd 100644
--- a/test/CodeGen/Mips/private.ll
+++ b/test/CodeGen/Mips/private.ll
@@ -1,19 +1,20 @@
; Test to make sure that the 'private' keyword is used correctly.
;
-; RUN: llc < %s -march=mips > %t
-; RUN: grep \\\$foo: %t
-; RUN: grep call.*\\\$foo %t
-; RUN: grep \\\$baz: %t
-; RUN: grep lw.*\\\$baz %t
+; RUN: llc -march=mips < %s | FileCheck %s
define private void @foo() {
- ret void
+; CHECK: foo:
+ ret void
}
@baz = private global i32 4
define i32 @bar() {
- call void @foo()
- %1 = load i32* @baz, align 4
- ret i32 %1
+; CHECK: bar:
+; CHECK: call16($foo)
+; CHECK: lw $[[R0:[0-9]+]], %got($baz)($
+; CHECK: lw ${{[0-9]+}}, %lo($baz)($[[R0]])
+ call void @foo()
+ %1 = load i32* @baz, align 4
+ ret i32 %1
}
diff --git a/test/CodeGen/Mips/rotate.ll b/test/CodeGen/Mips/rotate.ll
index 8e27f4a..4f3cfb7 100644
--- a/test/CodeGen/Mips/rotate.ll
+++ b/test/CodeGen/Mips/rotate.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s
; CHECK: rotrv $2, $4
define i32 @rot0(i32 %a, i32 %b) nounwind readnone {
diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll
index b0474b4..3fa852b 100644
--- a/test/CodeGen/Mips/tls.ll
+++ b/test/CodeGen/Mips/tls.ll
@@ -44,3 +44,22 @@ entry:
; STATIC: addu $[[R1:[0-9]+]], $3, $[[R0]]
; STATIC: lw $2, 0($[[R1]])
}
+
+@f3.i = internal thread_local unnamed_addr global i32 1, align 4
+
+define i32 @f3() nounwind {
+entry:
+; CHECK: f3:
+
+; PIC: addiu $4, $gp, %tlsldm(f3.i)
+; PIC: jalr $25
+; PIC: lui $[[R0:[0-9]+]], %dtprel_hi(f3.i)
+; PIC: addu $[[R1:[0-9]+]], $[[R0]], $2
+; PIC: addiu ${{[0-9]+}}, $[[R1]], %dtprel_lo(f3.i)
+
+ %0 = load i32* @f3.i, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @f3.i, align 4
+ ret i32 %inc
+}
+
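; Because @f3.i is internal, PIC code can use the local-dynamic TLS model: the
; %tlsldm(f3.i) operand sets up the __tls_get_addr call (the jalr $25), and the
; %dtprel_hi/%dtprel_lo pair then addresses the variable inside the module's
; TLS block returned in $2.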
diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll
index 75555a7..9e501be 100644
--- a/test/CodeGen/PTX/mov.ll
+++ b/test/CodeGen/PTX/mov.ll
@@ -31,31 +31,31 @@ define ptx_device double @t1_f64() {
}
define ptx_device i16 @t2_u16(i16 %x) {
-; CHECK: mov.b16 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.b16 %ret{{[0-9]+}}, %arg{{[0-9]+}};
; CHECK: ret;
ret i16 %x
}
define ptx_device i32 @t2_u32(i32 %x) {
-; CHECK: mov.b32 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.b32 %ret{{[0-9]+}}, %arg{{[0-9]+}};
; CHECK: ret;
ret i32 %x
}
define ptx_device i64 @t2_u64(i64 %x) {
-; CHECK: mov.b64 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.b64 %ret{{[0-9]+}}, %arg{{[0-9]+}};
; CHECK: ret;
ret i64 %x
}
define ptx_device float @t3_f32(float %x) {
-; CHECK: mov.f32 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.f32 %ret{{[0-9]+}}, %arg{{[0-9]+}};
; CHECK: ret;
ret float %x
}
define ptx_device double @t3_f64(double %x) {
-; CHECK: mov.f64 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.f64 %ret{{[0-9]+}}, %arg{{[0-9]+}};
; CHECK: ret;
ret double %x
}
diff --git a/test/CodeGen/PTX/parameter-order.ll b/test/CodeGen/PTX/parameter-order.ll
index 09015da..377f173 100644
--- a/test/CodeGen/PTX/parameter-order.ll
+++ b/test/CodeGen/PTX/parameter-order.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
-; CHECK: .func (.reg .b32 %ret{{[0-9]+}}) test_parameter_order (.reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}})
+; CHECK: .func (.reg .b32 %ret{{[0-9]+}}) test_parameter_order (.reg .f32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .f32 %arg{{[0-9]+}})
define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) {
; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
%result = sub i32 %b, %c
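; The PTX backend now prints incoming argument registers as %arg with typed
; declarations (.f32 for the float parameters above), where the older output
; used untyped .b32 %param registers for every argument.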
diff --git a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
index cca9e65..3620b0e 100644
--- a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
+++ b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
@@ -2,11 +2,11 @@
define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) nounwind {
%tmp19 = load i64* %t
- %tmp22 = tail call i64 @llvm.ctlz.i64( i64 %tmp19 ) ; <i64> [#uses=1]
+ %tmp22 = tail call i64 @llvm.ctlz.i64( i64 %tmp19, i1 true ) ; <i64> [#uses=1]
%tmp23 = trunc i64 %tmp22 to i32
%tmp89 = add i32 %tmp23, -64 ; <i32> [#uses=1]
%tmp90 = add i32 %tmp89, 0 ; <i32> [#uses=1]
ret i32 %tmp90
}
-declare i64 @llvm.ctlz.i64(i64)
+declare i64 @llvm.ctlz.i64(i64, i1)
diff --git a/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll b/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
index e50fac4..d10291e 100644
--- a/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
+++ b/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin -enable-ppc32-regscavenger
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
declare i8* @bar(i32)
diff --git a/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll b/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
index 9f35b83..fb8cdce 100644
--- a/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -enable-ppc32-regscavenger
+; RUN: llc < %s -march=ppc32
%struct._cpp_strbuf = type { i8*, i32, i32 }
%struct.cpp_string = type { i32, i8* }
diff --git a/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll b/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
index dd425f5..f256bca 100644
--- a/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
+++ b/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64 -enable-ppc64-regscavenger
+; RUN: llc < %s -march=ppc64
define i16 @test(i8* %d1, i16* %d2) {
%tmp237 = call i16 asm "lhbrx $0, $2, $1", "=r,r,bO,m"( i8* %d1, i32 0, i16* %d2 )
diff --git a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
index 7b6d491..e7a1cf6 100644
--- a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
@@ -12,7 +12,7 @@ declare void @IODelay(i32)
define i32 @_Z14ProgramByWordsPvyy(i8* %buffer, i64 %Offset, i64 %bufferSize) nounwind {
entry:
- volatile store i8 -1, i8* null, align 1
+ store volatile i8 -1, i8* null, align 1
%tmp28 = icmp eq i8 0, 0 ; <i1> [#uses=1]
br i1 %tmp28, label %bb107, label %bb
@@ -43,7 +43,7 @@ bb68: ; preds = %bb31
%tmp2021.i = trunc i64 %Pos.0.reg2mem.0 to i32 ; <i32> [#uses=1]
%tmp202122.i = inttoptr i32 %tmp2021.i to i8* ; <i8*> [#uses=1]
tail call void @IODelay( i32 500 ) nounwind
- %tmp53.i = volatile load i16* null, align 2 ; <i16> [#uses=2]
+ %tmp53.i = load volatile i16* null, align 2 ; <i16> [#uses=2]
%tmp5455.i = zext i16 %tmp53.i to i32 ; <i32> [#uses=1]
br i1 false, label %bb.i, label %bb65.i
@@ -59,7 +59,7 @@ bb70.i: ; preds = %bb65.i
ret i32 0
_Z24unlock_then_erase_sectory.exit: ; preds = %bb65.i
- volatile store i8 -1, i8* %tmp202122.i, align 1
+ store volatile i8 -1, i8* %tmp202122.i, align 1
%tmp93 = add i64 0, %Pos.0.reg2mem.0 ; <i64> [#uses=2]
%tmp98 = add i64 0, %Offset ; <i64> [#uses=1]
%tmp100 = icmp ugt i64 %tmp98, %tmp93 ; <i1> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
index b73382e..3c01938 100644
--- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
+++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s
; ModuleID = 'hh.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
target triple = "powerpc-apple-darwin9.6"
@@ -6,11 +6,11 @@ target triple = "powerpc-apple-darwin9.6"
define void @foo() nounwind {
entry:
+;CHECK: lis r3, 1
+;CHECK: ori r3, r3, 34524
;CHECK: mfcr r2
;CHECK: rlwinm r2, r2, 8, 0, 31
-;CHECK: lis r0, 1
-;CHECK: ori r0, r0, 34540
-;CHECK: stwx r2, r1, r0
+;CHECK: stwx r2, r1, r3
%x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1]
@@ -19,9 +19,9 @@ entry:
br label %return
return: ; preds = %entry
-;CHECK: lis r0, 1
-;CHECK: ori r0, r0, 34540
-;CHECK: lwzx r2, r1, r0
+;CHECK: lis r3, 1
+;CHECK: ori r3, r3, 34524
+;CHECK: lwzx r2, r1, r3
;CHECK: rlwinm r2, r2, 24, 0, 31
;CHECK: mtcrf 32, r2
ret void
diff --git a/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
new file mode 100644
index 0000000..6161b55
--- /dev/null
+++ b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
@@ -0,0 +1,191 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+
+; ModuleID = 'tsc.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = common global [32000 x float] zeroinitializer, align 16
+@b = common global [32000 x float] zeroinitializer, align 16
+@c = common global [32000 x float] zeroinitializer, align 16
+@d = common global [32000 x float] zeroinitializer, align 16
+@e = common global [32000 x float] zeroinitializer, align 16
+@aa = common global [256 x [256 x float]] zeroinitializer, align 16
+@bb = common global [256 x [256 x float]] zeroinitializer, align 16
+@cc = common global [256 x [256 x float]] zeroinitializer, align 16
+
+@.str11 = private unnamed_addr constant [6 x i8] c"s122 \00", align 1
+@.str152 = private unnamed_addr constant [14 x i8] c"S122\09 %.2f \09\09\00", align 1
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+declare i32 @init(i8* %name) nounwind
+declare i64 @clock() nounwind
+declare i32 @dummy(float*, float*, float*, float*, float*, [256 x float]*, [256 x float]*, [256 x float]*, float)
+declare void @check(i32 %name) nounwind
+
+; CHECK: mfcr
+; CHECK: mtcr
+
+define i32 @s122(i32 %n1, i32 %n3) nounwind {
+entry:
+ %call = tail call i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str11, i64 0, i64 0))
+ %call1 = tail call i64 @clock() nounwind
+ %sub = add nsw i32 %n1, -1
+ %cmp316 = icmp slt i32 %sub, 32000
+ br i1 %cmp316, label %entry.split.us, label %for.end.7
+
+entry.split.us: ; preds = %entry
+ %0 = sext i32 %sub to i64
+ %1 = sext i32 %n3 to i64
+ br label %for.body4.lr.ph.us
+
+for.body4.us: ; preds = %for.body4.lr.ph.us, %for.body4.us
+ %indvars.iv20 = phi i64 [ 0, %for.body4.lr.ph.us ], [ %indvars.iv.next21, %for.body4.us ]
+ %indvars.iv = phi i64 [ %0, %for.body4.lr.ph.us ], [ %indvars.iv.next, %for.body4.us ]
+ %indvars.iv.next21 = add i64 %indvars.iv20, 1
+ %sub5.us = sub i64 31999, %indvars.iv20
+ %sext = shl i64 %sub5.us, 32
+ %idxprom.us = ashr exact i64 %sext, 32
+ %arrayidx.us = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us
+ %2 = load float* %arrayidx.us, align 4, !tbaa !5
+ %arrayidx7.us = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv
+ %3 = load float* %arrayidx7.us, align 4, !tbaa !5
+ %add8.us = fadd float %3, %2
+ store float %add8.us, float* %arrayidx7.us, align 4, !tbaa !5
+ %indvars.iv.next = add i64 %indvars.iv, %1
+ %4 = trunc i64 %indvars.iv.next to i32
+ %cmp3.us = icmp slt i32 %4, 32000
+ br i1 %cmp3.us, label %for.body4.us, label %for.body4.lr.ph.us.1
+
+for.body4.lr.ph.us: ; preds = %entry.split.us, %for.end.us.4
+ %nl.019.us = phi i32 [ 0, %entry.split.us ], [ %inc.us.4, %for.end.us.4 ]
+ br label %for.body4.us
+
+for.end12: ; preds = %for.end.7, %for.end.us.4
+ %call13 = tail call i64 @clock() nounwind
+ %sub14 = sub nsw i64 %call13, %call1
+ %conv = sitofp i64 %sub14 to double
+ %div = fdiv double %conv, 1.000000e+06
+ %call15 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str152, i64 0, i64 0), double %div) nounwind
+ tail call void @check(i32 1)
+ ret i32 0
+
+for.body4.lr.ph.us.1: ; preds = %for.body4.us
+ %call10.us = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ br label %for.body4.us.1
+
+for.body4.us.1: ; preds = %for.body4.us.1, %for.body4.lr.ph.us.1
+ %indvars.iv20.1 = phi i64 [ 0, %for.body4.lr.ph.us.1 ], [ %indvars.iv.next21.1, %for.body4.us.1 ]
+ %indvars.iv.1 = phi i64 [ %0, %for.body4.lr.ph.us.1 ], [ %indvars.iv.next.1, %for.body4.us.1 ]
+ %indvars.iv.next21.1 = add i64 %indvars.iv20.1, 1
+ %sub5.us.1 = sub i64 31999, %indvars.iv20.1
+ %sext23 = shl i64 %sub5.us.1, 32
+ %idxprom.us.1 = ashr exact i64 %sext23, 32
+ %arrayidx.us.1 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.1
+ %5 = load float* %arrayidx.us.1, align 4, !tbaa !5
+ %arrayidx7.us.1 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.1
+ %6 = load float* %arrayidx7.us.1, align 4, !tbaa !5
+ %add8.us.1 = fadd float %6, %5
+ store float %add8.us.1, float* %arrayidx7.us.1, align 4, !tbaa !5
+ %indvars.iv.next.1 = add i64 %indvars.iv.1, %1
+ %7 = trunc i64 %indvars.iv.next.1 to i32
+ %cmp3.us.1 = icmp slt i32 %7, 32000
+ br i1 %cmp3.us.1, label %for.body4.us.1, label %for.body4.lr.ph.us.2
+
+for.body4.lr.ph.us.2: ; preds = %for.body4.us.1
+ %call10.us.1 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ br label %for.body4.us.2
+
+for.body4.us.2: ; preds = %for.body4.us.2, %for.body4.lr.ph.us.2
+ %indvars.iv20.2 = phi i64 [ 0, %for.body4.lr.ph.us.2 ], [ %indvars.iv.next21.2, %for.body4.us.2 ]
+ %indvars.iv.2 = phi i64 [ %0, %for.body4.lr.ph.us.2 ], [ %indvars.iv.next.2, %for.body4.us.2 ]
+ %indvars.iv.next21.2 = add i64 %indvars.iv20.2, 1
+ %sub5.us.2 = sub i64 31999, %indvars.iv20.2
+ %sext24 = shl i64 %sub5.us.2, 32
+ %idxprom.us.2 = ashr exact i64 %sext24, 32
+ %arrayidx.us.2 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.2
+ %8 = load float* %arrayidx.us.2, align 4, !tbaa !5
+ %arrayidx7.us.2 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.2
+ %9 = load float* %arrayidx7.us.2, align 4, !tbaa !5
+ %add8.us.2 = fadd float %9, %8
+ store float %add8.us.2, float* %arrayidx7.us.2, align 4, !tbaa !5
+ %indvars.iv.next.2 = add i64 %indvars.iv.2, %1
+ %10 = trunc i64 %indvars.iv.next.2 to i32
+ %cmp3.us.2 = icmp slt i32 %10, 32000
+ br i1 %cmp3.us.2, label %for.body4.us.2, label %for.body4.lr.ph.us.3
+
+for.body4.lr.ph.us.3: ; preds = %for.body4.us.2
+ %call10.us.2 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ br label %for.body4.us.3
+
+for.body4.us.3: ; preds = %for.body4.us.3, %for.body4.lr.ph.us.3
+ %indvars.iv20.3 = phi i64 [ 0, %for.body4.lr.ph.us.3 ], [ %indvars.iv.next21.3, %for.body4.us.3 ]
+ %indvars.iv.3 = phi i64 [ %0, %for.body4.lr.ph.us.3 ], [ %indvars.iv.next.3, %for.body4.us.3 ]
+ %indvars.iv.next21.3 = add i64 %indvars.iv20.3, 1
+ %sub5.us.3 = sub i64 31999, %indvars.iv20.3
+ %sext25 = shl i64 %sub5.us.3, 32
+ %idxprom.us.3 = ashr exact i64 %sext25, 32
+ %arrayidx.us.3 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.3
+ %11 = load float* %arrayidx.us.3, align 4, !tbaa !5
+ %arrayidx7.us.3 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.3
+ %12 = load float* %arrayidx7.us.3, align 4, !tbaa !5
+ %add8.us.3 = fadd float %12, %11
+ store float %add8.us.3, float* %arrayidx7.us.3, align 4, !tbaa !5
+ %indvars.iv.next.3 = add i64 %indvars.iv.3, %1
+ %13 = trunc i64 %indvars.iv.next.3 to i32
+ %cmp3.us.3 = icmp slt i32 %13, 32000
+ br i1 %cmp3.us.3, label %for.body4.us.3, label %for.body4.lr.ph.us.4
+
+for.body4.lr.ph.us.4: ; preds = %for.body4.us.3
+ %call10.us.3 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ br label %for.body4.us.4
+
+for.body4.us.4: ; preds = %for.body4.us.4, %for.body4.lr.ph.us.4
+ %indvars.iv20.4 = phi i64 [ 0, %for.body4.lr.ph.us.4 ], [ %indvars.iv.next21.4, %for.body4.us.4 ]
+ %indvars.iv.4 = phi i64 [ %0, %for.body4.lr.ph.us.4 ], [ %indvars.iv.next.4, %for.body4.us.4 ]
+ %indvars.iv.next21.4 = add i64 %indvars.iv20.4, 1
+ %sub5.us.4 = sub i64 31999, %indvars.iv20.4
+ %sext26 = shl i64 %sub5.us.4, 32
+ %idxprom.us.4 = ashr exact i64 %sext26, 32
+ %arrayidx.us.4 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.4
+ %14 = load float* %arrayidx.us.4, align 4, !tbaa !5
+ %arrayidx7.us.4 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.4
+ %15 = load float* %arrayidx7.us.4, align 4, !tbaa !5
+ %add8.us.4 = fadd float %15, %14
+ store float %add8.us.4, float* %arrayidx7.us.4, align 4, !tbaa !5
+ %indvars.iv.next.4 = add i64 %indvars.iv.4, %1
+ %16 = trunc i64 %indvars.iv.next.4 to i32
+ %cmp3.us.4 = icmp slt i32 %16, 32000
+ br i1 %cmp3.us.4, label %for.body4.us.4, label %for.end.us.4
+
+for.end.us.4: ; preds = %for.body4.us.4
+ %call10.us.4 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %inc.us.4 = add nsw i32 %nl.019.us, 5
+ %exitcond.4 = icmp eq i32 %inc.us.4, 200000
+ br i1 %exitcond.4, label %for.end12, label %for.body4.lr.ph.us
+
+for.end.7: ; preds = %entry, %for.end.7
+ %nl.019 = phi i32 [ %inc.7, %for.end.7 ], [ 0, %entry ]
+ %call10 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.1 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.2 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.3 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.4 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.5 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.6 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.7 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %inc.7 = add nsw i32 %nl.019, 8
+ %exitcond.7 = icmp eq i32 %inc.7, 200000
+ br i1 %exitcond.7, label %for.end12, label %for.end.7
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+declare i32 @puts(i8* nocapture) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!4 = metadata !{metadata !"int", metadata !1}
+!5 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll b/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
new file mode 100644
index 0000000..52bf6c7
--- /dev/null
+++ b/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
@@ -0,0 +1,225 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+
+; ModuleID = 'tsc.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = common global [32000 x float] zeroinitializer, align 16
+@b = common global [32000 x float] zeroinitializer, align 16
+@c = common global [32000 x float] zeroinitializer, align 16
+@d = common global [32000 x float] zeroinitializer, align 16
+@e = common global [32000 x float] zeroinitializer, align 16
+@aa = common global [256 x [256 x float]] zeroinitializer, align 16
+@bb = common global [256 x [256 x float]] zeroinitializer, align 16
+@cc = common global [256 x [256 x float]] zeroinitializer, align 16
+@temp = common global float 0.000000e+00, align 4
+
+@.str81 = private unnamed_addr constant [6 x i8] c"s3110\00", align 1
+@.str235 = private unnamed_addr constant [15 x i8] c"S3110\09 %.2f \09\09\00", align 1
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+declare i32 @init(i8* %name) nounwind
+declare i64 @clock() nounwind
+declare i32 @dummy(float*, float*, float*, float*, float*, [256 x float]*, [256 x float]*, [256 x float]*, float)
+declare void @check(i32 %name) nounwind
+
+; CHECK: mfcr
+; CHECK: mtcr
+
+define i32 @s3110() nounwind {
+entry:
+ %call = tail call i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str81, i64 0, i64 0))
+ %call1 = tail call i64 @clock() nounwind
+ br label %for.body
+
+for.body: ; preds = %for.end17, %entry
+ %nl.041 = phi i32 [ 0, %entry ], [ %inc22, %for.end17 ]
+ %0 = load float* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0, i64 0), align 16, !tbaa !5
+ br label %for.cond5.preheader
+
+for.cond5.preheader: ; preds = %for.inc15, %for.body
+ %indvars.iv42 = phi i64 [ 0, %for.body ], [ %indvars.iv.next43, %for.inc15 ]
+ %max.139 = phi float [ %0, %for.body ], [ %max.3.15, %for.inc15 ]
+ %xindex.138 = phi i32 [ 0, %for.body ], [ %xindex.3.15, %for.inc15 ]
+ %yindex.137 = phi i32 [ 0, %for.body ], [ %yindex.3.15, %for.inc15 ]
+ br label %for.body7
+
+for.body7: ; preds = %for.body7, %for.cond5.preheader
+ %indvars.iv = phi i64 [ 0, %for.cond5.preheader ], [ %indvars.iv.next.15, %for.body7 ]
+ %max.235 = phi float [ %max.139, %for.cond5.preheader ], [ %max.3.15, %for.body7 ]
+ %xindex.234 = phi i32 [ %xindex.138, %for.cond5.preheader ], [ %xindex.3.15, %for.body7 ]
+ %yindex.233 = phi i32 [ %yindex.137, %for.cond5.preheader ], [ %yindex.3.15, %for.body7 ]
+ %arrayidx9 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv
+ %1 = load float* %arrayidx9, align 16, !tbaa !5
+ %cmp10 = fcmp ogt float %1, %max.235
+ %2 = trunc i64 %indvars.iv to i32
+ %yindex.3 = select i1 %cmp10, i32 %2, i32 %yindex.233
+ %3 = trunc i64 %indvars.iv42 to i32
+ %xindex.3 = select i1 %cmp10, i32 %3, i32 %xindex.234
+ %max.3 = select i1 %cmp10, float %1, float %max.235
+ %indvars.iv.next45 = or i64 %indvars.iv, 1
+ %arrayidx9.1 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next45
+ %4 = load float* %arrayidx9.1, align 4, !tbaa !5
+ %cmp10.1 = fcmp ogt float %4, %max.3
+ %5 = trunc i64 %indvars.iv.next45 to i32
+ %yindex.3.1 = select i1 %cmp10.1, i32 %5, i32 %yindex.3
+ %xindex.3.1 = select i1 %cmp10.1, i32 %3, i32 %xindex.3
+ %max.3.1 = select i1 %cmp10.1, float %4, float %max.3
+ %indvars.iv.next.146 = or i64 %indvars.iv, 2
+ %arrayidx9.2 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.146
+ %6 = load float* %arrayidx9.2, align 8, !tbaa !5
+ %cmp10.2 = fcmp ogt float %6, %max.3.1
+ %7 = trunc i64 %indvars.iv.next.146 to i32
+ %yindex.3.2 = select i1 %cmp10.2, i32 %7, i32 %yindex.3.1
+ %xindex.3.2 = select i1 %cmp10.2, i32 %3, i32 %xindex.3.1
+ %max.3.2 = select i1 %cmp10.2, float %6, float %max.3.1
+ %indvars.iv.next.247 = or i64 %indvars.iv, 3
+ %arrayidx9.3 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.247
+ %8 = load float* %arrayidx9.3, align 4, !tbaa !5
+ %cmp10.3 = fcmp ogt float %8, %max.3.2
+ %9 = trunc i64 %indvars.iv.next.247 to i32
+ %yindex.3.3 = select i1 %cmp10.3, i32 %9, i32 %yindex.3.2
+ %xindex.3.3 = select i1 %cmp10.3, i32 %3, i32 %xindex.3.2
+ %max.3.3 = select i1 %cmp10.3, float %8, float %max.3.2
+ %indvars.iv.next.348 = or i64 %indvars.iv, 4
+ %arrayidx9.4 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.348
+ %10 = load float* %arrayidx9.4, align 16, !tbaa !5
+ %cmp10.4 = fcmp ogt float %10, %max.3.3
+ %11 = trunc i64 %indvars.iv.next.348 to i32
+ %yindex.3.4 = select i1 %cmp10.4, i32 %11, i32 %yindex.3.3
+ %xindex.3.4 = select i1 %cmp10.4, i32 %3, i32 %xindex.3.3
+ %max.3.4 = select i1 %cmp10.4, float %10, float %max.3.3
+ %indvars.iv.next.449 = or i64 %indvars.iv, 5
+ %arrayidx9.5 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.449
+ %12 = load float* %arrayidx9.5, align 4, !tbaa !5
+ %cmp10.5 = fcmp ogt float %12, %max.3.4
+ %13 = trunc i64 %indvars.iv.next.449 to i32
+ %yindex.3.5 = select i1 %cmp10.5, i32 %13, i32 %yindex.3.4
+ %xindex.3.5 = select i1 %cmp10.5, i32 %3, i32 %xindex.3.4
+ %max.3.5 = select i1 %cmp10.5, float %12, float %max.3.4
+ %indvars.iv.next.550 = or i64 %indvars.iv, 6
+ %arrayidx9.6 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.550
+ %14 = load float* %arrayidx9.6, align 8, !tbaa !5
+ %cmp10.6 = fcmp ogt float %14, %max.3.5
+ %15 = trunc i64 %indvars.iv.next.550 to i32
+ %yindex.3.6 = select i1 %cmp10.6, i32 %15, i32 %yindex.3.5
+ %xindex.3.6 = select i1 %cmp10.6, i32 %3, i32 %xindex.3.5
+ %max.3.6 = select i1 %cmp10.6, float %14, float %max.3.5
+ %indvars.iv.next.651 = or i64 %indvars.iv, 7
+ %arrayidx9.7 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.651
+ %16 = load float* %arrayidx9.7, align 4, !tbaa !5
+ %cmp10.7 = fcmp ogt float %16, %max.3.6
+ %17 = trunc i64 %indvars.iv.next.651 to i32
+ %yindex.3.7 = select i1 %cmp10.7, i32 %17, i32 %yindex.3.6
+ %xindex.3.7 = select i1 %cmp10.7, i32 %3, i32 %xindex.3.6
+ %max.3.7 = select i1 %cmp10.7, float %16, float %max.3.6
+ %indvars.iv.next.752 = or i64 %indvars.iv, 8
+ %arrayidx9.8 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.752
+ %18 = load float* %arrayidx9.8, align 16, !tbaa !5
+ %cmp10.8 = fcmp ogt float %18, %max.3.7
+ %19 = trunc i64 %indvars.iv.next.752 to i32
+ %yindex.3.8 = select i1 %cmp10.8, i32 %19, i32 %yindex.3.7
+ %xindex.3.8 = select i1 %cmp10.8, i32 %3, i32 %xindex.3.7
+ %max.3.8 = select i1 %cmp10.8, float %18, float %max.3.7
+ %indvars.iv.next.853 = or i64 %indvars.iv, 9
+ %arrayidx9.9 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.853
+ %20 = load float* %arrayidx9.9, align 4, !tbaa !5
+ %cmp10.9 = fcmp ogt float %20, %max.3.8
+ %21 = trunc i64 %indvars.iv.next.853 to i32
+ %yindex.3.9 = select i1 %cmp10.9, i32 %21, i32 %yindex.3.8
+ %xindex.3.9 = select i1 %cmp10.9, i32 %3, i32 %xindex.3.8
+ %max.3.9 = select i1 %cmp10.9, float %20, float %max.3.8
+ %indvars.iv.next.954 = or i64 %indvars.iv, 10
+ %arrayidx9.10 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.954
+ %22 = load float* %arrayidx9.10, align 8, !tbaa !5
+ %cmp10.10 = fcmp ogt float %22, %max.3.9
+ %23 = trunc i64 %indvars.iv.next.954 to i32
+ %yindex.3.10 = select i1 %cmp10.10, i32 %23, i32 %yindex.3.9
+ %xindex.3.10 = select i1 %cmp10.10, i32 %3, i32 %xindex.3.9
+ %max.3.10 = select i1 %cmp10.10, float %22, float %max.3.9
+ %indvars.iv.next.1055 = or i64 %indvars.iv, 11
+ %arrayidx9.11 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1055
+ %24 = load float* %arrayidx9.11, align 4, !tbaa !5
+ %cmp10.11 = fcmp ogt float %24, %max.3.10
+ %25 = trunc i64 %indvars.iv.next.1055 to i32
+ %yindex.3.11 = select i1 %cmp10.11, i32 %25, i32 %yindex.3.10
+ %xindex.3.11 = select i1 %cmp10.11, i32 %3, i32 %xindex.3.10
+ %max.3.11 = select i1 %cmp10.11, float %24, float %max.3.10
+ %indvars.iv.next.1156 = or i64 %indvars.iv, 12
+ %arrayidx9.12 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1156
+ %26 = load float* %arrayidx9.12, align 16, !tbaa !5
+ %cmp10.12 = fcmp ogt float %26, %max.3.11
+ %27 = trunc i64 %indvars.iv.next.1156 to i32
+ %yindex.3.12 = select i1 %cmp10.12, i32 %27, i32 %yindex.3.11
+ %xindex.3.12 = select i1 %cmp10.12, i32 %3, i32 %xindex.3.11
+ %max.3.12 = select i1 %cmp10.12, float %26, float %max.3.11
+ %indvars.iv.next.1257 = or i64 %indvars.iv, 13
+ %arrayidx9.13 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1257
+ %28 = load float* %arrayidx9.13, align 4, !tbaa !5
+ %cmp10.13 = fcmp ogt float %28, %max.3.12
+ %29 = trunc i64 %indvars.iv.next.1257 to i32
+ %yindex.3.13 = select i1 %cmp10.13, i32 %29, i32 %yindex.3.12
+ %xindex.3.13 = select i1 %cmp10.13, i32 %3, i32 %xindex.3.12
+ %max.3.13 = select i1 %cmp10.13, float %28, float %max.3.12
+ %indvars.iv.next.1358 = or i64 %indvars.iv, 14
+ %arrayidx9.14 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1358
+ %30 = load float* %arrayidx9.14, align 8, !tbaa !5
+ %cmp10.14 = fcmp ogt float %30, %max.3.13
+ %31 = trunc i64 %indvars.iv.next.1358 to i32
+ %yindex.3.14 = select i1 %cmp10.14, i32 %31, i32 %yindex.3.13
+ %xindex.3.14 = select i1 %cmp10.14, i32 %3, i32 %xindex.3.13
+ %max.3.14 = select i1 %cmp10.14, float %30, float %max.3.13
+ %indvars.iv.next.1459 = or i64 %indvars.iv, 15
+ %arrayidx9.15 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1459
+ %32 = load float* %arrayidx9.15, align 4, !tbaa !5
+ %cmp10.15 = fcmp ogt float %32, %max.3.14
+ %33 = trunc i64 %indvars.iv.next.1459 to i32
+ %yindex.3.15 = select i1 %cmp10.15, i32 %33, i32 %yindex.3.14
+ %xindex.3.15 = select i1 %cmp10.15, i32 %3, i32 %xindex.3.14
+ %max.3.15 = select i1 %cmp10.15, float %32, float %max.3.14
+ %indvars.iv.next.15 = add i64 %indvars.iv, 16
+ %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32
+ %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 256
+ br i1 %exitcond.15, label %for.inc15, label %for.body7
+
+for.inc15: ; preds = %for.body7
+ %indvars.iv.next43 = add i64 %indvars.iv42, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next43 to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %for.end17, label %for.cond5.preheader
+
+for.end17: ; preds = %for.inc15
+ %conv = sitofp i32 %xindex.3.15 to float
+ %add = fadd float %max.3.15, %conv
+ %conv18 = sitofp i32 %yindex.3.15 to float
+ %add19 = fadd float %add, %conv18
+ %call20 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float %add19) nounwind
+ %inc22 = add nsw i32 %nl.041, 1
+ %exitcond44 = icmp eq i32 %inc22, 78100
+ br i1 %exitcond44, label %for.end23, label %for.body
+
+for.end23: ; preds = %for.end17
+ %call24 = tail call i64 @clock() nounwind
+ %sub = sub nsw i64 %call24, %call1
+ %conv25 = sitofp i64 %sub to double
+ %div = fdiv double %conv25, 1.000000e+06
+ %call26 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str235, i64 0, i64 0), double %div) nounwind
+ %add29 = fadd float %add, 1.000000e+00
+ %add31 = fadd float %add29, %conv18
+ %add32 = fadd float %add31, 1.000000e+00
+ store float %add32, float* @temp, align 4, !tbaa !5
+ tail call void @check(i32 -1)
+ ret i32 0
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+declare i32 @puts(i8* nocapture) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!4 = metadata !{metadata !"int", metadata !1}
+!5 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll b/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
new file mode 100644
index 0000000..a18829e
--- /dev/null
+++ b/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+
+define void @test(i32* nocapture %x, i64* %xx, i32* %yp) nounwind uwtable ssp {
+entry:
+ %yy = load i32* %yp
+ %y = add i32 %yy, 1
+ %z = zext i32 %y to i64
+ %z2 = shl i64 %z, 32
+ store i64 %z2, i64* %xx, align 4
+ ret void
+
+; CHECK: test:
+; CHECK: sldi {{.*}}, {{.*}}, 32
+; Note: it's okay if someday CodeGen gets smart enough to optimize out
+; the shift.
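+; Either way, the stored value must carry %y in its upper 32 bits with
+; zeros below, which is what the sldi by 32 produces.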
+}
diff --git a/test/CodeGen/PowerPC/Frames-alloca.ll b/test/CodeGen/PowerPC/Frames-alloca.ll
index 466ae80..28dd08c 100644
--- a/test/CodeGen/PowerPC/Frames-alloca.ll
+++ b/test/CodeGen/PowerPC/Frames-alloca.ll
@@ -2,9 +2,9 @@
; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC64
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-NOFP
; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC64-NOFP
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS-NOFP
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32-RS
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-RS-NOFP
; CHECK-PPC32: stw r31, -4(r1)
; CHECK-PPC32: lwz r1, 0(r1)
diff --git a/test/CodeGen/PowerPC/Frames-large.ll b/test/CodeGen/PowerPC/Frames-large.ll
index 302d3df..a2af87e 100644
--- a/test/CodeGen/PowerPC/Frames-large.ll
+++ b/test/CodeGen/PowerPC/Frames-large.ll
@@ -15,18 +15,20 @@ define i32* @f1() nounwind {
; PPC32-NOFP: _f1:
; PPC32-NOFP: lis r0, -1
+; PPC32-NOFP: addi r3, r1, 68
; PPC32-NOFP: ori r0, r0, 32704
; PPC32-NOFP: stwux r1, r1, r0
-; PPC32-NOFP: addi r3, r1, 68
; PPC32-NOFP: lwz r1, 0(r1)
; PPC32-NOFP: blr
+
; PPC32-FP: _f1:
-; PPC32-FP: stw r31, -4(r1)
; PPC32-FP: lis r0, -1
+; PPC32-FP: stw r31, -4(r1)
+; PPC32-FP: mr r31, r1
; PPC32-FP: ori r0, r0, 32704
+; PPC32-FP: addi r3, r31, 64
; PPC32-FP: stwux r1, r1, r0
-; ...
; PPC32-FP: lwz r1, 0(r1)
; PPC32-FP: lwz r31, -4(r1)
; PPC32-FP: blr
@@ -34,19 +36,20 @@ define i32* @f1() nounwind {
; PPC64-NOFP: _f1:
; PPC64-NOFP: lis r0, -1
+; PPC64-NOFP: addi r3, r1, 116
; PPC64-NOFP: ori r0, r0, 32656
; PPC64-NOFP: stdux r1, r1, r0
-; PPC64-NOFP: addi r3, r1, 116
; PPC64-NOFP: ld r1, 0(r1)
; PPC64-NOFP: blr
; PPC64-FP: _f1:
-; PPC64-FP: std r31, -8(r1)
; PPC64-FP: lis r0, -1
+; PPC64-FP: std r31, -8(r1)
+; PPC64-FP: mr r31, r1
; PPC64-FP: ori r0, r0, 32640
+; PPC64-FP: addi r3, r31, 124
; PPC64-FP: stdux r1, r1, r0
-; ...
; PPC64-FP: ld r1, 0(r1)
; PPC64-FP: ld r31, -8(r1)
; PPC64-FP: blr
diff --git a/test/CodeGen/PowerPC/big-endian-formal-args.ll b/test/CodeGen/PowerPC/big-endian-formal-args.ll
index 318ccb0..9a456b6 100644
--- a/test/CodeGen/PowerPC/big-endian-formal-args.ll
+++ b/test/CodeGen/PowerPC/big-endian-formal-args.ll
@@ -2,8 +2,8 @@
declare void @bar(i64 %x, i64 %y)
-; CHECK: li 4, 2
; CHECK: li {{[53]}}, 0
+; CHECK: li 4, 2
; CHECK: li 6, 3
; CHECK: mr {{[53]}}, {{[53]}}
diff --git a/test/CodeGen/PowerPC/bl8_elf_nop.ll b/test/CodeGen/PowerPC/bl8_elf_nop.ll
new file mode 100644
index 0000000..386c59e
--- /dev/null
+++ b/test/CodeGen/PowerPC/bl8_elf_nop.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
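+; On 64-bit ELF, a call that may resolve to external code must be followed
+; by a nop that the linker can rewrite into a TOC restore; check that one
+; is emitted after the bl.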
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare i32 @clock() nounwind
+
+define i32 @func() {
+entry:
+ %call = call i32 @clock() nounwind
+ %call2 = add i32 %call, 7
+ ret i32 %call2
+}
+
+; CHECK: bl clock
+; CHECK-NEXT: nop
+
diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll
index ab493a0..1d365d4 100644
--- a/test/CodeGen/PowerPC/cttz.ll
+++ b/test/CodeGen/PowerPC/cttz.ll
@@ -1,11 +1,11 @@
; Make sure this testcase does not use ctpop
; RUN: llc < %s -march=ppc32 | grep -i cntlzw
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
define i32 @bar(i32 %x) {
entry:
- %tmp.1 = call i32 @llvm.cttz.i32( i32 %x ) ; <i32> [#uses=1]
+ %tmp.1 = call i32 @llvm.cttz.i32( i32 %x, i1 true ) ; <i32> [#uses=1]
ret i32 %tmp.1
}
diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll
index 36779ff..4b6f88b 100644
--- a/test/CodeGen/PowerPC/indirectbr.ll
+++ b/test/CodeGen/PowerPC/indirectbr.ll
@@ -17,10 +17,22 @@ entry:
bb2: ; preds = %entry, %bb3
%gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1]
; PIC: mtctr
+; PIC-NEXT: li
+; PIC-NEXT: li
+; PIC-NEXT: li
+; PIC-NEXT: li
; PIC-NEXT: bctr
; STATIC: mtctr
+; STATIC-NEXT: li
+; STATIC-NEXT: li
+; STATIC-NEXT: li
+; STATIC-NEXT: li
; STATIC-NEXT: bctr
; PPC64: mtctr
+; PPC64-NEXT: li
+; PPC64-NEXT: li
+; PPC64-NEXT: li
+; PPC64-NEXT: li
; PPC64-NEXT: bctr
indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
diff --git a/test/CodeGen/SPARC/2011-12-03-TailDuplication.ll b/test/CodeGen/SPARC/2011-12-03-TailDuplication.ll
new file mode 100644
index 0000000..aa7de16
--- /dev/null
+++ b/test/CodeGen/SPARC/2011-12-03-TailDuplication.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=sparc < %s
+
+define void @foo(i32 %a) nounwind {
+entry:
+ br i1 undef, label %return, label %else.0
+
+else.0:
+ br i1 undef, label %if.end.0, label %return
+
+if.end.0:
+ br i1 undef, label %if.then.1, label %else.1
+
+else.1:
+ %0 = bitcast i8* undef to i8**
+ br label %else.1.2
+
+if.then.1:
+ br i1 undef, label %return, label %return
+
+else.1.2:
+ br i1 undef, label %return, label %return
+
+return:
+ ret void
+}
diff --git a/test/CodeGen/Thumb/vargs.ll b/test/CodeGen/Thumb/vargs.ll
index c2ba208..50a1a07 100644
--- a/test/CodeGen/Thumb/vargs.ll
+++ b/test/CodeGen/Thumb/vargs.ll
@@ -13,9 +13,9 @@ entry:
bb: ; preds = %bb, %entry
%a_addr.0 = phi i32 [ %a, %entry ], [ %tmp5, %bb ] ; <i32> [#uses=2]
- %tmp = volatile load i8** %va ; <i8*> [#uses=2]
+ %tmp = load volatile i8** %va ; <i8*> [#uses=2]
%tmp2 = getelementptr i8* %tmp, i32 4 ; <i8*> [#uses=1]
- volatile store i8* %tmp2, i8** %va
+ store volatile i8* %tmp2, i8** %va
%tmp5 = add i32 %a_addr.0, -1 ; <i32> [#uses=1]
%tmp.upgrd.2 = icmp eq i32 %a_addr.0, 1 ; <i1> [#uses=1]
br i1 %tmp.upgrd.2, label %bb7, label %bb
diff --git a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
index bb734ac..fcf1bae 100644
--- a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
+++ b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
@@ -21,7 +21,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
define void @test(%s1* %this, i32 %format, i32 %w, i32 %h, i32 %levels, i32* %s, i8* %data, i32* nocapture %rowbytes, void (i8*, i8*)* %release, i8* %info) nounwind {
entry:
%tmp1 = getelementptr inbounds %s1* %this, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0
- volatile store i32 1, i32* %tmp1, align 4
+ store volatile i32 1, i32* %tmp1, align 4
%tmp12 = getelementptr inbounds %s1* %this, i32 0, i32 1
store i32 %levels, i32* %tmp12, align 4
%tmp13 = getelementptr inbounds %s1* %this, i32 0, i32 3
@@ -46,7 +46,7 @@ entry:
%tmp24 = shl i32 %flags.0, 16
%asmtmp.i.i.i = tail call %0 asm sideeffect "\0A0:\09ldrex $1, [$2]\0A\09orr $1, $1, $3\0A\09strex $0, $1, [$2]\0A\09cmp $0, #0\0A\09bne 0b", "=&r,=&r,r,r,~{memory},~{cc}"(i32* %tmp1, i32 %tmp24) nounwind
%tmp25 = getelementptr inbounds %s1* %this, i32 0, i32 2, i32 0, i32 0
- volatile store i32 1, i32* %tmp25, align 4
+ store volatile i32 1, i32* %tmp25, align 4
%tmp26 = icmp eq i32 %levels, 0
br i1 %tmp26, label %return, label %bb4
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index 4e16c9a..8285742 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -94,8 +94,8 @@ bb.nph:
bb: ; preds = %bb, %bb.nph
; CHECK: bb
-; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]]
; CHECK: eor.w
+; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]]
; CHECK-NOT: eor
; CHECK: and
%data_addr.013 = phi i8 [ %data, %bb.nph ], [ %8, %bb ] ; <i8> [#uses=2]
diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll
index 0992fa8..7993bbf 100644
--- a/test/CodeGen/Thumb2/thumb2-cbnz.ll
+++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
; rdar://7354379
-declare double @floor(double) nounwind readnone
+declare double @foo(double) nounwind readnone
define void @t(i32 %c, double %b) {
entry:
@@ -26,7 +26,7 @@ bb9: ; preds = %bb7
; CHECK: cmp r0, #0
; CHECK: cmp r0, #0
; CHECK-NEXT: cbnz
- %0 = tail call double @floor(double %b) nounwind readnone ; <double> [#uses=0]
+ %0 = tail call double @foo(double %b) nounwind readnone ; <double> [#uses=0]
br label %bb11
bb11: ; preds = %bb9, %bb7
diff --git a/test/CodeGen/Thumb2/thumb2-clz.ll b/test/CodeGen/Thumb2/thumb2-clz.ll
index 00a54a0..f7e9665 100644
--- a/test/CodeGen/Thumb2/thumb2-clz.ll
+++ b/test/CodeGen/Thumb2/thumb2-clz.ll
@@ -3,8 +3,8 @@
define i32 @f1(i32 %a) {
; CHECK: f1:
; CHECK: clz r
- %tmp = tail call i32 @llvm.ctlz.i32(i32 %a)
+ %tmp = tail call i32 @llvm.ctlz.i32(i32 %a, i1 true)
ret i32 %tmp
}
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll
index ceefabb..74729fd 100644
--- a/test/CodeGen/Thumb2/thumb2-select_xform.ll
+++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -3,8 +3,8 @@
define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
; CHECK: t1
; CHECK: mvn r0, #-2147483648
-; CHECK: add r0, r1
; CHECK: cmp r2, #10
+; CHECK: add r0, r1
; CHECK: it gt
; CHECK: movgt r0, r1
%tmp1 = icmp sgt i32 %c, 10
diff --git a/test/CodeGen/X86/2008-01-16-Trampoline.ll b/test/CodeGen/X86/2008-01-16-Trampoline.ll
deleted file mode 100644
index 704b2ba..0000000
--- a/test/CodeGen/X86/2008-01-16-Trampoline.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=x86
-; RUN: llc < %s -march=x86-64
-
- %struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets = type { i32, i32, void (i32, i32)*, i8 (i32, i32)* }
-
-define fastcc i32 @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets.5146(i64 %table.0.0, i64 %table.0.1, i32 %last, i32 %pos) {
-entry:
- %tramp22 = call i8* @llvm.init.trampoline( i8* null, i8* bitcast (void (%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets*, i32, i32)* @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets__move.5177 to i8*), i8* null ) ; <i8*> [#uses=0]
- unreachable
-}
-
-declare void @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets__move.5177(%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets* nest , i32, i32) nounwind
-
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind
diff --git a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
index 90af387..a6234d3 100644
--- a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86 | not grep movsd
; RUN: llc < %s -march=x86 | grep movw
; RUN: llc < %s -march=x86 | grep addw
-; These transforms are turned off for volatile loads and stores.
+; These transforms are turned off for volatile loads and stores.
; Check that they weren't turned off for all loads and stores!
@atomic = global double 0.000000e+00 ; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
index 8665282..037559e 100644
--- a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -8,13 +8,13 @@
define i16 @f(i64 %x, double %y) {
%b = bitcast i64 %x to double ; <double> [#uses=1]
- volatile store double %b, double* @atomic ; one processor operation only
- volatile store double 0.000000e+00, double* @atomic2 ; one processor operation only
+ store volatile double %b, double* @atomic ; one processor operation only
+ store volatile double 0.000000e+00, double* @atomic2 ; one processor operation only
%b2 = bitcast double %y to i64 ; <i64> [#uses=1]
- volatile store i64 %b2, i64* @anything ; may transform to store of double
- %l = volatile load i32* @ioport ; must not narrow
+ store volatile i64 %b2, i64* @anything ; may transform to store of double
+ %l = load volatile i32* @ioport ; must not narrow
%t = trunc i32 %l to i16 ; <i16> [#uses=1]
- %l2 = volatile load i32* @ioport ; must not narrow
+ %l2 = load volatile i32* @ioport ; must not narrow
%tmp = lshr i32 %l2, 16 ; <i32> [#uses=1]
%t2 = trunc i32 %tmp to i16 ; <i16> [#uses=1]
%f = add i16 %t, %t2 ; <i16> [#uses=1]
diff --git a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll b/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
index 101b3c5..f0d46a0 100644
--- a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
+++ b/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mcpu=core2 | grep pxor | count 2
-; RUN: llc < %s -mcpu=core2 | not grep movapd
+; RUN: llc < %s -mcpu=core2 | grep xorps | count 2
+; RUN: llc < %s -mcpu=core2 | not grep movap
; PR2715
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/2008-09-29-VolatileBug.ll b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
index 935c4c5..f35245b 100644
--- a/test/CodeGen/X86/2008-09-29-VolatileBug.ll
+++ b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
@@ -6,7 +6,7 @@
define i32 @main() nounwind {
entry:
- %0 = volatile load i32* @g_407, align 4 ; <i32> [#uses=1]
+ %0 = load volatile i32* @g_407, align 4 ; <i32> [#uses=1]
%1 = trunc i32 %0 to i8 ; <i8> [#uses=1]
%2 = tail call i32 @func_45(i8 zeroext %1) nounwind ; <i32> [#uses=0]
ret i32 0
diff --git a/test/CodeGen/X86/2009-01-31-BigShift2.ll b/test/CodeGen/X86/2009-01-31-BigShift2.ll
index 9d24084..3e42553 100644
--- a/test/CodeGen/X86/2009-01-31-BigShift2.ll
+++ b/test/CodeGen/X86/2009-01-31-BigShift2.ll
@@ -6,6 +6,6 @@ define void @test(<8 x double>* %P, i64* %Q) nounwind {
%B = bitcast <8 x double> %A to i512 ; <i512> [#uses=1]
%C = lshr i512 %B, 448 ; <i512> [#uses=1]
%D = trunc i512 %C to i64 ; <i64> [#uses=1]
- volatile store i64 %D, i64* %Q
+ store volatile i64 %D, i64* %Q
ret void
}
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index 90dabb8..8bbdb0e 100644
--- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -9,30 +9,30 @@
@X = external global i64 ; <i64*> [#uses=25]
define fastcc i64 @foo() nounwind {
- %tmp = volatile load i64* @X ; <i64> [#uses=7]
- %tmp1 = volatile load i64* @X ; <i64> [#uses=5]
- %tmp2 = volatile load i64* @X ; <i64> [#uses=3]
- %tmp3 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp4 = volatile load i64* @X ; <i64> [#uses=5]
- %tmp5 = volatile load i64* @X ; <i64> [#uses=3]
- %tmp6 = volatile load i64* @X ; <i64> [#uses=2]
- %tmp7 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp8 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp9 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp10 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp11 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp12 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp13 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp14 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp15 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp16 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp17 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp18 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp19 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp20 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp21 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp22 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp23 = volatile load i64* @X ; <i64> [#uses=1]
+ %tmp = load volatile i64* @X ; <i64> [#uses=7]
+ %tmp1 = load volatile i64* @X ; <i64> [#uses=5]
+ %tmp2 = load volatile i64* @X ; <i64> [#uses=3]
+ %tmp3 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp4 = load volatile i64* @X ; <i64> [#uses=5]
+ %tmp5 = load volatile i64* @X ; <i64> [#uses=3]
+ %tmp6 = load volatile i64* @X ; <i64> [#uses=2]
+ %tmp7 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp8 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp9 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp10 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp11 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp12 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp13 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp14 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp15 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp16 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp17 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp18 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp19 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp20 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp21 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp22 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp23 = load volatile i64* @X ; <i64> [#uses=1]
%tmp24 = call i64 @llvm.bswap.i64(i64 %tmp8) ; <i64> [#uses=1]
%tmp25 = add i64 %tmp6, %tmp5 ; <i64> [#uses=1]
%tmp26 = add i64 %tmp25, %tmp4 ; <i64> [#uses=1]
@@ -229,7 +229,7 @@ define fastcc i64 @foo() nounwind {
%tmp217 = add i64 %tmp205, %tmp215 ; <i64> [#uses=1]
%tmp218 = add i64 %tmp217, %tmp211 ; <i64> [#uses=1]
%tmp219 = call i64 @llvm.bswap.i64(i64 %tmp23) ; <i64> [#uses=2]
- volatile store i64 %tmp219, i64* @X, align 8
+ store volatile i64 %tmp219, i64* @X, align 8
%tmp220 = add i64 %tmp203, %tmp190 ; <i64> [#uses=1]
%tmp221 = add i64 %tmp220, %tmp216 ; <i64> [#uses=1]
%tmp222 = add i64 %tmp219, %tmp177 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
index a5e28c0..c2cd89c 100644
--- a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
+++ b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
@@ -12,7 +12,7 @@ entry:
br label %bb
bb: ; preds = %bb.i, %bb, %entry
- %2 = volatile load i32* @g_9, align 4 ; <i32> [#uses=2]
+ %2 = load volatile i32* @g_9, align 4 ; <i32> [#uses=2]
%3 = icmp sgt i32 %2, 1 ; <i1> [#uses=1]
%4 = and i1 %3, %1 ; <i1> [#uses=1]
br i1 %4, label %bb.i, label %bb
diff --git a/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
index 790fd88..410a42a 100644
--- a/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
+++ b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
@@ -41,18 +41,18 @@ bb3: ; preds = %bb2, %bb
br i1 undef, label %bb5, label %bb4
bb4: ; preds = %bb3
- %17 = volatile load i32* @uint8, align 4 ; <i32> [#uses=0]
+ %17 = load volatile i32* @uint8, align 4 ; <i32> [#uses=0]
br label %bb5
bb5: ; preds = %bb4, %bb3
- %18 = volatile load i32* @uint8, align 4 ; <i32> [#uses=0]
+ %18 = load volatile i32* @uint8, align 4 ; <i32> [#uses=0]
%19 = sext i8 undef to i16 ; <i16> [#uses=1]
%20 = tail call i32 @func_24(i16 zeroext %19, i8 signext 1) nounwind; <i32> [#uses=0]
br i1 undef, label %return, label %bb6.preheader
bb6.preheader: ; preds = %bb5
%21 = sext i8 %p_52 to i32 ; <i32> [#uses=1]
- %22 = volatile load i32* @uint8, align 4 ; <i32> [#uses=0]
+ %22 = load volatile i32* @uint8, align 4 ; <i32> [#uses=0]
%23 = tail call i32 (...)* @safefuncts(i32 %21, i32 1) nounwind; <i32> [#uses=0]
unreachable
diff --git a/test/CodeGen/X86/2011-11-30-or.ll b/test/CodeGen/X86/2011-11-30-or.ll
new file mode 100644
index 0000000..0a949eb
--- /dev/null
+++ b/test/CodeGen/X86/2011-11-30-or.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "x86_64-apple-macosx10.6.6"
+
+; Test that the order of operands is correct
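+; (the non-VEX form of pblendvb reads its mask implicitly from %xmm0, so
+; the two explicit operands must not be swapped)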
+; CHECK: select_func
+; CHECK: pblendvb %xmm1, %xmm2
+; CHECK: ret
+
+define void @select_func() {
+entry:
+ %c.lobit.i.i.i = ashr <8 x i16> <i16 17, i16 5, i16 1, i16 15, i16 19, i16 15, i16 4, i16 1> , <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %a35 = bitcast <8 x i16> %c.lobit.i.i.i to <2 x i64>
+ %and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45>
+ %and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64>
+ %neg.i.i.i.i = xor <2 x i64> %a35, <i64 -1, i64 -1>
+ %and.i.i.i.i = and <2 x i64> zeroinitializer, %neg.i.i.i.i
+ %or.i.i.i.i = or <2 x i64> %and.i.i.i.i, %and.i5.i.i.i
+ %a37 = bitcast <2 x i64> %or.i.i.i.i to <8 x i16>
+ store <8 x i16> %a37, <8 x i16> addrspace(1)* undef, align 4
+ ret void
+}
+
+
diff --git a/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll b/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
new file mode 100644
index 0000000..fcaabdd
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; PR11494
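+; Widening %a into the high half of an <8 x i32> shuffle and then extracting
+; that half back out should fold away, storing the pmaxsd result directly.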
+
+define void @test(<4 x i32>* nocapture %p) nounwind {
+ ; CHECK: test:
+ ; CHECK: vpxor %xmm0, %xmm0, %xmm0
+ ; CHECK-NEXT: vpmaxsd {{.*}}, %xmm0, %xmm0
+ ; CHECK-NEXT: vmovdqu %xmm0, (%rdi)
+ ; CHECK-NEXT: ret
+ %a = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> <i32 -8, i32 -9, i32 -10, i32 -11>, <4 x i32> zeroinitializer) nounwind
+ %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
+ %c = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ store <4 x i32> %c, <4 x i32>* %p, align 1
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/X86/2011-12-06-BitcastVectorGlobal.ll b/test/CodeGen/X86/2011-12-06-BitcastVectorGlobal.ll
new file mode 100644
index 0000000..7a4126f
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-06-BitcastVectorGlobal.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; PR11495
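+; The bitcast should fold so the initializer is emitted as the raw i64
+; value (1311768467463790320 is 0x123456789ABCDEF0).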
+
+; CHECK: 1311768467463790320
+@v = global <2 x float> bitcast (<1 x i64> <i64 1311768467463790320> to <2 x float>), align 8
diff --git a/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll b/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
new file mode 100644
index 0000000..d978102
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mattr=+avx
+; Various missing isel patterns that were causing crashes.
+; rdar://10538793
+
+define void @t1() nounwind {
+entry:
+ br label %loop.cond
+
+loop.cond: ; preds = %t1.exit, %entry
+ br i1 false, label %return, label %loop
+
+loop: ; preds = %loop.cond
+ br i1 undef, label %0, label %t1.exit
+
+; <label>:0 ; preds = %loop
+ %1 = load <16 x i32> addrspace(1)* undef, align 64
+ %2 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %1, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0>
+ store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
+ br label %t1.exit
+
+t1.exit: ; preds = %0, %loop
+ br label %loop.cond
+
+return: ; preds = %loop.cond
+ ret void
+}
+
+define void @t2() nounwind {
+ br i1 undef, label %1, label %4
+
+; <label>:1 ; preds = %0
+ %2 = load <16 x i32> addrspace(1)* undef, align 64
+ %3 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %2, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 0, i32 0, i32 0, i32 0>
+ store <16 x i32> %3, <16 x i32> addrspace(1)* undef, align 64
+ br label %4
+
+; <label>:4 ; preds = %1, %0
+ ret void
+}
+
+define void @t3() nounwind {
+entry:
+ br label %loop.cond
+
+loop.cond: ; preds = %t2.exit, %entry
+ br i1 false, label %return, label %loop
+
+loop: ; preds = %loop.cond
+ br i1 undef, label %0, label %t2.exit
+
+; <label>:0 ; preds = %loop
+ %1 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0>
+ %2 = load <16 x i32> addrspace(1)* undef, align 64
+ %3 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %2, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ store <16 x i32> %3, <16 x i32> addrspace(1)* undef, align 64
+ br label %t2.exit
+
+t2.exit: ; preds = %0, %loop
+ br label %loop.cond
+
+return: ; preds = %loop.cond
+ ret void
+}
+
+define <3 x i64> @t4() nounwind {
+entry:
+ %0 = load <2 x i64> addrspace(1)* undef, align 16
+ %1 = extractelement <2 x i64> %0, i32 0
+ %2 = insertelement <3 x i64> <i64 undef, i64 0, i64 0>, i64 %1, i32 0
+ ret <3 x i64> %2
+}
diff --git a/test/CodeGen/X86/2011-12-15-vec_shift.ll b/test/CodeGen/X86/2011-12-15-vec_shift.ll
new file mode 100644
index 0000000..2b98b5a
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-15-vec_shift.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
+; Test case for r146671
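+; x86 has no per-element byte shift, so the variable shl is expected to
+; lower to constant psllw steps with pblendvb selecting the shifted lanes.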
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7"
+
+define <16 x i8> @shift(<16 x i8> %a, <16 x i8> %b) nounwind {
+ ; CHECK: psllw $4, [[REG:%xmm.]]
+ ; CHECK-NEXT: movdqa
+ ; CHECK-NEXT: pblendvb [[REG]],{{ %xmm.}}
+ %1 = shl <16 x i8> %a, %b
+ ret <16 x i8> %1
+}
diff --git a/test/CodeGen/X86/2011-12-8-bitcastintprom.ll b/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
new file mode 100644
index 0000000..ceee8e6
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s
+
+; Make sure that the conversion from v4i8 to v2i16 is not a simple bitcast.
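+; (presumably because the promoted v4i8 keeps its bytes in separate lanes,
+; they must be gathered with a shuffle before the 16-bit store)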
+; CHECK: prom_bug
+; CHECK: movd
+; CHECK: shufb
+; CHECK: movw
+; CHECK: ret
+define void @prom_bug(<4 x i8> %t, i16* %p) {
+ %r = bitcast <4 x i8> %t to <2 x i16>
+ %o = extractelement <2 x i16> %r, i32 0
+ store i16 %o, i16* %p
+ ret void
+}
+
diff --git a/test/CodeGen/X86/avx-arith.ll b/test/CodeGen/X86/avx-arith.ll
index 59988ca..4aa3370 100644
--- a/test/CodeGen/X86/avx-arith.ll
+++ b/test/CodeGen/X86/avx-arith.ll
@@ -259,3 +259,14 @@ define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
ret <4 x i64> %x
}
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+define <4 x float> @int_sqrt_ss() {
+; CHECK: int_sqrt_ss
+; CHECK: vsqrtss
+ %x0 = load float addrspace(1)* undef, align 8
+ %x1 = insertelement <4 x float> undef, float %x0, i32 0
+ %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
+ ret <4 x float> %x2
+}
+
diff --git a/test/CodeGen/X86/avx-basic.ll b/test/CodeGen/X86/avx-basic.ll
index edbdc06..8ad0fa8 100644
--- a/test/CodeGen/X86/avx-basic.ll
+++ b/test/CodeGen/X86/avx-basic.ll
@@ -105,3 +105,19 @@ allocas:
ret <8 x i32> %updatedret.i30.i
}
+;;;; Don't crash on fneg
+; rdar://10566486
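+; (an fsub from -0.0 is a canonical fneg, lowered as an xor with the
+; sign-bit mask, hence the vxorps)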
+; CHECK: fneg
+; CHECK: vxorps
+define <16 x float> @fneg(<16 x float> addrspace(1)* nocapture %out) nounwind {
+ %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+ ret <16 x float> %1
+}
+
+;;; Don't crash on build vector
+; CHECK: @build_vec_16x16
+; CHECK: vmovd
+define <16 x i16> @build_vec_16x16(i16 %a) nounwind readonly {
+ %res = insertelement <16 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %a, i32 0
+ ret <16 x i16> %res
+}
diff --git a/test/CodeGen/X86/avx-cast.ll b/test/CodeGen/X86/avx-cast.ll
index d6d2415..32d450c 100644
--- a/test/CodeGen/X86/avx-cast.ll
+++ b/test/CodeGen/X86/avx-cast.ll
@@ -16,7 +16,7 @@ entry:
ret <4 x double> %shuffle.i
}
-; CHECK: vpxor
+; CHECK: vxorps
; CHECK-NEXT: vinsertf128 $0
define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
entry:
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index df12b71..f583914 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -245,34 +245,6 @@ define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
-define <16 x i8> @test_x86_sse2_loadu_dq(i8* %a0) {
- ; CHECK: movl
- ; CHECK: vmovups
- %res = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
- ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readonly
-
-
-define <2 x double> @test_x86_sse2_loadu_pd(i8* %a0) {
- ; CHECK: movl
- ; CHECK: vmovups
- %res = call <2 x double> @llvm.x86.sse2.loadu.pd(i8* %a0) ; <<2 x double>> [#uses=1]
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.loadu.pd(i8*) nounwind readonly
-
-
-define void @test_x86_sse2_maskmov_dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) {
- ; CHECK: pushl
- ; CHECK: movl
- ; CHECK: vmaskmovdqu
- ; CHECK: popl
- call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2)
- ret void
-}
-declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
-
define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vmaxpd
@@ -314,28 +286,6 @@ define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
-define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
- ; CHECK: test_x86_sse2_movnt_dq
- ; CHECK: movl
- ; CHECK: vmovntdq
- ; add operation forces the execution domain.
- %a2 = add <2 x i64> %a1, <i64 1, i64 1>
- call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a2)
- ret void
-}
-declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind
-
-
-define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
- ; CHECK test_x86_sse2_movnt_pd
- ; CHECK: movl
- ; CHECK: vmovntpd
- ; fadd operation forces the execution domain.
- %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
- call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a2)
- ret void
-}
-declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
@@ -967,14 +917,6 @@ define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
-define <2 x i64> @test_x86_sse41_movntdqa(i8* %a0) {
- ; CHECK: movl
- ; CHECK: vmovntdqa
- %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readonly
-
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vmpsadbw
@@ -1524,14 +1466,6 @@ define void @test_x86_sse_ldmxcsr(i8* %a0) {
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
-define <4 x float> @test_x86_sse_loadu_ps(i8* %a0) {
- ; CHECK: movl
- ; CHECK: vmovups
- %res = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a0) ; <<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly
-
define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vmaxps
@@ -1573,14 +1507,6 @@ define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
-define void @test_x86_sse_movnt_ps(i8* %a0, <4 x float> %a1) {
- ; CHECK: movl
- ; CHECK: vmovntps
- call void @llvm.x86.sse.movnt.ps(i8* %a0, <4 x float> %a1)
- ret void
-}
-declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind
-
define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vmulss
@@ -2019,34 +1945,6 @@ define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
-define <32 x i8> @test_x86_avx_loadu_dq_256(i8* %a0) {
- ; CHECK: vmovdqu
- %a1 = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
- ; add operation forces the execution domain.
- %res = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- ret <32 x i8> %res
-}
-declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly
-
-
-define <4 x double> @test_x86_avx_loadu_pd_256(i8* %a0) {
- ; CHECK: vmovupd
- %a1 = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
- ; add operation forces the execution domain.
- %res = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
- ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx.loadu.pd.256(i8*) nounwind readonly
-
-
-define <8 x float> @test_x86_avx_loadu_ps_256(i8* %a0) {
- ; CHECK: vmovups
- %res = call <8 x float> @llvm.x86.avx.loadu.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
- ret <8 x float> %res
-}
-declare <8 x float> @llvm.x86.avx.loadu.ps.256(i8*) nounwind readonly
-
-
define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
; CHECK: vmaskmovpd
%res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
@@ -2159,32 +2057,10 @@ define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
-define void @test_x86_avx_movnt_dq_256(i8* %a0, <4 x i64> %a1) {
- ; CHECK: vmovntdq
- ; add operation forces the execution domain.
- %a2 = add <4 x i64> %a1, <i64 1, i64 1, i64 1, i64 1>
- call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a2)
- ret void
-}
-declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
-define void @test_x86_avx_movnt_pd_256(i8* %a0, <4 x double> %a1) {
- ; CHECK: vmovntpd
- %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
- call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a2)
- ret void
-}
-declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
-define void @test_x86_avx_movnt_ps_256(i8* %a0, <8 x float> %a1) {
- ; CHECK: vmovntps
- call void @llvm.x86.avx.movnt.ps.256(i8* %a0, <8 x float> %a1)
- ret void
-}
-declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
-
define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK: vptest
@@ -2264,7 +2140,8 @@ declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
- ; CHECK: vmovdqu
+ ; FIXME: unfortunately the execution domain fix pass changes this to vmovups, and it's hard to force with no 256-bit integer instructions.
+ ; CHECK: vmovups
; add operation forces the execution domain.
%a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
@@ -2456,6 +2333,12 @@ define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
%res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
+define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
+ ; CHECK: vpermilps
+ %a2 = load <4 x i32>* %a1
+ %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 0db334d..e9392ae 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -8,3 +8,13 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
; CHECK: vshufps
; CHECK: vpshufd
}
+
+; rdar://10538417
+define <3 x i64> @test2(<2 x i64> %v) nounwind readnone {
+; CHECK: test2:
+; CHECK: vxorpd
+; CHECK: vmovsd
+ %1 = shufflevector <2 x i64> %v, <2 x i64> %v, <3 x i32> <i32 0, i32 1, i32 undef>
+ %2 = shufflevector <3 x i64> zeroinitializer, <3 x i64> %1, <3 x i32> <i32 3, i32 4, i32 2>
+ ret <3 x i64> %2
+}
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index af20b90..f8522c2 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -47,7 +47,7 @@ entry:
; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
; To:
; shuffle (vload ptr)), undef, <1, 1, 1, 1>
-; CHECK: vmovdqa
+; CHECK: vmovaps
; CHECK-NEXT: vinsertf128 $1
; CHECK-NEXT: vpermilps $-1
define <8 x float> @funcE() nounwind {
diff --git a/test/CodeGen/X86/avx-varargs-x86_64.ll b/test/CodeGen/X86/avx-varargs-x86_64.ll
new file mode 100644
index 0000000..b0932bd
--- /dev/null
+++ b/test/CodeGen/X86/avx-varargs-x86_64.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; <rdar://problem/10463281>
+; Check that the <8 x float> is passed on the stack.
+
+@x = common global <8 x float> zeroinitializer, align 32
+declare i32 @f(i32, ...)
+
+; CHECK: test1:
+; CHECK: vmovaps %ymm0, (%rsp)
+define void @test1() nounwind uwtable ssp {
+entry:
+ %0 = load <8 x float>* @x, align 32
+ %call = call i32 (i32, ...)* @f(i32 1, <8 x float> %0)
+ ret void
+}
diff --git a/test/CodeGen/X86/avx-vperm2f128.ll b/test/CodeGen/X86/avx-vperm2f128.ll
index 3550a90..caa21e5 100644
--- a/test/CodeGen/X86/avx-vperm2f128.ll
+++ b/test/CodeGen/X86/avx-vperm2f128.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; CHECK: _A
; CHECK: vperm2f128 $1
define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
@@ -7,6 +8,7 @@ entry:
ret <8 x float> %shuffle
}
+; CHECK: _B
; CHECK: vperm2f128 $48
define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
@@ -14,6 +16,7 @@ entry:
ret <8 x float> %shuffle
}
+; CHECK: _C
; CHECK: vperm2f128 $0
define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
@@ -21,6 +24,7 @@ entry:
ret <8 x float> %shuffle
}
+; CHECK: _D
; CHECK: vperm2f128 $17
define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
@@ -28,6 +32,7 @@ entry:
ret <8 x float> %shuffle
}
+; CHECK: _E
; CHECK: vperm2f128 $17
define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
entry:
@@ -35,7 +40,8 @@ entry:
ret <32 x i8> %shuffle
}
-; CHECK: vperm2f128 $33
+; CHECK: _E2
+; CHECK: vperm2f128 $3
define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
@@ -44,6 +50,7 @@ entry:
;;;; Cases with undef indices mixed in the mask
+; CHECK: _F
; CHECK: vperm2f128 $33
define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
diff --git a/test/CodeGen/X86/avx-vpermil.ll b/test/CodeGen/X86/avx-vpermil.ll
index 49b2f54..3d521e7 100644
--- a/test/CodeGen/X86/avx-vpermil.ll
+++ b/test/CodeGen/X86/avx-vpermil.ll
@@ -28,6 +28,14 @@ entry:
ret <4 x i64> %shuffle
}
+; CHECK: vpermilpd
+define <4 x i64> @funcQ(<4 x i64>* %a) nounwind uwtable readnone ssp {
+entry:
+ %a2 = load <4 x i64>* %a
+ %shuffle = shufflevector <4 x i64> %a2, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
+ ret <4 x i64> %shuffle
+}
+
; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the
; target specific mask was correctly generated.
; CHECK: vpermilps $-100
diff --git a/test/CodeGen/X86/avx-vshufp.ll b/test/CodeGen/X86/avx-vshufp.ll
index 7ec3a44..0ccbc59 100644
--- a/test/CodeGen/X86/avx-vshufp.ll
+++ b/test/CodeGen/X86/avx-vshufp.ll
@@ -7,6 +7,15 @@ entry:
ret <8 x float> %shuffle
}
+; CHECK: vshufps $-53, (%
+define <8 x float> @A2(<8 x float>* %a, <8 x float>* %b) nounwind uwtable readnone ssp {
+entry:
+ %a2 = load <8 x float>* %a
+ %b2 = load <8 x float>* %b
+ %shuffle = shufflevector <8 x float> %a2, <8 x float> %b2, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
+ ret <8 x float> %shuffle
+}
+
; CHECK: vshufpd $10, %ymm
define <4 x double> @B(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
entry:
@@ -14,6 +23,15 @@ entry:
ret <4 x double> %shuffle
}
+; CHECK: vshufpd $10, (%
+define <4 x double> @B2(<4 x double>* %a, <4 x double>* %b) nounwind uwtable readnone ssp {
+entry:
+ %a2 = load <4 x double>* %a
+ %b2 = load <4 x double>* %b
+ %shuffle = shufflevector <4 x double> %a2, <4 x double> %b2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x double> %shuffle
+}
+
; CHECK: vshufps $-53, %ymm
define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index bab7fb8..a0f351d 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -1046,3 +1046,13 @@ define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+; This is checked here because the execution dependency fix pass makes it hard to test in AVX mode, since we don't have 256-bit integer instructions.
+define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
+ ; CHECK: vmovdqu
+ ; add operation forces the execution domain.
+ %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
+ ret void
+}
+declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
diff --git a/test/CodeGen/X86/avx2-phaddsub.ll b/test/CodeGen/X86/avx2-phaddsub.ll
new file mode 100644
index 0000000..4eac71d
--- /dev/null
+++ b/test/CodeGen/X86/avx2-phaddsub.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -march=x86-64 -mattr=+avx2 | FileCheck %s
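+; In each function below, the two shuffles pick the even and odd elements of
+; the inputs, so the add/sub should match a horizontal add/subtract pattern.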
+
+; CHECK: phaddw1:
+; CHECK: vphaddw
+define <16 x i16> @phaddw1(<16 x i16> %x, <16 x i16> %y) {
+ %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
+ %b = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
+ %r = add <16 x i16> %a, %b
+ ret <16 x i16> %r
+}
+
+; CHECK: phaddw2:
+; CHECK: vphaddw
+define <16 x i16> @phaddw2(<16 x i16> %x, <16 x i16> %y) {
+ %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
+ %b = shufflevector <16 x i16> %y, <16 x i16> %x, <16 x i32> <i32 16, i32 18, i32 20, i32 22, i32 0, i32 2, i32 4, i32 6, i32 24, i32 26, i32 28, i32 30, i32 8, i32 10, i32 12, i32 14>
+ %r = add <16 x i16> %a, %b
+ ret <16 x i16> %r
+}
+
+; CHECK: phaddd1:
+; CHECK: vphaddd
+define <8 x i32> @phaddd1(<8 x i32> %x, <8 x i32> %y) {
+ %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+ %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+ %r = add <8 x i32> %a, %b
+ ret <8 x i32> %r
+}
+
+; CHECK: phaddd2:
+; CHECK: vphaddd
+define <8 x i32> @phaddd2(<8 x i32> %x, <8 x i32> %y) {
+ %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
+ %b = shufflevector <8 x i32> %y, <8 x i32> %x, <8 x i32> <i32 8, i32 11, i32 0, i32 3, i32 12, i32 15, i32 4, i32 7>
+ %r = add <8 x i32> %a, %b
+ ret <8 x i32> %r
+}
+
+; CHECK: phaddd3:
+; CHECK: vphaddd
+define <8 x i32> @phaddd3(<8 x i32> %x) {
+ %a = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
+ %b = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
+ %r = add <8 x i32> %a, %b
+ ret <8 x i32> %r
+}
+
+; CHECK: phsubw1:
+; CHECK: vphsubw
+define <16 x i16> @phsubw1(<16 x i16> %x, <16 x i16> %y) {
+ %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
+ %b = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
+ %r = sub <16 x i16> %a, %b
+ ret <16 x i16> %r
+}
+
+; CHECK: phsubd1:
+; CHECK: vphsubd
+define <8 x i32> @phsubd1(<8 x i32> %x, <8 x i32> %y) {
+ %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+ %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+ %r = sub <8 x i32> %a, %b
+ ret <8 x i32> %r
+}
+
+; CHECK: phsubd2:
+; CHECK: vphsubd
+define <8 x i32> @phsubd2(<8 x i32> %x, <8 x i32> %y) {
+ %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 undef, i32 8, i32 undef, i32 4, i32 6, i32 12, i32 14>
+ %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 undef, i32 9, i32 11, i32 5, i32 7, i32 undef, i32 15>
+ %r = sub <8 x i32> %a, %b
+ ret <8 x i32> %r
+}
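A note on the shuffle patterns above: a 256-bit vphadd/vphsub works within each 128-bit lane, so for <8 x i32> inputs a and b it produces [a0+a1, a2+a3, b0+b1, b2+b3 | a4+a5, a6+a7, b4+b5, b6+b7]. The even-index and odd-index shufflevector masks pick out exactly those operand pairs, which is what lets the add (or sub) of the two shuffles be matched to the instruction. Worked out for @phaddd1:

; %a    = <a0, a2, b0, b2, a4, a6, b4, b6>
; %b    = <a1, a3, b1, b3, a5, a7, b5, b7>
; %a+%b = <a0+a1, a2+a3, b0+b1, b2+b3, a4+a5, a6+a7, b4+b5, b6+b7> = vphaddd a, b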
diff --git a/test/CodeGen/X86/avx2-vperm2i128.ll b/test/CodeGen/X86/avx2-vperm2i128.ll
new file mode 100644
index 0000000..1937db5
--- /dev/null
+++ b/test/CodeGen/X86/avx2-vperm2i128.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vperm2i128 $17
+define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
+entry:
+ ; add forces execution domain
+ %a2 = add <32 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %shuffle = shufflevector <32 x i8> %a2, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ ret <32 x i8> %shuffle
+}
+
+; CHECK: vperm2i128 $3
+define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+ ; add forces execution domain
+ %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+ %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+ ret <4 x i64> %shuffle
+}
+
+; CHECK: vperm2i128 $49
+define <8 x i32> @E3(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+ ; add forces execution domain
+ %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b, <8 x i32> <i32 undef, i32 5, i32 undef, i32 7, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i32> %shuffle
+}
+
+; CHECK: vperm2i128 $2
+define <16 x i16> @E4(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp {
+entry:
+ ; add forces execution domain
+ %a2 = add <16 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shuffle = shufflevector <16 x i16> %a2, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <16 x i16> %shuffle
+}
+
+; CHECK: vperm2i128 $2, (%
+define <16 x i16> @E5(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp {
+entry:
+ %c = load <16 x i16>* %a
+ %d = load <16 x i16>* %b
+ %c2 = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <16 x i16> %shuffle
+}
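A note on the immediates checked above: for vperm2i128, imm[1:0] selects the 128-bit lane written to the destination's low half and imm[5:4] the lane written to its high half, where 0/1 mean the low/high lane of the first source and 2/3 the low/high lane of the second. Assuming the first source is the add result in each test:

; $17 = 0x11 -> dst = { src1.hi, src1.hi }   (@E: %a2's high lane, twice)
; $3  = 0x03 -> dst = { src2.hi, src1.lo }   (@E2)
; $49 = 0x31 -> dst = { src1.hi, src2.hi }   (@E3)
; $2  = 0x02 -> dst = { src2.lo, src1.lo }   (@E4 and @E5)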
diff --git a/test/CodeGen/X86/bc-extract.ll b/test/CodeGen/X86/bc-extract.ll
index ac972a8..ceabcb7 100644
--- a/test/CodeGen/X86/bc-extract.ll
+++ b/test/CodeGen/X86/bc-extract.ll
@@ -11,7 +11,7 @@ entry:
define float @extractFloat2() nounwind {
entry:
- ; CHECK: pxor %xmm0, %xmm0
+ ; CHECK: xorps %xmm0, %xmm0
%tmp4 = bitcast <1 x double> <double 0x000000003F800000> to <2 x float>
%tmp5 = extractelement <2 x float> %tmp4, i32 1
ret float %tmp5
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
index f87d1a6..167d522 100644
--- a/test/CodeGen/X86/block-placement.ll
+++ b/test/CodeGen/X86/block-placement.ll
@@ -122,14 +122,14 @@ define i32 @test_loop_early_exits(i32 %i, i32* %a) {
; Check that we sink early exit blocks out of loop bodies.
; CHECK: test_loop_early_exits:
; CHECK: %entry
-; CHECK: %body1
; CHECK: %body2
; CHECK: %body3
; CHECK: %body4
-; CHECK: %exit
+; CHECK: %body1
; CHECK: %bail1
; CHECK: %bail2
; CHECK: %bail3
+; CHECK: %exit
entry:
br label %body1
@@ -169,6 +169,74 @@ exit:
ret i32 %sum
}
+define i32 @test_loop_rotate(i32 %i, i32* %a) {
+; Check that we rotate conditional exits from the loop to the bottom of the
+; loop, eliminating unconditional branches to the top.
+; CHECK: test_loop_rotate:
+; CHECK: %entry
+; CHECK: %body1
+; CHECK: %body0
+; CHECK: %exit
+
+entry:
+ br label %body0
+
+body0:
+ %iv = phi i32 [ 0, %entry ], [ %next, %body1 ]
+ %base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
+ %next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %next, %i
+ br i1 %exitcond, label %exit, label %body1
+
+body1:
+ %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+ %0 = load i32* %arrayidx
+ %sum = add nsw i32 %0, %base
+ %bailcond1 = icmp eq i32 %sum, 42
+ br label %body0
+
+exit:
+ ret i32 %base
+}
+
+define void @test_loop_rotate_reversed_blocks() {
+; This test case (greatly reduced from an Olden benchmark) ensures that the loop
+; rotate implementation doesn't assume that loops are laid out in a particular
+; order. The first loop will get split into two basic blocks, with the loop
+; header coming after the loop latch.
+;
+; CHECK: test_loop_rotate_reversed_blocks
+; CHECK: %entry
+; Look for a jump into the middle of the loop, and no branches mid-way.
+; CHECK: jmp
+; CHECK: %loop1
+; CHECK-NOT: j{{\w*}} .LBB{{.*}}
+; CHECK: %loop1
+; CHECK: je
+
+entry:
+ %cond1 = load volatile i1* undef
+ br i1 %cond1, label %loop2.preheader, label %loop1
+
+loop1:
+ call i32 @f()
+ %cond2 = load volatile i1* undef
+ br i1 %cond2, label %loop2.preheader, label %loop1
+
+loop2.preheader:
+ call i32 @f()
+ %cond3 = load volatile i1* undef
+ br i1 %cond3, label %exit, label %loop2
+
+loop2:
+ call i32 @f()
+ %cond4 = load volatile i1* undef
+ br i1 %cond4, label %exit, label %loop2
+
+exit:
+ ret void
+}
+
define i32 @test_loop_align(i32 %i, i32* %a) {
; Check that we provide basic loop body alignment with the block placement
; pass.
@@ -278,9 +346,9 @@ define void @unnatural_cfg2() {
; single-source GCC.
; CHECK: unnatural_cfg2
; CHECK: %entry
-; CHECK: %loop.header
; CHECK: %loop.body1
; CHECK: %loop.body2
+; CHECK: %loop.header
; CHECK: %loop.body3
; CHECK: %loop.inner1.begin
; The end block is folded with %loop.body3...
@@ -537,8 +605,8 @@ define void @test_unnatural_cfg_backwards_inner_loop() {
; CHECK: test_unnatural_cfg_backwards_inner_loop
; CHECK: %entry
; CHECK: %body
-; CHECK: %loop1
; CHECK: %loop2b
+; CHECK: %loop1
; CHECK: %loop2a
entry:
@@ -660,199 +728,199 @@ define void @many_unanalyzable_branches() {
entry:
br label %0
- %val0 = volatile load float* undef
+ %val0 = load volatile float* undef
%cmp0 = fcmp une float %val0, undef
br i1 %cmp0, label %1, label %0
- %val1 = volatile load float* undef
+ %val1 = load volatile float* undef
%cmp1 = fcmp une float %val1, undef
br i1 %cmp1, label %2, label %1
- %val2 = volatile load float* undef
+ %val2 = load volatile float* undef
%cmp2 = fcmp une float %val2, undef
br i1 %cmp2, label %3, label %2
- %val3 = volatile load float* undef
+ %val3 = load volatile float* undef
%cmp3 = fcmp une float %val3, undef
br i1 %cmp3, label %4, label %3
- %val4 = volatile load float* undef
+ %val4 = load volatile float* undef
%cmp4 = fcmp une float %val4, undef
br i1 %cmp4, label %5, label %4
- %val5 = volatile load float* undef
+ %val5 = load volatile float* undef
%cmp5 = fcmp une float %val5, undef
br i1 %cmp5, label %6, label %5
- %val6 = volatile load float* undef
+ %val6 = load volatile float* undef
%cmp6 = fcmp une float %val6, undef
br i1 %cmp6, label %7, label %6
- %val7 = volatile load float* undef
+ %val7 = load volatile float* undef
%cmp7 = fcmp une float %val7, undef
br i1 %cmp7, label %8, label %7
- %val8 = volatile load float* undef
+ %val8 = load volatile float* undef
%cmp8 = fcmp une float %val8, undef
br i1 %cmp8, label %9, label %8
- %val9 = volatile load float* undef
+ %val9 = load volatile float* undef
%cmp9 = fcmp une float %val9, undef
br i1 %cmp9, label %10, label %9
- %val10 = volatile load float* undef
+ %val10 = load volatile float* undef
%cmp10 = fcmp une float %val10, undef
br i1 %cmp10, label %11, label %10
- %val11 = volatile load float* undef
+ %val11 = load volatile float* undef
%cmp11 = fcmp une float %val11, undef
br i1 %cmp11, label %12, label %11
- %val12 = volatile load float* undef
+ %val12 = load volatile float* undef
%cmp12 = fcmp une float %val12, undef
br i1 %cmp12, label %13, label %12
- %val13 = volatile load float* undef
+ %val13 = load volatile float* undef
%cmp13 = fcmp une float %val13, undef
br i1 %cmp13, label %14, label %13
- %val14 = volatile load float* undef
+ %val14 = load volatile float* undef
%cmp14 = fcmp une float %val14, undef
br i1 %cmp14, label %15, label %14
- %val15 = volatile load float* undef
+ %val15 = load volatile float* undef
%cmp15 = fcmp une float %val15, undef
br i1 %cmp15, label %16, label %15
- %val16 = volatile load float* undef
+ %val16 = load volatile float* undef
%cmp16 = fcmp une float %val16, undef
br i1 %cmp16, label %17, label %16
- %val17 = volatile load float* undef
+ %val17 = load volatile float* undef
%cmp17 = fcmp une float %val17, undef
br i1 %cmp17, label %18, label %17
- %val18 = volatile load float* undef
+ %val18 = load volatile float* undef
%cmp18 = fcmp une float %val18, undef
br i1 %cmp18, label %19, label %18
- %val19 = volatile load float* undef
+ %val19 = load volatile float* undef
%cmp19 = fcmp une float %val19, undef
br i1 %cmp19, label %20, label %19
- %val20 = volatile load float* undef
+ %val20 = load volatile float* undef
%cmp20 = fcmp une float %val20, undef
br i1 %cmp20, label %21, label %20
- %val21 = volatile load float* undef
+ %val21 = load volatile float* undef
%cmp21 = fcmp une float %val21, undef
br i1 %cmp21, label %22, label %21
- %val22 = volatile load float* undef
+ %val22 = load volatile float* undef
%cmp22 = fcmp une float %val22, undef
br i1 %cmp22, label %23, label %22
- %val23 = volatile load float* undef
+ %val23 = load volatile float* undef
%cmp23 = fcmp une float %val23, undef
br i1 %cmp23, label %24, label %23
- %val24 = volatile load float* undef
+ %val24 = load volatile float* undef
%cmp24 = fcmp une float %val24, undef
br i1 %cmp24, label %25, label %24
- %val25 = volatile load float* undef
+ %val25 = load volatile float* undef
%cmp25 = fcmp une float %val25, undef
br i1 %cmp25, label %26, label %25
- %val26 = volatile load float* undef
+ %val26 = load volatile float* undef
%cmp26 = fcmp une float %val26, undef
br i1 %cmp26, label %27, label %26
- %val27 = volatile load float* undef
+ %val27 = load volatile float* undef
%cmp27 = fcmp une float %val27, undef
br i1 %cmp27, label %28, label %27
- %val28 = volatile load float* undef
+ %val28 = load volatile float* undef
%cmp28 = fcmp une float %val28, undef
br i1 %cmp28, label %29, label %28
- %val29 = volatile load float* undef
+ %val29 = load volatile float* undef
%cmp29 = fcmp une float %val29, undef
br i1 %cmp29, label %30, label %29
- %val30 = volatile load float* undef
+ %val30 = load volatile float* undef
%cmp30 = fcmp une float %val30, undef
br i1 %cmp30, label %31, label %30
- %val31 = volatile load float* undef
+ %val31 = load volatile float* undef
%cmp31 = fcmp une float %val31, undef
br i1 %cmp31, label %32, label %31
- %val32 = volatile load float* undef
+ %val32 = load volatile float* undef
%cmp32 = fcmp une float %val32, undef
br i1 %cmp32, label %33, label %32
- %val33 = volatile load float* undef
+ %val33 = load volatile float* undef
%cmp33 = fcmp une float %val33, undef
br i1 %cmp33, label %34, label %33
- %val34 = volatile load float* undef
+ %val34 = load volatile float* undef
%cmp34 = fcmp une float %val34, undef
br i1 %cmp34, label %35, label %34
- %val35 = volatile load float* undef
+ %val35 = load volatile float* undef
%cmp35 = fcmp une float %val35, undef
br i1 %cmp35, label %36, label %35
- %val36 = volatile load float* undef
+ %val36 = load volatile float* undef
%cmp36 = fcmp une float %val36, undef
br i1 %cmp36, label %37, label %36
- %val37 = volatile load float* undef
+ %val37 = load volatile float* undef
%cmp37 = fcmp une float %val37, undef
br i1 %cmp37, label %38, label %37
- %val38 = volatile load float* undef
+ %val38 = load volatile float* undef
%cmp38 = fcmp une float %val38, undef
br i1 %cmp38, label %39, label %38
- %val39 = volatile load float* undef
+ %val39 = load volatile float* undef
%cmp39 = fcmp une float %val39, undef
br i1 %cmp39, label %40, label %39
- %val40 = volatile load float* undef
+ %val40 = load volatile float* undef
%cmp40 = fcmp une float %val40, undef
br i1 %cmp40, label %41, label %40
- %val41 = volatile load float* undef
+ %val41 = load volatile float* undef
%cmp41 = fcmp une float %val41, undef
br i1 %cmp41, label %42, label %41
- %val42 = volatile load float* undef
+ %val42 = load volatile float* undef
%cmp42 = fcmp une float %val42, undef
br i1 %cmp42, label %43, label %42
- %val43 = volatile load float* undef
+ %val43 = load volatile float* undef
%cmp43 = fcmp une float %val43, undef
br i1 %cmp43, label %44, label %43
- %val44 = volatile load float* undef
+ %val44 = load volatile float* undef
%cmp44 = fcmp une float %val44, undef
br i1 %cmp44, label %45, label %44
- %val45 = volatile load float* undef
+ %val45 = load volatile float* undef
%cmp45 = fcmp une float %val45, undef
br i1 %cmp45, label %46, label %45
- %val46 = volatile load float* undef
+ %val46 = load volatile float* undef
%cmp46 = fcmp une float %val46, undef
br i1 %cmp46, label %47, label %46
- %val47 = volatile load float* undef
+ %val47 = load volatile float* undef
%cmp47 = fcmp une float %val47, undef
br i1 %cmp47, label %48, label %47
- %val48 = volatile load float* undef
+ %val48 = load volatile float* undef
%cmp48 = fcmp une float %val48, undef
br i1 %cmp48, label %49, label %48
- %val49 = volatile load float* undef
+ %val49 = load volatile float* undef
%cmp49 = fcmp une float %val49, undef
br i1 %cmp49, label %50, label %49
- %val50 = volatile load float* undef
+ %val50 = load volatile float* undef
%cmp50 = fcmp une float %val50, undef
br i1 %cmp50, label %51, label %50
- %val51 = volatile load float* undef
+ %val51 = load volatile float* undef
%cmp51 = fcmp une float %val51, undef
br i1 %cmp51, label %52, label %51
- %val52 = volatile load float* undef
+ %val52 = load volatile float* undef
%cmp52 = fcmp une float %val52, undef
br i1 %cmp52, label %53, label %52
- %val53 = volatile load float* undef
+ %val53 = load volatile float* undef
%cmp53 = fcmp une float %val53, undef
br i1 %cmp53, label %54, label %53
- %val54 = volatile load float* undef
+ %val54 = load volatile float* undef
%cmp54 = fcmp une float %val54, undef
br i1 %cmp54, label %55, label %54
- %val55 = volatile load float* undef
+ %val55 = load volatile float* undef
%cmp55 = fcmp une float %val55, undef
br i1 %cmp55, label %56, label %55
- %val56 = volatile load float* undef
+ %val56 = load volatile float* undef
%cmp56 = fcmp une float %val56, undef
br i1 %cmp56, label %57, label %56
- %val57 = volatile load float* undef
+ %val57 = load volatile float* undef
%cmp57 = fcmp une float %val57, undef
br i1 %cmp57, label %58, label %57
- %val58 = volatile load float* undef
+ %val58 = load volatile float* undef
%cmp58 = fcmp une float %val58, undef
br i1 %cmp58, label %59, label %58
- %val59 = volatile load float* undef
+ %val59 = load volatile float* undef
%cmp59 = fcmp une float %val59, undef
br i1 %cmp59, label %60, label %59
- %val60 = volatile load float* undef
+ %val60 = load volatile float* undef
%cmp60 = fcmp une float %val60, undef
br i1 %cmp60, label %61, label %60
- %val61 = volatile load float* undef
+ %val61 = load volatile float* undef
%cmp61 = fcmp une float %val61, undef
br i1 %cmp61, label %62, label %61
- %val62 = volatile load float* undef
+ %val62 = load volatile float* undef
%cmp62 = fcmp une float %val62, undef
br i1 %cmp62, label %63, label %62
- %val63 = volatile load float* undef
+ %val63 = load volatile float* undef
%cmp63 = fcmp une float %val63, undef
br i1 %cmp63, label %64, label %63
- %val64 = volatile load float* undef
+ %val64 = load volatile float* undef
%cmp64 = fcmp une float %val64, undef
br i1 %cmp64, label %65, label %64
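The volatile rewrites in this file and throughout the tests below track an LLVM IR syntax change: the volatile qualifier now follows the opcode rather than preceding it.

; old: %v = volatile load float* undef        volatile store i32 %x, i32* @g
; new: %v = load volatile float* undef        store volatile i32 %x, i32* @g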
diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll
index 69cf736..cde9b48 100644
--- a/test/CodeGen/X86/bmi.ll
+++ b/test/CodeGen/X86/bmi.ll
@@ -1,40 +1,40 @@
; RUN: llc < %s -march=x86-64 -mattr=+bmi,+bmi2 | FileCheck %s
define i32 @t1(i32 %x) nounwind {
- %tmp = tail call i32 @llvm.cttz.i32( i32 %x )
+ %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 false )
ret i32 %tmp
; CHECK: t1:
; CHECK: tzcntl
}
-declare i32 @llvm.cttz.i32(i32) nounwind readnone
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
define i16 @t2(i16 %x) nounwind {
- %tmp = tail call i16 @llvm.cttz.i16( i16 %x )
+ %tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 false )
ret i16 %tmp
; CHECK: t2:
; CHECK: tzcntw
}
-declare i16 @llvm.cttz.i16(i16) nounwind readnone
+declare i16 @llvm.cttz.i16(i16, i1) nounwind readnone
define i64 @t3(i64 %x) nounwind {
- %tmp = tail call i64 @llvm.cttz.i64( i64 %x )
+ %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 false )
ret i64 %tmp
; CHECK: t3:
; CHECK: tzcntq
}
-declare i64 @llvm.cttz.i64(i64) nounwind readnone
+declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
define i8 @t4(i8 %x) nounwind {
- %tmp = tail call i8 @llvm.cttz.i8( i8 %x )
+ %tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 false )
ret i8 %tmp
; CHECK: t4:
; CHECK: tzcntw
}
-declare i8 @llvm.cttz.i8(i8) nounwind readnone
+declare i8 @llvm.cttz.i8(i8, i1) nounwind readnone
define i32 @andn32(i32 %x, i32 %y) nounwind readnone {
%tmp1 = xor i32 %x, -1
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index d76fab4..9b26efd 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -1,36 +1,36 @@
; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
define i32 @t1(i32 %x) nounwind {
- %tmp = tail call i32 @llvm.ctlz.i32( i32 %x )
+ %tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 true )
ret i32 %tmp
; CHECK: t1:
; CHECK: bsrl
; CHECK: cmov
}
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
define i32 @t2(i32 %x) nounwind {
- %tmp = tail call i32 @llvm.cttz.i32( i32 %x )
+ %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
ret i32 %tmp
; CHECK: t2:
; CHECK: bsfl
; CHECK: cmov
}
-declare i32 @llvm.cttz.i32(i32) nounwind readnone
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
define i16 @t3(i16 %x, i16 %y) nounwind {
entry:
%tmp1 = add i16 %x, %y
- %tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1 ) ; <i16> [#uses=1]
+ %tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1, i1 true ) ; <i16> [#uses=1]
ret i16 %tmp2
; CHECK: t3:
; CHECK: bsrw
; CHECK: cmov
}
-declare i16 @llvm.ctlz.i16(i16) nounwind readnone
+declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
; Don't generate the cmovne when the source is known non-zero (and bsr would
; not set ZF).
@@ -43,6 +43,6 @@ entry:
; CHECK-NOT: cmov
; CHECK: ret
%or = or i32 %n, 1
- %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or)
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or, i1 true)
ret i32 %tmp1
}
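The new i1 operand on llvm.ctlz/llvm.cttz is the is_zero_undef flag: when true, the result is undefined for a zero input; when false, the result must be the operand's bit width for zero. The tzcnt/lzcnt tests in bmi.ll and lzcnt.ll can pass i1 false and still get a single instruction because those instructions define the zero case, while the bsr/bsf lowerings above still guard zero with a cmov unless the input is known non-zero, as in the last test.

; %r = call i32 @llvm.ctlz.i32(i32 %x, i1 true)   ; undef when %x == 0
; %r = call i32 @llvm.ctlz.i32(i32 %x, i1 false)  ; must yield 32 when %x == 0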
diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll
index 7a8d6e6..2e7ffbf 100644
--- a/test/CodeGen/X86/cmov.ll
+++ b/test/CodeGen/X86/cmov.ll
@@ -84,7 +84,7 @@ entry:
br i1 %3, label %func_4.exit.i, label %bb.i.i.i
bb.i.i.i: ; preds = %entry
- %4 = volatile load i8* @g_100, align 1 ; <i8> [#uses=0]
+ %4 = load volatile i8* @g_100, align 1 ; <i8> [#uses=0]
br label %func_4.exit.i
; CHECK: test4:
@@ -101,7 +101,7 @@ func_4.exit.i: ; preds = %bb.i.i.i, %entry
br i1 %brmerge.i, label %func_1.exit, label %bb.i.i
bb.i.i: ; preds = %func_4.exit.i
- %5 = volatile load i8* @g_100, align 1 ; <i8> [#uses=0]
+ %5 = load volatile i8* @g_100, align 1 ; <i8> [#uses=0]
br label %func_1.exit
func_1.exit: ; preds = %bb.i.i, %func_4.exit.i
diff --git a/test/CodeGen/X86/coalescer-commute1.ll b/test/CodeGen/X86/coalescer-commute1.ll
index 8aa0bfd..d9e0778 100644
--- a/test/CodeGen/X86/coalescer-commute1.ll
+++ b/test/CodeGen/X86/coalescer-commute1.ll
@@ -21,6 +21,6 @@ bb: ; preds = %bb, %entry
br i1 %exitcond, label %bb13, label %bb
bb13: ; preds = %bb
- volatile store float %tmp6, float* @G, align 4
+ store volatile float %tmp6, float* @G, align 4
ret void
}
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index 1531457..cf6e27d 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -6,16 +6,16 @@
; Chain and flag folding issues.
define i32 @test1() nounwind ssp {
entry:
- %tmp5.i = volatile load i32* undef ; <i32> [#uses=1]
+ %tmp5.i = load volatile i32* undef ; <i32> [#uses=1]
%conv.i = zext i32 %tmp5.i to i64 ; <i64> [#uses=1]
- %tmp12.i = volatile load i32* undef ; <i32> [#uses=1]
+ %tmp12.i = load volatile i32* undef ; <i32> [#uses=1]
%conv13.i = zext i32 %tmp12.i to i64 ; <i64> [#uses=1]
%shl.i = shl i64 %conv13.i, 32 ; <i64> [#uses=1]
%or.i = or i64 %shl.i, %conv.i ; <i64> [#uses=1]
%add16.i = add i64 %or.i, 256 ; <i64> [#uses=1]
%shr.i = lshr i64 %add16.i, 8 ; <i64> [#uses=1]
%conv19.i = trunc i64 %shr.i to i32 ; <i32> [#uses=1]
- volatile store i32 %conv19.i, i32* undef
+ store volatile i32 %conv19.i, i32* undef
ret i32 undef
}
diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll
index 377fd11..d8f4663 100644
--- a/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -225,18 +225,20 @@ if.else: ; preds = %entry
; CHECK-NEXT: je
}
-; Check that 0.0 is materialized using pxor
+; Check that 0.0 is materialized using xorps
define void @test18(float* %p1) {
store float 0.0, float* %p1
ret void
; CHECK: test18:
-; CHECK: pxor
+; CHECK: xorps
}
+
+; Without any type hints, doubles use the shorter xorps encoding instead of xorpd.
define void @test19(double* %p1) {
store double 0.0, double* %p1
ret void
; CHECK: test19:
-; CHECK: pxor
+; CHECK: xorps
}
; Check that we fast-isel sret
@@ -252,12 +254,12 @@ entry:
}
declare void @test20sret(%struct.a* sret)
-; Check that -0.0 is not materialized using pxor
+; Check that -0.0 is not materialized using xor
define void @test21(double* %p1) {
store double -0.0, double* %p1
ret void
; CHECK: test21:
-; CHECK-NOT: pxor
+; CHECK-NOT: xor
; CHECK: movsd LCPI
}
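A note on the pxor-to-xorps updates: both idioms zero an XMM register, but xorps has the shortest encoding (no 0x66 prefix) and keeps the value in the floating-point domain, so it is now preferred for materializing +0.0 as float or double. A self-xor cannot produce -0.0, whose sign bit is set, which is why test21 expects a constant-pool load instead.

; +0.0 is the all-zero bit pattern          -> xorps %xmm0, %xmm0
; -0.0 has only the sign bit set (0x80...0) -> movsd LCPI... (constant pool)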
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
new file mode 100644
index 0000000..bd94c13
--- /dev/null
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
@@ -0,0 +1,243 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4 | FileCheck %s
+
+; VFMADD
+define < 4 x float > @test_x86_fma4_vfmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfmaddss
+ %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfmaddsd
+ %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma4_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfmaddps
+ %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfmaddpd
+ %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+ ; CHECK: vfmaddps
+ ; CHECK: ymm
+ %res = call < 8 x float > @llvm.x86.fma4.vfmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+ ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+ ; CHECK: vfmaddpd
+ ; CHECK: ymm
+ %res = call < 4 x double > @llvm.x86.fma4.vfmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+ ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFMSUB
+define < 4 x float > @test_x86_fma4_vfmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfmsubss
+ %res = call < 4 x float > @llvm.x86.fma4.vfmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfmsubsd
+ %res = call < 2 x double > @llvm.x86.fma4.vfmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma4_vfmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfmsubps
+ %res = call < 4 x float > @llvm.x86.fma4.vfmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfmsubpd
+ %res = call < 2 x double > @llvm.x86.fma4.vfmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+ ; CHECK: vfmsubps
+ ; CHECK: ymm
+ %res = call < 8 x float > @llvm.x86.fma4.vfmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+ ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+ ; CHECK: vfmsubpd
+ ; CHECK: ymm
+ %res = call < 4 x double > @llvm.x86.fma4.vfmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+ ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFNMADD
+define < 4 x float > @test_x86_fma4_vfnmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfnmaddss
+ %res = call < 4 x float > @llvm.x86.fma4.vfnmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfnmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfnmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfnmaddsd
+ %res = call < 2 x double > @llvm.x86.fma4.vfnmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfnmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma4_vfnmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfnmaddps
+ %res = call < 4 x float > @llvm.x86.fma4.vfnmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfnmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfnmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfnmaddpd
+ %res = call < 2 x double > @llvm.x86.fma4.vfnmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfnmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfnmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+ ; CHECK: vfnmaddps
+ ; CHECK: ymm
+ %res = call < 8 x float > @llvm.x86.fma4.vfnmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+ ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfnmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfnmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+ ; CHECK: vfnmaddpd
+ ; CHECK: ymm
+ %res = call < 4 x double > @llvm.x86.fma4.vfnmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+ ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfnmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFNMSUB
+define < 4 x float > @test_x86_fma4_vfnmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfnmsubss
+ %res = call < 4 x float > @llvm.x86.fma4.vfnmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfnmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfnmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfnmsubsd
+ %res = call < 2 x double > @llvm.x86.fma4.vfnmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfnmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma4_vfnmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfnmsubps
+ %res = call < 4 x float > @llvm.x86.fma4.vfnmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfnmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfnmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfnmsubpd
+ %res = call < 2 x double > @llvm.x86.fma4.vfnmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfnmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfnmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+ ; CHECK: vfnmsubps
+ ; CHECK: ymm
+ %res = call < 8 x float > @llvm.x86.fma4.vfnmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+ ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfnmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfnmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+ ; CHECK: vfnmsubpd
+ ; CHECK: ymm
+ %res = call < 4 x double > @llvm.x86.fma4.vfnmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+ ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfnmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFMADDSUB
+define < 4 x float > @test_x86_fma4_vfmaddsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfmaddsubps
+ %res = call < 4 x float > @llvm.x86.fma4.vfmaddsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmaddsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmaddsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfmaddsubpd
+ %res = call < 2 x double > @llvm.x86.fma4.vfmaddsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmaddsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfmaddsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+ ; CHECK: vfmaddsubps
+ ; CHECK: ymm
+ %res = call < 8 x float > @llvm.x86.fma4.vfmaddsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+ ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfmaddsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfmaddsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+ ; CHECK: vfmaddsubpd
+ ; CHECK: ymm
+ %res = call < 4 x double > @llvm.x86.fma4.vfmaddsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+ ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfmaddsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFMSUBADD
+define < 4 x float > @test_x86_fma4_vfmsubadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfmsubaddps
+ %res = call < 4 x float > @llvm.x86.fma4.vfmsubadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmsubadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmsubadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfmsubaddpd
+ %res = call < 2 x double > @llvm.x86.fma4.vfmsubadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmsubadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfmsubadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+ ; CHECK: vfmsubaddps
+ ; CHECK: ymm
+ %res = call < 8 x float > @llvm.x86.fma4.vfmsubadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+ ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfmsubadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfmsubadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+ ; CHECK: vfmsubaddpd
+ ; CHECK: ymm
+ %res = call < 4 x double > @llvm.x86.fma4.vfmsubadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+ ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfmsubadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
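The FMA4 intrinsic families above map to the following per-element operations:

; vfmadd:     a*b + c         vfnmadd:    -(a*b) + c
; vfmsub:     a*b - c         vfnmsub:    -(a*b) - c
; vfmaddsub:  a*b - c in even elements, a*b + c in odd elements
; vfmsubadd:  a*b + c in even elements, a*b - c in odd elements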
diff --git a/test/CodeGen/X86/fp-stack-O0.ll b/test/CodeGen/X86/fp-stack-O0.ll
index b9cb5d7..df90254 100644
--- a/test/CodeGen/X86/fp-stack-O0.ll
+++ b/test/CodeGen/X86/fp-stack-O0.ll
@@ -10,7 +10,7 @@ declare i32 @x2(x86_fp80, x86_fp80) nounwind
; Pass arguments on the stack.
; CHECK-NEXT: movq %rsp, [[RCX:%r..]]
; Copy constant-pool value.
-; CHECK-NEXT: fldt LCPI
+; CHECK-NEXT: fldl LCPI
; CHECK-NEXT: fstpt 16([[RCX]])
; Copy x1 return value.
; CHECK-NEXT: fstpt ([[RCX]])
diff --git a/test/CodeGen/X86/fp-stack-ret-conv.ll b/test/CodeGen/X86/fp-stack-ret-conv.ll
index f220b24..3e26141 100644
--- a/test/CodeGen/X86/fp-stack-ret-conv.ll
+++ b/test/CodeGen/X86/fp-stack-ret-conv.ll
@@ -10,7 +10,7 @@ entry:
%tmp13 = tail call double @foo()
%tmp1314 = fptrunc double %tmp13 to float ; <float> [#uses=1]
%tmp3940 = fpext float %tmp1314 to double ; <double> [#uses=1]
- volatile store double %tmp3940, double* %b
+ store volatile double %tmp3940, double* %b
ret void
}
diff --git a/test/CodeGen/X86/haddsub.ll b/test/CodeGen/X86/haddsub.ll
index 91758ea..5f1f4fd 100644
--- a/test/CodeGen/X86/haddsub.ll
+++ b/test/CodeGen/X86/haddsub.ll
@@ -192,3 +192,94 @@ define <4 x float> @hsubps4(<4 x float> %x) {
%r = fsub <4 x float> %a, %b
ret <4 x float> %r
}
+
+; SSE3: vhaddps1:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; SSE3: haddps
+; AVX: vhaddps1:
+; AVX: vhaddps
+define <8 x float> @vhaddps1(<8 x float> %x, <8 x float> %y) {
+ %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+ %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+ %r = fadd <8 x float> %a, %b
+ ret <8 x float> %r
+}
+
+; SSE3: vhaddps2:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; SSE3: haddps
+; AVX: vhaddps2:
+; AVX: vhaddps
+define <8 x float> @vhaddps2(<8 x float> %x, <8 x float> %y) {
+ %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
+ %b = shufflevector <8 x float> %y, <8 x float> %x, <8 x i32> <i32 8, i32 11, i32 0, i32 3, i32 12, i32 15, i32 4, i32 7>
+ %r = fadd <8 x float> %a, %b
+ ret <8 x float> %r
+}
+
+; SSE3: vhaddps3:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; SSE3: haddps
+; AVX: vhaddps3:
+; AVX: vhaddps
+define <8 x float> @vhaddps3(<8 x float> %x) {
+ %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
+ %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
+ %r = fadd <8 x float> %a, %b
+ ret <8 x float> %r
+}
+
+; SSE3: vhsubps1:
+; SSE3-NOT: vhsubps
+; SSE3: hsubps
+; SSE3: hsubps
+; AVX: vhsubps1:
+; AVX: vhsubps
+define <8 x float> @vhsubps1(<8 x float> %x, <8 x float> %y) {
+ %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+ %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+ %r = fsub <8 x float> %a, %b
+ ret <8 x float> %r
+}
+
+; SSE3: vhsubps3:
+; SSE3-NOT: vhsubps
+; SSE3: hsubps
+; SSE3: hsubps
+; AVX: vhsubps3:
+; AVX: vhsubps
+define <8 x float> @vhsubps3(<8 x float> %x) {
+ %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
+ %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
+ %r = fsub <8 x float> %a, %b
+ ret <8 x float> %r
+}
+
+; SSE3: vhaddpd1:
+; SSE3-NOT: vhaddpd
+; SSE3: haddpd
+; SSE3: haddpd
+; AVX: vhaddpd1:
+; AVX: vhaddpd
+define <4 x double> @vhaddpd1(<4 x double> %x, <4 x double> %y) {
+ %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %r = fadd <4 x double> %a, %b
+ ret <4 x double> %r
+}
+
+; SSE3: vhsubpd1:
+; SSE3-NOT: vhsubpd
+; SSE3: hsubpd
+; SSE3: hsubpd
+; AVX: vhsubpd1:
+; AVX: vhsubpd
+define <4 x double> @vhsubpd1(<4 x double> %x, <4 x double> %y) {
+ %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %r = fsub <4 x double> %a, %b
+ ret <4 x double> %r
+}
diff --git a/test/CodeGen/X86/inline-asm-q-regs.ll b/test/CodeGen/X86/inline-asm-q-regs.ll
index 1c8e2f9..617bd39 100644
--- a/test/CodeGen/X86/inline-asm-q-regs.ll
+++ b/test/CodeGen/X86/inline-asm-q-regs.ll
@@ -20,3 +20,10 @@ define void @test3(double %tmp) nounwind {
call void asm sideeffect "$0", "q"(double %tmp) nounwind
ret void
}
+
+; rdar://10392864
+define void @test4(i8 signext %val, i8 signext %a, i8 signext %b, i8 signext %c, i8 signext %d) nounwind {
+entry:
+ %0 = tail call { i8, i8, i8, i8, i8 } asm "foo $1, $2, $3, $4, $1\0Axchgb ${0:b}, ${0:h}", "=q,={ax},={bx},={cx},={dx},0,1,2,3,4,~{dirflag},~{fpsr},~{flags}"(i8 %val, i8 %a, i8 %b, i8 %c, i8 %d) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/loop-strength-reduce5.ll b/test/CodeGen/X86/loop-strength-reduce5.ll
index b07eeb6..d50a668 100644
--- a/test/CodeGen/X86/loop-strength-reduce5.ll
+++ b/test/CodeGen/X86/loop-strength-reduce5.ll
@@ -11,9 +11,9 @@ entry:
bb: ; preds = %bb, %entry
%i.014.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
%tmp1 = trunc i32 %i.014.0 to i16 ; <i16> [#uses=2]
- volatile store i16 %tmp1, i16* @X, align 2
+ store volatile i16 %tmp1, i16* @X, align 2
%tmp34 = shl i16 %tmp1, 2 ; <i16> [#uses=1]
- volatile store i16 %tmp34, i16* @Y, align 2
+ store volatile i16 %tmp34, i16* @Y, align 2
%indvar.next = add i32 %i.014.0, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1]
br i1 %exitcond, label %return, label %bb
diff --git a/test/CodeGen/X86/lsr-nonaffine.ll b/test/CodeGen/X86/lsr-nonaffine.ll
index d0d2bbd..d825b5a 100644
--- a/test/CodeGen/X86/lsr-nonaffine.ll
+++ b/test/CodeGen/X86/lsr-nonaffine.ll
@@ -19,7 +19,7 @@ entry:
loop:
%i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
- volatile store i64 %i, i64* %p
+ store volatile i64 %i, i64* %p
%i.next = add i64 %i, %s
%c = icmp slt i64 %i.next, %n
br i1 %c, label %loop, label %exit
diff --git a/test/CodeGen/X86/lsr-sort.ll b/test/CodeGen/X86/lsr-sort.ll
index 1f3b59a..b85ddeb 100644
--- a/test/CodeGen/X86/lsr-sort.ll
+++ b/test/CodeGen/X86/lsr-sort.ll
@@ -12,7 +12,7 @@ entry:
bb: ; preds = %bb, %entry
%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
%1 = trunc i32 %i.03 to i16 ; <i16> [#uses=1]
- volatile store i16 %1, i16* @X, align 2
+ store volatile i16 %1, i16* @X, align 2
%indvar.next = add i32 %i.03, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1]
br i1 %exitcond, label %return, label %bb
diff --git a/test/CodeGen/X86/lzcnt.ll b/test/CodeGen/X86/lzcnt.ll
index e5a55ab..adfc38b 100644
--- a/test/CodeGen/X86/lzcnt.ll
+++ b/test/CodeGen/X86/lzcnt.ll
@@ -1,38 +1,38 @@
; RUN: llc < %s -march=x86-64 -mattr=+lzcnt | FileCheck %s
define i32 @t1(i32 %x) nounwind {
- %tmp = tail call i32 @llvm.ctlz.i32( i32 %x )
+ %tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 false )
ret i32 %tmp
; CHECK: t1:
; CHECK: lzcntl
}
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
define i16 @t2(i16 %x) nounwind {
- %tmp = tail call i16 @llvm.ctlz.i16( i16 %x )
+ %tmp = tail call i16 @llvm.ctlz.i16( i16 %x, i1 false )
ret i16 %tmp
; CHECK: t2:
; CHECK: lzcntw
}
-declare i16 @llvm.ctlz.i16(i16) nounwind readnone
+declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
define i64 @t3(i64 %x) nounwind {
- %tmp = tail call i64 @llvm.ctlz.i64( i64 %x )
+ %tmp = tail call i64 @llvm.ctlz.i64( i64 %x, i1 false )
ret i64 %tmp
; CHECK: t3:
; CHECK: lzcntq
}
-declare i64 @llvm.ctlz.i64(i64) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
define i8 @t4(i8 %x) nounwind {
- %tmp = tail call i8 @llvm.ctlz.i8( i8 %x )
+ %tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 false )
ret i8 %tmp
; CHECK: t4:
; CHECK: lzcntw
}
-declare i8 @llvm.ctlz.i8(i8) nounwind readnone
+declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
diff --git a/test/CodeGen/X86/movmsk.ll b/test/CodeGen/X86/movmsk.ll
index 2368548..928ad03 100644
--- a/test/CodeGen/X86/movmsk.ll
+++ b/test/CodeGen/X86/movmsk.ll
@@ -78,6 +78,22 @@ entry:
ret i32 %shr.i
}
+; PR11570
+define void @float_call_signbit(double %n) {
+entry:
+; FIXME: This should also use movmskps; we don't form the FGETSIGN node
+; in this case, though.
+; CHECK: float_call_signbit:
+; CHECK: movd %xmm0, %rdi
+; FIXME
+ %t0 = bitcast double %n to i64
+ %tobool.i.i.i.i = icmp slt i64 %t0, 0
+ tail call void @float_call_signbit_callee(i1 zeroext %tobool.i.i.i.i)
+ ret void
+}
+declare void @float_call_signbit_callee(i1 zeroext)
+
+
; rdar://10247336
; movmskp{s|d} only set low 4/2 bits, high bits are known zero
diff --git a/test/CodeGen/X86/nancvt.ll b/test/CodeGen/X86/nancvt.ll
index 82b7331..8036710 100644
--- a/test/CodeGen/X86/nancvt.ll
+++ b/test/CodeGen/X86/nancvt.ll
@@ -52,8 +52,8 @@ bb: ; preds = %bb23
%tmp17 = ashr i64 %tmp16, %.cast ; <i64> [#uses=1]
%tmp1718 = trunc i64 %tmp17 to i32 ; <i32> [#uses=1]
%tmp19 = getelementptr [10 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
- volatile store i32 %tmp1718, i32* @var
- volatile store i32 %tmp13, i32* @var
+ store volatile i32 %tmp1718, i32* @var
+ store volatile i32 %tmp13, i32* @var
%tmp21 = load i32* %i, align 4 ; <i32> [#uses=1]
%tmp22 = add i32 %tmp21, 1 ; <i32> [#uses=1]
store i32 %tmp22, i32* %i, align 4
@@ -86,7 +86,7 @@ bb28: ; preds = %bb46
%tmp3940 = bitcast float* %tmp39 to i32* ; <i32*> [#uses=1]
%tmp41 = load i32* %tmp3940, align 4 ; <i32> [#uses=1]
%tmp42 = getelementptr [6 x i8]* @.str1, i32 0, i32 0 ; <i8*> [#uses=1]
- volatile store i32 %tmp41, i32* @var
+ store volatile i32 %tmp41, i32* @var
%tmp44 = load i32* %i, align 4 ; <i32> [#uses=1]
%tmp45 = add i32 %tmp44, 1 ; <i32> [#uses=1]
store i32 %tmp45, i32* %i, align 4
@@ -127,8 +127,8 @@ bb52: ; preds = %bb78
%tmp72 = ashr i64 %tmp70, %.cast71 ; <i64> [#uses=1]
%tmp7273 = trunc i64 %tmp72 to i32 ; <i32> [#uses=1]
%tmp74 = getelementptr [10 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
- volatile store i32 %tmp7273, i32* @var
- volatile store i32 %tmp66, i32* @var
+ store volatile i32 %tmp7273, i32* @var
+ store volatile i32 %tmp66, i32* @var
%tmp76 = load i32* %i, align 4 ; <i32> [#uses=1]
%tmp77 = add i32 %tmp76, 1 ; <i32> [#uses=1]
store i32 %tmp77, i32* %i, align 4
@@ -161,7 +161,7 @@ bb84: ; preds = %bb101
%tmp9495 = bitcast float* %tmp94 to i32* ; <i32*> [#uses=1]
%tmp96 = load i32* %tmp9495, align 4 ; <i32> [#uses=1]
%tmp97 = getelementptr [6 x i8]* @.str1, i32 0, i32 0 ; <i8*> [#uses=1]
- volatile store i32 %tmp96, i32* @var
+ store volatile i32 %tmp96, i32* @var
%tmp99 = load i32* %i, align 4 ; <i32> [#uses=1]
%tmp100 = add i32 %tmp99, 1 ; <i32> [#uses=1]
store i32 %tmp100, i32* %i, align 4
diff --git a/test/CodeGen/X86/narrow-shl-load.ll b/test/CodeGen/X86/narrow-shl-load.ll
index ef27cbc..7822453 100644
--- a/test/CodeGen/X86/narrow-shl-load.ll
+++ b/test/CodeGen/X86/narrow-shl-load.ll
@@ -67,7 +67,7 @@ declare void @exit(i32) noreturn
; DAG Combiner can't fold this into a load of the 1'th byte.
; PR8757
define i32 @test3(i32 *%P) nounwind ssp {
- volatile store i32 128, i32* %P
+ store volatile i32 128, i32* %P
%tmp4.pre = load i32* %P
%phitmp = trunc i32 %tmp4.pre to i16
%phitmp13 = shl i16 %phitmp, 8
diff --git a/test/CodeGen/X86/overlap-shift.ll b/test/CodeGen/X86/overlap-shift.ll
index c1fc041..d185af1 100644
--- a/test/CodeGen/X86/overlap-shift.ll
+++ b/test/CodeGen/X86/overlap-shift.ll
@@ -13,7 +13,7 @@
define i32 @test1(i32 %X) {
%Z = shl i32 %X, 2 ; <i32> [#uses=1]
- volatile store i32 %Z, i32* @G
+ store volatile i32 %Z, i32* @G
ret i32 %X
}
diff --git a/test/CodeGen/X86/peep-vector-extract-insert.ll b/test/CodeGen/X86/peep-vector-extract-insert.ll
index 5e18044..d48a331 100644
--- a/test/CodeGen/X86/peep-vector-extract-insert.ll
+++ b/test/CodeGen/X86/peep-vector-extract-insert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep {pxor %xmm0, %xmm0} | count 2
+; RUN: llc < %s -march=x86-64 | grep {xorps %xmm0, %xmm0} | count 2
define float @foo(<4 x float> %a) {
%b = insertelement <4 x float> %a, float 0.0, i32 3
diff --git a/test/CodeGen/X86/pointer-vector.ll b/test/CodeGen/X86/pointer-vector.ll
new file mode 100644
index 0000000..cc1df2f
--- /dev/null
+++ b/test/CodeGen/X86/pointer-vector.ll
@@ -0,0 +1,138 @@
+; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7 | FileCheck %s
+; RUN: opt -instsimplify %s -disable-output
+
+;CHECK: SHUFF0
+define <8 x i32*> @SHUFF0(<4 x i32*> %ptrv) nounwind {
+entry:
+ %G = shufflevector <4 x i32*> %ptrv, <4 x i32*> %ptrv, <8 x i32> <i32 2, i32 7, i32 1, i32 2, i32 4, i32 5, i32 1, i32 1>
+;CHECK: pshufd
+ ret <8 x i32*> %G
+;CHECK: ret
+}
+
+;CHECK: SHUFF1
+define <4 x i32*> @SHUFF1(<4 x i32*> %ptrv) nounwind {
+entry:
+ %G = shufflevector <4 x i32*> %ptrv, <4 x i32*> %ptrv, <4 x i32> <i32 2, i32 7, i32 7, i32 2>
+;CHECK: pshufd
+ ret <4 x i32*> %G
+;CHECK: ret
+}
+
+;CHECK: SHUFF3
+define <4 x i8*> @SHUFF3(<4 x i8*> %ptrv) nounwind {
+entry:
+ %G = shufflevector <4 x i8*> %ptrv, <4 x i8*> undef, <4 x i32> <i32 2, i32 7, i32 1, i32 2>
+;CHECK: pshufd
+ ret <4 x i8*> %G
+;CHECK: ret
+}
+
+;CHECK: LOAD0
+define <4 x i8*> @LOAD0(<4 x i8*>* %p) nounwind {
+entry:
+ %G = load <4 x i8*>* %p
+;CHECK: movaps
+ ret <4 x i8*> %G
+;CHECK: ret
+}
+
+;CHECK: LOAD1
+define <4 x i8*> @LOAD1(<4 x i8*>* %p) nounwind {
+entry:
+ %G = load <4 x i8*>* %p
+;CHECK: movdqa
+;CHECK: pshufd
+;CHECK: movdqa
+ %T = shufflevector <4 x i8*> %G, <4 x i8*> %G, <4 x i32> <i32 7, i32 1, i32 4, i32 3>
+ store <4 x i8*> %T, <4 x i8*>* %p
+ ret <4 x i8*> %G
+;CHECK: ret
+}
+
+;CHECK: LOAD2
+define <4 x i8*> @LOAD2(<4 x i8*>* %p) nounwind {
+entry:
+ %I = alloca <4 x i8*>
+;CHECK: sub
+ %G = load <4 x i8*>* %p
+;CHECK: movaps
+ store <4 x i8*> %G, <4 x i8*>* %I
+;CHECK: movaps
+ %Z = load <4 x i8*>* %I
+ ret <4 x i8*> %Z
+;CHECK: add
+;CHECK: ret
+}
+
+;CHECK: INT2PTR0
+define <4 x i32> @INT2PTR0(<4 x i8*>* %p) nounwind {
+entry:
+ %G = load <4 x i8*>* %p
+;CHECK: movl
+;CHECK: movaps
+ %K = ptrtoint <4 x i8*> %G to <4 x i32>
+;CHECK: ret
+ ret <4 x i32> %K
+}
+
+;CHECK: INT2PTR1
+define <4 x i32*> @INT2PTR1(<4 x i8>* %p) nounwind {
+entry:
+ %G = load <4 x i8>* %p
+;CHECK: movl
+;CHECK: movd
+;CHECK: pshufb
+;CHECK: pand
+ %K = inttoptr <4 x i8> %G to <4 x i32*>
+;CHECK: ret
+ ret <4 x i32*> %K
+}
+
+;CHECK: BITCAST0
+define <4 x i32*> @BITCAST0(<4 x i8*>* %p) nounwind {
+entry:
+ %G = load <4 x i8*>* %p
+;CHECK: movl
+ %T = bitcast <4 x i8*> %G to <4 x i32*>
+;CHECK: movaps
+;CHECK: ret
+ ret <4 x i32*> %T
+}
+
+;CHECK: BITCAST1
+define <2 x i32*> @BITCAST1(<2 x i8*>* %p) nounwind {
+entry:
+ %G = load <2 x i8*>* %p
+;CHECK: movl
+;CHECK: movd
+;CHECK: pinsrd
+ %T = bitcast <2 x i8*> %G to <2 x i32*>
+;CHECK: ret
+ ret <2 x i32*> %T
+}
+
+;CHECK: ICMP0
+define <4 x i32> @ICMP0(<4 x i8*>* %p0, <4 x i8*>* %p1) nounwind {
+entry:
+ %g0 = load <4 x i8*>* %p0
+ %g1 = load <4 x i8*>* %p1
+ %k = icmp sgt <4 x i8*> %g0, %g1
+ ;CHECK: pcmpgtd
+ %j = select <4 x i1> %k, <4 x i32> <i32 0, i32 1, i32 2, i32 4>, <4 x i32> <i32 9, i32 8, i32 7, i32 6>
+ ret <4 x i32> %j
+ ;CHECK: ret
+}
+
+;CHECK: ICMP1
+define <4 x i32> @ICMP1(<4 x i8*>* %p0, <4 x i8*>* %p1) nounwind {
+entry:
+ %g0 = load <4 x i8*>* %p0
+ %g1 = load <4 x i8*>* %p1
+ %k = icmp eq <4 x i8*> %g0, %g1
+ ;CHECK: pcmpeqd
+ %j = select <4 x i1> %k, <4 x i32> <i32 0, i32 1, i32 2, i32 4>, <4 x i32> <i32 9, i32 8, i32 7, i32 6>
+ ret <4 x i32> %j
+ ;CHECK: ret
+}
+
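A note on the checks above: on the i686 target every pointer is 32 bits, so vectors such as <4 x i8*> and <4 x i32*> are laid out exactly like <4 x i32> once legalized, and pointer-vector shuffles, compares, and bitcasts can lower to the ordinary integer SSE instructions the test expects.

; <4 x i8*> on i686 -> four 32-bit lanes -> pshufd / pcmpeqd / pcmpgtd apply directly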
diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll
index 945ec4c..9b0ef83 100644
--- a/test/CodeGen/X86/pr1505b.ll
+++ b/test/CodeGen/X86/pr1505b.ll
@@ -33,7 +33,7 @@ declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
define i32 @main() {
entry:
; CHECK: flds
- %tmp6 = volatile load float* @a ; <float> [#uses=1]
+ %tmp6 = load volatile float* @a ; <float> [#uses=1]
; CHECK: fstps (%esp)
; CHECK: tanf
%tmp9 = tail call float @tanf( float %tmp6 ) ; <float> [#uses=1]
@@ -41,7 +41,7 @@ entry:
; CHECK: fstp
; CHECK: fldl
- %tmp12 = volatile load double* @b ; <double> [#uses=1]
+ %tmp12 = load volatile double* @b ; <double> [#uses=1]
; CHECK: fstpl (%esp)
; CHECK: tan
%tmp13 = tail call double @tan( double %tmp12 ) ; <double> [#uses=1]
diff --git a/test/CodeGen/X86/pr2182.ll b/test/CodeGen/X86/pr2182.ll
index 2a8bb35..02a3605 100644
--- a/test/CodeGen/X86/pr2182.ll
+++ b/test/CodeGen/X86/pr2182.ll
@@ -15,17 +15,17 @@ define void @loop_2() nounwind {
; CHECK-NEXT: addl $3, (%{{.*}})
; CHECK-NEXT: ret
- %tmp = volatile load i32* @x, align 4 ; <i32> [#uses=1]
+ %tmp = load volatile i32* @x, align 4 ; <i32> [#uses=1]
%tmp1 = add i32 %tmp, 3 ; <i32> [#uses=1]
- volatile store i32 %tmp1, i32* @x, align 4
- %tmp.1 = volatile load i32* @x, align 4 ; <i32> [#uses=1]
+ store volatile i32 %tmp1, i32* @x, align 4
+ %tmp.1 = load volatile i32* @x, align 4 ; <i32> [#uses=1]
%tmp1.1 = add i32 %tmp.1, 3 ; <i32> [#uses=1]
- volatile store i32 %tmp1.1, i32* @x, align 4
- %tmp.2 = volatile load i32* @x, align 4 ; <i32> [#uses=1]
+ store volatile i32 %tmp1.1, i32* @x, align 4
+ %tmp.2 = load volatile i32* @x, align 4 ; <i32> [#uses=1]
%tmp1.2 = add i32 %tmp.2, 3 ; <i32> [#uses=1]
- volatile store i32 %tmp1.2, i32* @x, align 4
- %tmp.3 = volatile load i32* @x, align 4 ; <i32> [#uses=1]
+ store volatile i32 %tmp1.2, i32* @x, align 4
+ %tmp.3 = load volatile i32* @x, align 4 ; <i32> [#uses=1]
%tmp1.3 = add i32 %tmp.3, 3 ; <i32> [#uses=1]
- volatile store i32 %tmp1.3, i32* @x, align 4
+ store volatile i32 %tmp1.3, i32* @x, align 4
ret void
}
diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll
index ebe11a5..ec2f302 100644
--- a/test/CodeGen/X86/prefetch.ll
+++ b/test/CodeGen/X86/prefetch.ll
@@ -1,4 +1,7 @@
; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
+
+; rdar://10538297
define void @t(i8* %ptr) nounwind {
entry:
diff --git a/test/CodeGen/X86/rounding-ops.ll b/test/CodeGen/X86/rounding-ops.ll
new file mode 100644
index 0000000..0dd74ea
--- /dev/null
+++ b/test/CodeGen/X86/rounding-ops.ll
@@ -0,0 +1,132 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse41 | FileCheck -check-prefix=CHECK-SSE %s
+; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s
+
+define float @test1(float %x) nounwind {
+ %call = tail call float @floorf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK-SSE: test1:
+; CHECK-SSE: roundss $1
+
+; CHECK-AVX: test1:
+; CHECK-AVX: vroundss $1
+}
+
+declare float @floorf(float) nounwind readnone
+
+define double @test2(double %x) nounwind {
+ %call = tail call double @floor(double %x) nounwind readnone
+ ret double %call
+
+; CHECK-SSE: test2:
+; CHECK-SSE: roundsd $1
+
+; CHECK-AVX: test2:
+; CHECK-AVX: vroundsd $1
+}
+
+declare double @floor(double) nounwind readnone
+
+define float @test3(float %x) nounwind {
+ %call = tail call float @nearbyintf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK-SSE: test3:
+; CHECK-SSE: roundss $12
+
+; CHECK-AVX: test3:
+; CHECK-AVX: vroundss $12
+}
+
+declare float @nearbyintf(float) nounwind readnone
+
+define double @test4(double %x) nounwind {
+ %call = tail call double @nearbyint(double %x) nounwind readnone
+ ret double %call
+
+; CHECK-SSE: test4:
+; CHECK-SSE: roundsd $12
+
+; CHECK-AVX: test4:
+; CHECK-AVX: vroundsd $12
+}
+
+declare double @nearbyint(double) nounwind readnone
+
+define float @test5(float %x) nounwind {
+ %call = tail call float @ceilf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK-SSE: test5:
+; CHECK-SSE: roundss $2
+
+; CHECK-AVX: test5:
+; CHECK-AVX: vroundss $2
+}
+
+declare float @ceilf(float) nounwind readnone
+
+define double @test6(double %x) nounwind {
+ %call = tail call double @ceil(double %x) nounwind readnone
+ ret double %call
+
+; CHECK-SSE: test6:
+; CHECK-SSE: roundsd $2
+
+; CHECK-AVX: test6:
+; CHECK-AVX: vroundsd $2
+}
+
+declare double @ceil(double) nounwind readnone
+
+define float @test7(float %x) nounwind {
+ %call = tail call float @rintf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK-SSE: test7:
+; CHECK-SSE: roundss $4
+
+; CHECK-AVX: test7:
+; CHECK-AVX: vroundss $4
+}
+
+declare float @rintf(float) nounwind readnone
+
+define double @test8(double %x) nounwind {
+ %call = tail call double @rint(double %x) nounwind readnone
+ ret double %call
+
+; CHECK-SSE: test8:
+; CHECK-SSE: roundsd $4
+
+; CHECK-AVX: test8:
+; CHECK-AVX: vroundsd $4
+}
+
+declare double @rint(double) nounwind readnone
+
+define float @test9(float %x) nounwind {
+ %call = tail call float @truncf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK-SSE: test9:
+; CHECK-SSE: roundss $3
+
+; CHECK-AVX: test9:
+; CHECK-AVX: vroundss $3
+}
+
+declare float @truncf(float) nounwind readnone
+
+define double @test10(double %x) nounwind {
+ %call = tail call double @trunc(double %x) nounwind readnone
+ ret double %call
+
+; CHECK-SSE: test10:
+; CHECK-SSE: roundsd $3
+
+; CHECK-AVX: test10:
+; CHECK-AVX: vroundsd $3
+}
+
+declare double @trunc(double) nounwind readnone
diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll
index a92aca5..4f529c1 100644
--- a/test/CodeGen/X86/segmented-stacks.ll
+++ b/test/CodeGen/X86/segmented-stacks.ll
@@ -20,14 +20,11 @@ false:
; X32: test_basic:
-; X32: leal -12(%esp), %ecx
-; X32-NEXT: cmpl %gs:48, %ecx
+; X32: cmpl %gs:48, %esp
-; X32: subl $8, %esp
-; X32-NEXT: pushl $4
+; X32: pushl $4
; X32-NEXT: pushl $12
; X32-NEXT: calll __morestack
-; X32-NEXT: addl $8, %esp
; X32-NEXT: ret
; X32: movl %esp, %eax
@@ -43,8 +40,7 @@ false:
; X64: test_basic:
-; X64: leaq -24(%rsp), %r11
-; X64-NEXT: cmpq %fs:112, %r11
+; X64: cmpq %fs:112, %rsp
; X64: movabsq $24, %r10
; X64-NEXT: movabsq $0, %r11
@@ -68,19 +64,14 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
%result = add i32 %other, %addend
ret i32 %result
-; X32: leal (%esp), %edx
-; X32-NEXT: cmpl %gs:48, %edx
-
+; X32: cmpl %gs:48, %esp
-; X32: subl $8, %esp
-; X32-NEXT: pushl $4
+; X32: pushl $4
; X32-NEXT: pushl $0
; X32-NEXT: calll __morestack
-; X32-NEXT: addl $8, %esp
; X32-NEXT: ret
-; X64: leaq (%rsp), %r11
-; X64-NEXT: cmpq %fs:112, %r11
+; X64: cmpq %fs:112, %rsp
; X64: movq %r10, %rax
; X64-NEXT: movabsq $0, %r10
@@ -90,3 +81,26 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
; X64-NEXT: movq %rax, %r10
}
+
+define void @test_large() {
+ %mem = alloca i32, i32 10000
+ call void @dummy_use (i32* %mem, i32 0)
+ ret void
+
+; X32: leal -40012(%esp), %ecx
+; X32-NEXT: cmpl %gs:48, %ecx
+
+; X32: pushl $0
+; X32-NEXT: pushl $40012
+; X32-NEXT: calll __morestack
+; X32-NEXT: ret
+
+; X64: leaq -40008(%rsp), %r11
+; X64-NEXT: cmpq %fs:112, %r11
+
+; X64: movabsq $40008, %r10
+; X64-NEXT: movabsq $0, %r11
+; X64-NEXT: callq __morestack
+; X64-NEXT: ret
+
+}
diff --git a/test/CodeGen/X86/sext-subreg.ll b/test/CodeGen/X86/sext-subreg.ll
index b2b9f81..a128af9 100644
--- a/test/CodeGen/X86/sext-subreg.ll
+++ b/test/CodeGen/X86/sext-subreg.ll
@@ -8,10 +8,10 @@ define i64 @t(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
; CHECK: movl %eax
%C = add i64 %A, %B
%D = trunc i64 %C to i32
- volatile store i32 %D, i32* %P
+ store volatile i32 %D, i32* %P
%E = shl i64 %C, 32
%F = ashr i64 %E, 32
- volatile store i64 %F, i64 *%P2
- volatile store i32 %D, i32* %P
+ store volatile i64 %F, i64 *%P2
+ store volatile i32 %D, i32* %P
ret i64 undef
}
diff --git a/test/CodeGen/X86/sse-domains.ll b/test/CodeGen/X86/sse-domains.ll
index 3b66f4f..d1e07c8 100644
--- a/test/CodeGen/X86/sse-domains.ll
+++ b/test/CodeGen/X86/sse-domains.ll
@@ -45,6 +45,7 @@ while.end:
}
; CHECK: f2
+; CHECK: for.body
;
; This loop contains two cvtsi2ss instructions that update the same xmm
; register. Verify that the execution dependency fix pass breaks those
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index af1a73b..1112440 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -140,15 +140,15 @@ define double @ole_inverse(double %x, double %y) nounwind {
}
; CHECK: x_ogt:
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: maxsd %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE: x_ogt:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ogt:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @x_ogt(double %x) nounwind {
@@ -158,15 +158,15 @@ define double @x_ogt(double %x) nounwind {
}
; CHECK: x_olt:
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: minsd %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE: x_olt:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_olt:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @x_olt(double %x) nounwind {
@@ -176,17 +176,17 @@ define double @x_olt(double %x) nounwind {
}
; CHECK: x_ogt_inverse:
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: minsd %xmm0, %xmm1
; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE: x_ogt_inverse:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ogt_inverse:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
@@ -197,17 +197,17 @@ define double @x_ogt_inverse(double %x) nounwind {
}
; CHECK: x_olt_inverse:
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: maxsd %xmm0, %xmm1
; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE: x_olt_inverse:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_olt_inverse:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
@@ -220,11 +220,11 @@ define double @x_olt_inverse(double %x) nounwind {
; CHECK: x_oge:
; CHECK: ucomisd %xmm1, %xmm0
; UNSAFE: x_oge:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_oge:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @x_oge(double %x) nounwind {
@@ -236,11 +236,11 @@ define double @x_oge(double %x) nounwind {
; CHECK: x_ole:
; CHECK: ucomisd %xmm0, %xmm1
; UNSAFE: x_ole:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ole:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @x_ole(double %x) nounwind {
@@ -252,12 +252,12 @@ define double @x_ole(double %x) nounwind {
; CHECK: x_oge_inverse:
; CHECK: ucomisd %xmm1, %xmm0
; UNSAFE: x_oge_inverse:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_oge_inverse:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
@@ -270,12 +270,12 @@ define double @x_oge_inverse(double %x) nounwind {
; CHECK: x_ole_inverse:
; CHECK: ucomisd %xmm0, %xmm1
; UNSAFE: x_ole_inverse:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ole_inverse:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
@@ -414,11 +414,11 @@ define double @ule_inverse(double %x, double %y) nounwind {
; CHECK: x_ugt:
; CHECK: ucomisd %xmm0, %xmm1
; UNSAFE: x_ugt:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ugt:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @x_ugt(double %x) nounwind {
@@ -430,11 +430,11 @@ define double @x_ugt(double %x) nounwind {
; CHECK: x_ult:
; CHECK: ucomisd %xmm1, %xmm0
; UNSAFE: x_ult:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ult:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @x_ult(double %x) nounwind {
@@ -446,12 +446,12 @@ define double @x_ult(double %x) nounwind {
; CHECK: x_ugt_inverse:
; CHECK: ucomisd %xmm0, %xmm1
; UNSAFE: x_ugt_inverse:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ugt_inverse:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
@@ -464,12 +464,12 @@ define double @x_ugt_inverse(double %x) nounwind {
; CHECK: x_ult_inverse:
; CHECK: ucomisd %xmm1, %xmm0
; UNSAFE: x_ult_inverse:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ult_inverse:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
@@ -480,16 +480,16 @@ define double @x_ult_inverse(double %x) nounwind {
}
; CHECK: x_uge:
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: maxsd %xmm0, %xmm1
; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE: x_uge:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_uge:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @x_uge(double %x) nounwind {
@@ -499,16 +499,16 @@ define double @x_uge(double %x) nounwind {
}
; CHECK: x_ule:
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: minsd %xmm0, %xmm1
; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE: x_ule:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ule:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @x_ule(double %x) nounwind {
@@ -518,16 +518,16 @@ define double @x_ule(double %x) nounwind {
}
; CHECK: x_uge_inverse:
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: minsd %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE: x_uge_inverse:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_uge_inverse:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
@@ -538,16 +538,16 @@ define double @x_uge_inverse(double %x) nounwind {
}
; CHECK: x_ule_inverse:
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: maxsd %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE: x_ule_inverse:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ule_inverse:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index 793c026..f6c13ec 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -11,13 +11,13 @@ define void @test({ double, double }* byval %z, double* %P) nounwind {
entry:
%tmp3 = load double* @G, align 16 ; <double> [#uses=1]
%tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1]
- volatile store double %tmp4, double* %P
+ store volatile double %tmp4, double* %P
%tmp = getelementptr { double, double }* %z, i32 0, i32 0 ; <double*> [#uses=1]
- %tmp1 = volatile load double* %tmp, align 8 ; <double> [#uses=1]
+ %tmp1 = load volatile double* %tmp, align 8 ; <double> [#uses=1]
%tmp2 = tail call double @fabs( double %tmp1 ) ; <double> [#uses=1]
; CHECK: andpd{{.*}}4(%esp), %xmm
%tmp6 = fadd double %tmp4, %tmp2 ; <double> [#uses=1]
- volatile store double %tmp6, double* %P, align 8
+ store volatile double %tmp6, double* %P, align 8
ret void
}
diff --git a/test/CodeGen/X86/store-empty-member.ll b/test/CodeGen/X86/store-empty-member.ll
index 37f86c6..aea85b9 100644
--- a/test/CodeGen/X86/store-empty-member.ll
+++ b/test/CodeGen/X86/store-empty-member.ll
@@ -9,6 +9,6 @@
define void @foo() nounwind {
%1 = alloca %testType
- volatile store %testType {i32 1, [0 x i32] zeroinitializer, i32 2}, %testType* %1
+ store volatile %testType {i32 1, [0 x i32] zeroinitializer, i32 2}, %testType* %1
ret void
}
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index d6c16ca..f1b9f20 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -314,7 +314,7 @@ bby:
]
bb7:
- volatile store i32 0, i32* @XYZ
+ store volatile i32 0, i32* @XYZ
unreachable
bbx:
@@ -323,7 +323,7 @@ bbx:
]
bb12:
- volatile store i32 0, i32* @XYZ
+ store volatile i32 0, i32* @XYZ
unreachable
return:
@@ -352,8 +352,8 @@ bby:
]
bb7:
- volatile store i32 0, i32* @XYZ
- volatile store i32 1, i32* @XYZ
+ store volatile i32 0, i32* @XYZ
+ store volatile i32 1, i32* @XYZ
unreachable
bbx:
@@ -362,8 +362,8 @@ bbx:
]
bb12:
- volatile store i32 0, i32* @XYZ
- volatile store i32 1, i32* @XYZ
+ store volatile i32 0, i32* @XYZ
+ store volatile i32 1, i32* @XYZ
unreachable
return:
@@ -390,8 +390,8 @@ bby:
]
bb7:
- volatile store i32 0, i32* @XYZ
- volatile store i32 1, i32* @XYZ
+ store volatile i32 0, i32* @XYZ
+ store volatile i32 1, i32* @XYZ
unreachable
bbx:
@@ -400,8 +400,8 @@ bbx:
]
bb12:
- volatile store i32 0, i32* @XYZ
- volatile store i32 1, i32* @XYZ
+ store volatile i32 0, i32* @XYZ
+ store volatile i32 1, i32* @XYZ
unreachable
return:
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
index a1d797f..b7fe039 100644
--- a/test/CodeGen/X86/twoaddr-lea.ll
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -14,7 +14,7 @@ define i32 @test1(i32 %X) nounwind {
; CHECK-NOT: mov
; CHECK: leal 1(%rdi)
%Z = add i32 %X, 1
- volatile store i32 %Z, i32* @G
+ store volatile i32 %Z, i32* @G
ret i32 %X
}
diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll
index 04bb725..946b126 100644
--- a/test/CodeGen/X86/vec_compare-2.ll
+++ b/test/CodeGen/X86/vec_compare-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -mcpu=penryn | FileCheck %s
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
@@ -8,6 +8,7 @@ declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) {
entry:
+; CHECK: cfi_def_cfa_offset
; CHECK-NOT: set
; CHECK: pcmpgt
; CHECK: blendvps
diff --git a/test/CodeGen/X86/vec_ctbits.ll b/test/CodeGen/X86/vec_ctbits.ll
index f0158d6..bddd535 100644
--- a/test/CodeGen/X86/vec_ctbits.ll
+++ b/test/CodeGen/X86/vec_ctbits.ll
@@ -1,15 +1,15 @@
; RUN: llc < %s -march=x86-64
-declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>)
-declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>)
+declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
define <2 x i64> @footz(<2 x i64> %a) nounwind {
- %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a)
+ %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
ret <2 x i64> %c
}
define <2 x i64> @foolz(<2 x i64> %a) nounwind {
- %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a)
+ %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
ret <2 x i64> %c
}
define <2 x i64> @foopop(<2 x i64> %a) nounwind {
diff --git a/test/CodeGen/X86/vec_shuffle-23.ll b/test/CodeGen/X86/vec_shuffle-23.ll
index 05a3a1e..2468735 100644
--- a/test/CodeGen/X86/vec_shuffle-23.ll
+++ b/test/CodeGen/X86/vec_shuffle-23.ll
@@ -5,7 +5,7 @@ define i32 @t() nounwind {
entry:
%a = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
%b = alloca <4 x i32> ; <<4 x i32>*> [#uses=5]
- volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
+ store volatile <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
%tmp = load <4 x i32>* %a ; <<4 x i32>> [#uses=1]
store <4 x i32> %tmp, <4 x i32>* %b
%tmp1 = load <4 x i32>* %b ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-24.ll b/test/CodeGen/X86/vec_shuffle-24.ll
index 1b104de..d038daf 100644
--- a/test/CodeGen/X86/vec_shuffle-24.ll
+++ b/test/CodeGen/X86/vec_shuffle-24.ll
@@ -5,7 +5,7 @@ entry:
; CHECK: punpckldq
%a = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
%b = alloca <4 x i32> ; <<4 x i32>*> [#uses=5]
- volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
+ store volatile <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
%tmp = load <4 x i32>* %a ; <<4 x i32>> [#uses=1]
store <4 x i32> %tmp, <4 x i32>* %b
%tmp1 = load <4 x i32>* %b ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-38.ll b/test/CodeGen/X86/vec_shuffle-38.ll
index 66da013..96ef883 100644
--- a/test/CodeGen/X86/vec_shuffle-38.ll
+++ b/test/CodeGen/X86/vec_shuffle-38.ll
@@ -46,7 +46,7 @@ entry:
; rdar://10119696
; CHECK: f
-define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind uwtable readonly ssp {
+define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind readonly ssp {
entry:
; CHECK: movlps (%{{rdi|rdx}}), %xmm0
%u110.i = load double* %y, align 1
@@ -56,3 +56,22 @@ entry:
ret <4 x float> %shuffle.i
}
+define <4 x float> @loadhpi2(%struct.Float2* nocapture %vHiCoefPtr_0, %struct.Float2* nocapture %vLoCoefPtr_0, i32 %s) nounwind readonly ssp {
+entry:
+; CHECK: loadhpi2
+; CHECK: movhps (
+; CHECK-NOT: movlhps
+ %0 = bitcast %struct.Float2* %vHiCoefPtr_0 to <1 x i64>*
+ %idx.ext = sext i32 %s to i64
+ %add.ptr = getelementptr inbounds <1 x i64>* %0, i64 %idx.ext
+ %add.ptr.val = load <1 x i64>* %add.ptr, align 1
+ %1 = bitcast <1 x i64> %add.ptr.val to <2 x float>
+ %shuffle.i = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %2 = bitcast %struct.Float2* %vLoCoefPtr_0 to <1 x i64>*
+ %add.ptr2 = getelementptr inbounds <1 x i64>* %2, i64 %idx.ext
+ %add.ptr2.val = load <1 x i64>* %add.ptr2, align 1
+ %3 = bitcast <1 x i64> %add.ptr2.val to <2 x float>
+ %shuffle.i4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %shuffle1.i5 = shufflevector <4 x float> %shuffle.i, <4 x float> %shuffle.i4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ ret <4 x float> %shuffle1.i5
+}
diff --git a/test/CodeGen/X86/vector-gep.ll b/test/CodeGen/X86/vector-gep.ll
new file mode 100644
index 0000000..d032eda
--- /dev/null
+++ b/test/CodeGen/X86/vector-gep.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck %s
+; RUN: opt -instsimplify %s -disable-output
+
+;CHECK: AGEP0
+define <4 x i32*> @AGEP0(i32* %ptr) nounwind {
+entry:
+ %vecinit.i = insertelement <4 x i32*> undef, i32* %ptr, i32 0
+ %vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1
+ %vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2
+ %vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3
+;CHECK: pslld
+;CHECK: padd
+ %A2 = getelementptr <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+;CHECK: pslld
+;CHECK: padd
+ %A3 = getelementptr <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233>
+ ret <4 x i32*> %A3
+;CHECK: ret
+}
+
+;CHECK: AGEP1
+define i32 @AGEP1(<4 x i32*> %param) nounwind {
+entry:
+;CHECK: pslld
+;CHECK: padd
+ %A2 = getelementptr <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ %k = extractelement <4 x i32*> %A2, i32 3
+ %v = load i32* %k
+ ret i32 %v
+;CHECK: ret
+}
+
+;CHECK: AGEP2
+define i32 @AGEP2(<4 x i32*> %param, <4 x i32> %off) nounwind {
+entry:
+;CHECK: pslld
+;CHECK: padd
+ %A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
+ %k = extractelement <4 x i32*> %A2, i32 3
+ %v = load i32* %k
+ ret i32 %v
+;CHECK: ret
+}
+
+;CHECK: AGEP3
+define <4 x i32*> @AGEP3(<4 x i32*> %param, <4 x i32> %off) nounwind {
+entry:
+;CHECK: pslld
+;CHECK: padd
+ %A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
+ %v = alloca i32
+ %k = insertelement <4 x i32*> %A2, i32* %v, i32 3
+ ret <4 x i32*> %k
+;CHECK: ret
+}
+
+;CHECK: AGEP4
+define <4 x i8*> @AGEP4(<4 x i8*> %param, <4 x i32> %off) nounwind {
+entry:
+;CHECK: pslld
+;CHECK: padd
+ %A = getelementptr <4 x i8*> %param, <4 x i32> %off
+ ret <4 x i8*> %A
+;CHECK: ret
+}
+
+;CHECK: AGEP5
+define <4 x i8*> @AGEP5(<4 x i8*> %param, <4 x i8> %off) nounwind {
+entry:
+;CHECK: pslld
+;CHECK: padd
+ %A = getelementptr <4 x i8*> %param, <4 x i8> %off
+ ret <4 x i8*> %A
+;CHECK: ret
+}
+
+
diff --git a/test/CodeGen/X86/volatile.ll b/test/CodeGen/X86/volatile.ll
index 2e5742a..1a82014 100644
--- a/test/CodeGen/X86/volatile.ll
+++ b/test/CodeGen/X86/volatile.ll
@@ -4,14 +4,14 @@
@x = external global double
define void @foo() nounwind {
- %a = volatile load double* @x
- volatile store double 0.0, double* @x
- volatile store double 0.0, double* @x
- %b = volatile load double* @x
+ %a = load volatile double* @x
+ store volatile double 0.0, double* @x
+ store volatile double 0.0, double* @x
+ %b = load volatile double* @x
ret void
}
define void @bar() nounwind {
- %c = volatile load double* @x
+ %c = load volatile double* @x
ret void
}
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index 11d56f5..b959ce8 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
-; CHECK: incw
+; CHECK: incl
; CHECK: incl
; CHECK: incl
; CHECK: addl
diff --git a/test/CodeGen/X86/widen_load-1.ll b/test/CodeGen/X86/widen_load-1.ll
index 639617f..9705d14 100644
--- a/test/CodeGen/X86/widen_load-1.ll
+++ b/test/CodeGen/X86/widen_load-1.ll
@@ -1,5 +1,6 @@
; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
; PR4891
+; PR5626
; This load should be before the call, not after.
diff --git a/test/CodeGen/X86/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll
index 4470074..4242530 100644
--- a/test/CodeGen/X86/zero-remat.ll
+++ b/test/CodeGen/X86/zero-remat.ll
@@ -16,9 +16,9 @@ define double @foo() nounwind {
;CHECK-32: ret
;CHECK-64: foo:
-;CHECK-64: pxor
+;CHECK-64: xorps
;CHECK-64: call
-;CHECK-64: pxor
+;CHECK-64: xorps
;CHECK-64: ret
}
@@ -33,8 +33,8 @@ define float @foof() nounwind {
;CHECK-32: ret
;CHECK-64: foof:
-;CHECK-64: pxor
+;CHECK-64: xorps
;CHECK-64: call
-;CHECK-64: pxor
+;CHECK-64: xorps
;CHECK-64: ret
}
diff --git a/test/CodeGen/XCore/licm-ldwcp.ll b/test/CodeGen/XCore/licm-ldwcp.ll
index 4884f70..794c6bb 100644
--- a/test/CodeGen/XCore/licm-ldwcp.ll
+++ b/test/CodeGen/XCore/licm-ldwcp.ll
@@ -13,6 +13,6 @@ entry:
br label %bb
bb: ; preds = %bb, %entry
- volatile store i32 525509670, i32* %p, align 4
+ store volatile i32 525509670, i32* %p, align 4
br label %bb
}
diff --git a/test/CodeGen/XCore/scavenging.ll b/test/CodeGen/XCore/scavenging.ll
index 3181e96..5b612d0 100644
--- a/test/CodeGen/XCore/scavenging.ll
+++ b/test/CodeGen/XCore/scavenging.ll
@@ -18,32 +18,32 @@ entry:
%x = alloca [100 x i32], align 4 ; <[100 x i32]*> [#uses=2]
%0 = load i32* @size, align 4 ; <i32> [#uses=1]
%1 = alloca i32, i32 %0, align 4 ; <i32*> [#uses=1]
- %2 = volatile load i32* @g0, align 4 ; <i32> [#uses=1]
- %3 = volatile load i32* @g1, align 4 ; <i32> [#uses=1]
- %4 = volatile load i32* @g2, align 4 ; <i32> [#uses=1]
- %5 = volatile load i32* @g3, align 4 ; <i32> [#uses=1]
- %6 = volatile load i32* @g4, align 4 ; <i32> [#uses=1]
- %7 = volatile load i32* @g5, align 4 ; <i32> [#uses=1]
- %8 = volatile load i32* @g6, align 4 ; <i32> [#uses=1]
- %9 = volatile load i32* @g7, align 4 ; <i32> [#uses=1]
- %10 = volatile load i32* @g8, align 4 ; <i32> [#uses=1]
- %11 = volatile load i32* @g9, align 4 ; <i32> [#uses=1]
- %12 = volatile load i32* @g10, align 4 ; <i32> [#uses=1]
- %13 = volatile load i32* @g11, align 4 ; <i32> [#uses=2]
+ %2 = load volatile i32* @g0, align 4 ; <i32> [#uses=1]
+ %3 = load volatile i32* @g1, align 4 ; <i32> [#uses=1]
+ %4 = load volatile i32* @g2, align 4 ; <i32> [#uses=1]
+ %5 = load volatile i32* @g3, align 4 ; <i32> [#uses=1]
+ %6 = load volatile i32* @g4, align 4 ; <i32> [#uses=1]
+ %7 = load volatile i32* @g5, align 4 ; <i32> [#uses=1]
+ %8 = load volatile i32* @g6, align 4 ; <i32> [#uses=1]
+ %9 = load volatile i32* @g7, align 4 ; <i32> [#uses=1]
+ %10 = load volatile i32* @g8, align 4 ; <i32> [#uses=1]
+ %11 = load volatile i32* @g9, align 4 ; <i32> [#uses=1]
+ %12 = load volatile i32* @g10, align 4 ; <i32> [#uses=1]
+ %13 = load volatile i32* @g11, align 4 ; <i32> [#uses=2]
%14 = getelementptr [100 x i32]* %x, i32 0, i32 50 ; <i32*> [#uses=1]
store i32 %13, i32* %14, align 4
- volatile store i32 %13, i32* @g11, align 4
- volatile store i32 %12, i32* @g10, align 4
- volatile store i32 %11, i32* @g9, align 4
- volatile store i32 %10, i32* @g8, align 4
- volatile store i32 %9, i32* @g7, align 4
- volatile store i32 %8, i32* @g6, align 4
- volatile store i32 %7, i32* @g5, align 4
- volatile store i32 %6, i32* @g4, align 4
- volatile store i32 %5, i32* @g3, align 4
- volatile store i32 %4, i32* @g2, align 4
- volatile store i32 %3, i32* @g1, align 4
- volatile store i32 %2, i32* @g0, align 4
+ store volatile i32 %13, i32* @g11, align 4
+ store volatile i32 %12, i32* @g10, align 4
+ store volatile i32 %11, i32* @g9, align 4
+ store volatile i32 %10, i32* @g8, align 4
+ store volatile i32 %9, i32* @g7, align 4
+ store volatile i32 %8, i32* @g6, align 4
+ store volatile i32 %7, i32* @g5, align 4
+ store volatile i32 %6, i32* @g4, align 4
+ store volatile i32 %5, i32* @g3, align 4
+ store volatile i32 %4, i32* @g2, align 4
+ store volatile i32 %3, i32* @g1, align 4
+ store volatile i32 %2, i32* @g0, align 4
%x1 = getelementptr [100 x i32]* %x, i32 0, i32 0 ; <i32*> [#uses=1]
call void @g(i32* %x1, i32* %1) nounwind
ret void
diff --git a/test/DebugInfo/2009-01-15-dbg_declare.ll b/test/DebugInfo/2009-01-15-dbg_declare.ll
deleted file mode 100644
index ab404af..0000000
--- a/test/DebugInfo/2009-01-15-dbg_declare.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc %s -o /dev/null
-
- %llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }*, i8*, i8* }
-@llvm.dbg.variable24 = external constant %llvm.dbg.variable.type ; <%llvm.dbg.variable.type*> [#uses=1]
-
-declare void @llvm.dbg.declare({ }*, { }*) nounwind
-
-define i32 @isascii(i32 %_c) nounwind {
-entry:
- %j = alloca i32
- %0 = bitcast i32* %j to { }*
- call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable24 to { }*))
- unreachable
-}
-
-
diff --git a/test/DebugInfo/2010-05-03-OriginDIE.ll b/test/DebugInfo/2010-05-03-OriginDIE.ll
index 0e1d1fd..94bddc0 100644
--- a/test/DebugInfo/2010-05-03-OriginDIE.ll
+++ b/test/DebugInfo/2010-05-03-OriginDIE.ll
@@ -19,7 +19,7 @@ entry:
%0 = getelementptr inbounds %struct.gpm_t* %gpm, i32 0, i32 2, i32 0 ; <i8*> [#uses=1]
%1 = getelementptr inbounds %struct.gpt_t* %gpt, i32 0, i32 9, i32 0 ; <i8*> [#uses=1]
call void @uuid_LtoB(i8* %0, i8* %1) nounwind, !dbg !0
- %a9 = volatile load i64* %data_addr.i18, align 8 ; <i64> [#uses=1]
+ %a9 = load volatile i64* %data_addr.i18, align 8 ; <i64> [#uses=1]
%a10 = call i64 @llvm.bswap.i64(i64 %a9) nounwind ; <i64> [#uses=1]
%a11 = getelementptr inbounds %struct.gpt_t* %gpt, i32 0, i32 8, !dbg !7 ; <i64*> [#uses=1]
%a12 = load i64* %a11, align 4, !dbg !7 ; <i64> [#uses=1]
@@ -29,7 +29,7 @@ entry:
call void @llvm.dbg.value(metadata !18, i64 0, metadata !19) nounwind
call void @llvm.dbg.declare(metadata !6, metadata !23) nounwind
call void @llvm.dbg.value(metadata !{i64* %data_addr.i17}, i64 0, metadata !34) nounwind
- %a13 = volatile load i64* %data_addr.i17, align 8 ; <i64> [#uses=1]
+ %a13 = load volatile i64* %data_addr.i17, align 8 ; <i64> [#uses=1]
%a14 = call i64 @llvm.bswap.i64(i64 %a13) nounwind ; <i64> [#uses=2]
%a15 = add i64 %a10, %a14, !dbg !7 ; <i64> [#uses=1]
%a16 = sub i64 %a15, %a14 ; <i64> [#uses=1]
diff --git a/test/Feature/const_pv.ll b/test/Feature/const_pv.ll
new file mode 100644
index 0000000..6fd6abd
--- /dev/null
+++ b/test/Feature/const_pv.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as %s -disable-output
+@G = constant <3 x i64> ptrtoint (<3 x i8*> <i8* null, i8* null, i8* null> to <3 x i64>)
+
+@G1 = global i8 zeroinitializer
+@g = constant <2 x i8*> getelementptr (<2 x i8*> <i8* @G1, i8* @G1>, <2 x i32> <i32 0, i32 0>)
+
+@t = constant <2 x i1> icmp ule (<2 x i32> ptrtoint (<2 x i8*> zeroinitializer to <2 x i32>), <2 x i32> zeroinitializer )
+
diff --git a/test/Feature/global_pv.ll b/test/Feature/global_pv.ll
new file mode 100644
index 0000000..d257ec0
--- /dev/null
+++ b/test/Feature/global_pv.ll
@@ -0,0 +1,14 @@
+; RUN: opt -instcombine -S -o - %s | llvm-as
+; RUN: opt -instcombine -globalopt -S -o - %s | llvm-as
+@G1 = global i32 zeroinitializer
+@G2 = global i32 zeroinitializer
+@g = global <2 x i32*> zeroinitializer
+%0 = type { i32, void ()* }
+@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @test }]
+define internal void @test() {
+ %A = insertelement <2 x i32*> undef, i32* @G1, i32 0
+ %B = insertelement <2 x i32*> %A, i32* @G2, i32 1
+ store <2 x i32*> %B, <2 x i32*>* @g
+ ret void
+}
+
diff --git a/test/Feature/intrinsics.ll b/test/Feature/intrinsics.ll
index 2dd6b53..c4e3db6 100644
--- a/test/Feature/intrinsics.ll
+++ b/test/Feature/intrinsics.ll
@@ -6,7 +6,6 @@ declare i1 @llvm.isunordered.f32(float, float)
declare i1 @llvm.isunordered.f64(double, double)
-declare void @llvm.prefetch(i8*, i32, i32)
declare i8 @llvm.ctpop.i8(i8)
@@ -16,21 +15,21 @@ declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)
-declare i8 @llvm.cttz.i8(i8)
+declare i8 @llvm.cttz.i8(i8, i1)
-declare i16 @llvm.cttz.i16(i16)
+declare i16 @llvm.cttz.i16(i16, i1)
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
-declare i64 @llvm.cttz.i64(i64)
+declare i64 @llvm.cttz.i64(i64, i1)
-declare i8 @llvm.ctlz.i8(i8)
+declare i8 @llvm.ctlz.i8(i8, i1)
-declare i16 @llvm.ctlz.i16(i16)
+declare i16 @llvm.ctlz.i16(i16, i1)
-declare i32 @llvm.ctlz.i32(i32)
+declare i32 @llvm.ctlz.i32(i32, i1)
-declare i64 @llvm.ctlz.i64(i64)
+declare i64 @llvm.ctlz.i64(i64, i1)
declare float @llvm.sqrt.f32(float)
@@ -41,21 +40,20 @@ declare double @llvm.sqrt.f64(double)
define void @libm() {
fcmp uno float 1.000000e+00, 2.000000e+00 ; <i1>:1 [#uses=0]
fcmp uno double 3.000000e+00, 4.000000e+00 ; <i1>:2 [#uses=0]
- call void @llvm.prefetch( i8* null, i32 1, i32 3 )
call float @llvm.sqrt.f32( float 5.000000e+00 ) ; <float>:3 [#uses=0]
call double @llvm.sqrt.f64( double 6.000000e+00 ) ; <double>:4 [#uses=0]
call i8 @llvm.ctpop.i8( i8 10 ) ; <i32>:5 [#uses=0]
call i16 @llvm.ctpop.i16( i16 11 ) ; <i32>:6 [#uses=0]
call i32 @llvm.ctpop.i32( i32 12 ) ; <i32>:7 [#uses=0]
call i64 @llvm.ctpop.i64( i64 13 ) ; <i32>:8 [#uses=0]
- call i8 @llvm.ctlz.i8( i8 14 ) ; <i32>:9 [#uses=0]
- call i16 @llvm.ctlz.i16( i16 15 ) ; <i32>:10 [#uses=0]
- call i32 @llvm.ctlz.i32( i32 16 ) ; <i32>:11 [#uses=0]
- call i64 @llvm.ctlz.i64( i64 17 ) ; <i32>:12 [#uses=0]
- call i8 @llvm.cttz.i8( i8 18 ) ; <i32>:13 [#uses=0]
- call i16 @llvm.cttz.i16( i16 19 ) ; <i32>:14 [#uses=0]
- call i32 @llvm.cttz.i32( i32 20 ) ; <i32>:15 [#uses=0]
- call i64 @llvm.cttz.i64( i64 21 ) ; <i32>:16 [#uses=0]
+ call i8 @llvm.ctlz.i8( i8 14, i1 true ) ; <i32>:9 [#uses=0]
+ call i16 @llvm.ctlz.i16( i16 15, i1 true ) ; <i32>:10 [#uses=0]
+ call i32 @llvm.ctlz.i32( i32 16, i1 true ) ; <i32>:11 [#uses=0]
+ call i64 @llvm.ctlz.i64( i64 17, i1 true ) ; <i32>:12 [#uses=0]
+ call i8 @llvm.cttz.i8( i8 18, i1 true ) ; <i32>:13 [#uses=0]
+ call i16 @llvm.cttz.i16( i16 19, i1 true ) ; <i32>:14 [#uses=0]
+ call i32 @llvm.cttz.i32( i32 20, i1 true ) ; <i32>:15 [#uses=0]
+ call i64 @llvm.cttz.i64( i64 21, i1 true ) ; <i32>:16 [#uses=0]
ret void
}
diff --git a/test/Linker/2004-05-07-TypeResolution1.ll b/test/Linker/2004-05-07-TypeResolution1.ll
index f0ade33..4cff9ac 100644
--- a/test/Linker/2004-05-07-TypeResolution1.ll
+++ b/test/Linker/2004-05-07-TypeResolution1.ll
@@ -30,6 +30,6 @@ declare void @func(%struct2*)
define void @tty_init() {
entry:
- volatile store void (%struct2*)* @func, void (%struct2*)** getelementptr (%struct1* @driver1, i64 0, i32 1)
+ store volatile void (%struct2*)* @func, void (%struct2*)** getelementptr (%struct1* @driver1, i64 0, i32 1)
ret void
}
diff --git a/test/Linker/2004-05-07-TypeResolution2.ll b/test/Linker/2004-05-07-TypeResolution2.ll
index 74fe39f..3807127 100644
--- a/test/Linker/2004-05-07-TypeResolution2.ll
+++ b/test/Linker/2004-05-07-TypeResolution2.ll
@@ -9,7 +9,7 @@ target datalayout = "e-p:32:32"
define internal void @f1(%struct1* %tty) {
loopentry.preheader:
%tmp.2.i.i = getelementptr %struct1* %tty, i64 0, i32 1 ; <void (%struct2*)**> [#uses=1]
- %tmp.3.i.i = volatile load void (%struct2*)** %tmp.2.i.i ; <void (%struct2*)*> [#uses=0]
+ %tmp.3.i.i = load volatile void (%struct2*)** %tmp.2.i.i ; <void (%struct2*)*> [#uses=0]
ret void
}
diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s
index 133967b..080bc6f 100644
--- a/test/MC/ARM/basic-arm-instructions.s
+++ b/test/MC/ARM/basic-arm-instructions.s
@@ -153,6 +153,7 @@ Lforward:
add r4, r5, r6, asr #5
add r4, r5, r6, ror #5
add r6, r7, r8, lsl r9
+ add r4, r4, r3, asl r9
add r6, r7, r8, lsr r9
add r6, r7, r8, asr r9
add r6, r7, r8, ror r9
@@ -172,6 +173,9 @@ Lforward:
add r6, r7, ror r9
add r4, r5, rrx
+ add r0, #-4
+ add r4, r5, #-21
+
@ CHECK: add r4, r5, #61440 @ encoding: [0x0f,0x4a,0x85,0xe2]
@ CHECK: add r4, r5, r6 @ encoding: [0x06,0x40,0x85,0xe0]
@ CHECK: add r4, r5, r6, lsl #5 @ encoding: [0x86,0x42,0x85,0xe0]
@@ -180,12 +184,12 @@ Lforward:
@ CHECK: add r4, r5, r6, asr #5 @ encoding: [0xc6,0x42,0x85,0xe0]
@ CHECK: add r4, r5, r6, ror #5 @ encoding: [0xe6,0x42,0x85,0xe0]
@ CHECK: add r6, r7, r8, lsl r9 @ encoding: [0x18,0x69,0x87,0xe0]
+@ CHECK: add r4, r4, r3, lsl r9 @ encoding: [0x13,0x49,0x84,0xe0]
@ CHECK: add r6, r7, r8, lsr r9 @ encoding: [0x38,0x69,0x87,0xe0]
@ CHECK: add r6, r7, r8, asr r9 @ encoding: [0x58,0x69,0x87,0xe0]
@ CHECK: add r6, r7, r8, ror r9 @ encoding: [0x78,0x69,0x87,0xe0]
@ CHECK: add r4, r5, r6, rrx @ encoding: [0x66,0x40,0x85,0xe0]
-
@ CHECK: add r5, r5, #61440 @ encoding: [0x0f,0x5a,0x85,0xe2]
@ CHECK: add r4, r4, r5 @ encoding: [0x05,0x40,0x84,0xe0]
@ CHECK: add r4, r4, r5, lsl #5 @ encoding: [0x85,0x42,0x84,0xe0]
@@ -199,6 +203,9 @@ Lforward:
@ CHECK: add r6, r6, r7, ror r9 @ encoding: [0x77,0x69,0x86,0xe0]
@ CHECK: add r4, r4, r5, rrx @ encoding: [0x65,0x40,0x84,0xe0]
+@ CHECK: sub r0, r0, #4 @ encoding: [0x04,0x00,0x40,0xe2]
+@ CHECK: sub r4, r5, #21 @ encoding: [0x15,0x40,0x45,0xe2]
+
@------------------------------------------------------------------------------
@ AND
@@ -215,6 +222,7 @@ Lforward:
and r6, r7, r8, asr r2
and r6, r7, r8, ror r2
and r10, r1, r6, rrx
+ and r2, r3, #0x7fffffff
@ destination register is optional
and r1, #0xf
@@ -242,6 +250,7 @@ Lforward:
@ CHECK: and r6, r7, r8, asr r2 @ encoding: [0x58,0x62,0x07,0xe0]
@ CHECK: and r6, r7, r8, ror r2 @ encoding: [0x78,0x62,0x07,0xe0]
@ CHECK: and r10, r1, r6, rrx @ encoding: [0x66,0xa0,0x01,0xe0]
+@ CHECK: bic r2, r3, #-2147483648 @ encoding: [0x02,0x21,0xc3,0xe3]
@ CHECK: and r1, r1, #15 @ encoding: [0x0f,0x10,0x01,0xe2]
@ CHECK: and r10, r10, r1 @ encoding: [0x01,0xa0,0x0a,0xe0]
@@ -484,6 +493,7 @@ Lforward:
cmp r7, r8, asr r2
cmp r7, r8, ror r2
cmp r1, r6, rrx
+ cmp r0, #-2
@ CHECK: cmp r1, #15 @ encoding: [0x0f,0x00,0x51,0xe3]
@ CHECK: cmp r1, r6 @ encoding: [0x06,0x00,0x51,0xe1]
@@ -497,6 +507,7 @@ Lforward:
@ CHECK: cmp r7, r8, asr r2 @ encoding: [0x58,0x02,0x57,0xe1]
@ CHECK: cmp r7, r8, ror r2 @ encoding: [0x78,0x02,0x57,0xe1]
@ CHECK: cmp r1, r6, rrx @ encoding: [0x66,0x00,0x51,0xe1]
+@ CHECK: cmn r0, #2 @ encoding: [0x02,0x00,0x70,0xe3]
@------------------------------------------------------------------------------
@@ -755,6 +766,10 @@ Lforward:
ldmda r2!, {r1,r3-r6,sp}
ldmdb r2!, {r1,r3-r6,sp}
+ @ system version
+ ldm r0, {r0, r2, lr}^
+ ldm sp!, {r0-r3, pc}^
+
@ CHECK: ldm r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe8]
@ CHECK: ldm r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe8]
@ CHECK: ldmib r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe9]
@@ -766,6 +781,8 @@ Lforward:
@ CHECK: ldmib r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xb2,0xe9]
@ CHECK: ldmda r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x32,0xe8]
@ CHECK: ldmdb r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x32,0xe9]
+@ CHECK: ldm r0, {lr, r0, r2} ^ @ encoding: [0x05,0x40,0xd0,0xe8]
+@ CHECK: ldm sp!, {pc, r0, r1, r2, r3} ^ @ encoding: [0x0f,0x80,0xfd,0xe8]
@------------------------------------------------------------------------------
@@ -1013,7 +1030,6 @@ Lforward:
@ CHECK: muls r5, r6, r7 @ encoding: [0x96,0x07,0x15,0xe0]
@ CHECK: mulgt r5, r6, r7 @ encoding: [0x96,0x07,0x05,0xc0]
@ CHECK: mulsle r5, r6, r7 @ encoding: [0x96,0x07,0x15,0xd0]
-@ CHECK: mul r11, r11, r5 @ encoding: [0x9b,0x05,0x0b,0xe0]
@------------------------------------------------------------------------------
@@ -1072,6 +1088,14 @@ Lforward:
@ CHECK: mvnslt r5, r6, ror r7 @ encoding: [0x76,0x57,0xf0,0xb1]
@------------------------------------------------------------------------------
+@ NEG
+@------------------------------------------------------------------------------
+ neg r5, r8
+
+@ CHECK: rsb r5, r8, #0 @ encoding: [0x00,0x50,0x68,0xe2]
+
+
+@------------------------------------------------------------------------------
@ NOP
@------------------------------------------------------------------------------
nop
diff --git a/test/MC/ARM/basic-thumb2-instructions.s b/test/MC/ARM/basic-thumb2-instructions.s
index 0dbde19..be640f0 100644
--- a/test/MC/ARM/basic-thumb2-instructions.s
+++ b/test/MC/ARM/basic-thumb2-instructions.s
@@ -74,6 +74,7 @@ _func:
addw r12, r6, #0x100
adds r1, r2, #0x1f0
add r2, #1
+ add r0, r0, #32
@ CHECK: itet eq @ encoding: [0x0a,0xbf]
@ CHECK: addeq r1, r2, #4 @ encoding: [0x11,0x1d]
@@ -87,6 +88,7 @@ _func:
@ CHECK: addw r12, r6, #256 @ encoding: [0x06,0xf2,0x00,0x1c]
@ CHECK: adds.w r1, r2, #496 @ encoding: [0x12,0xf5,0xf8,0x71]
@ CHECK: add.w r2, r2, #1 @ encoding: [0x02,0xf1,0x01,0x02]
+@ CHECK: add.w r0, r0, #32 @ encoding: [0x00,0xf1,0x20,0x00]
@------------------------------------------------------------------------------
@@ -97,12 +99,16 @@ _func:
adds r7, r3, r1, lsl #31
adds.w r0, r3, r6, lsr #25
add.w r4, r8, r1, ror #12
+ add r10, r8
+ add r10, r10, r8
@ CHECK: add.w r1, r2, r8 @ encoding: [0x02,0xeb,0x08,0x01]
@ CHECK: add.w r5, r9, r2, asr #32 @ encoding: [0x09,0xeb,0x22,0x05]
@ CHECK: adds.w r7, r3, r1, lsl #31 @ encoding: [0x13,0xeb,0xc1,0x77]
@ CHECK: adds.w r0, r3, r6, lsr #25 @ encoding: [0x13,0xeb,0x56,0x60]
@ CHECK: add.w r4, r8, r1, ror #12 @ encoding: [0x08,0xeb,0x31,0x34]
+@ CHECK: add r10, r8 @ encoding: [0xc2,0x44]
+@ CHECK: add r10, r8 @ encoding: [0xc2,0x44]
@------------------------------------------------------------------------------
@@ -362,6 +368,7 @@ _func:
cmp sp, r6, lsr #1
cmp r2, r5, asr #24
cmp r1, r4, ror #15
+ cmp r0, #-2
@ CHECK: cmp.w r5, #65280 @ encoding: [0xb5,0xf5,0x7f,0x4f]
@ CHECK: cmp.w r4, r12 @ encoding: [0xb4,0xeb,0x0c,0x0f]
@@ -370,7 +377,7 @@ _func:
@ CHECK: cmp.w sp, r6, lsr #1 @ encoding: [0xbd,0xeb,0x56,0x0f]
@ CHECK: cmp.w r2, r5, asr #24 @ encoding: [0xb2,0xeb,0x25,0x6f]
@ CHECK: cmp.w r1, r4, ror #15 @ encoding: [0xb1,0xeb,0xf4,0x3f]
-
+@ CHECK: cmn.w r0, #2 @ encoding: [0x10,0xf1,0x02,0x0f]
@------------------------------------------------------------------------------
@ DBG
@@ -575,6 +582,7 @@ _func:
ldm r4, {r5, r6}
ldm r5!, {r3, r8}
ldmfd r5!, {r3, r8}
+ ldmia sp!, {r4-r11, pc}
@ CHECK: ldm.w r4, {r4, r5, r8, r9} @ encoding: [0x94,0xe8,0x30,0x03]
@ CHECK: ldm.w r4, {r5, r6} @ encoding: [0x94,0xe8,0x60,0x00]
@@ -592,6 +600,7 @@ _func:
@ CHECK: ldm.w r4, {r5, r6} @ encoding: [0x94,0xe8,0x60,0x00]
@ CHECK: ldm.w r5!, {r3, r8} @ encoding: [0xb5,0xe8,0x08,0x01]
@ CHECK: ldm.w r5!, {r3, r8} @ encoding: [0xb5,0xe8,0x08,0x01]
+@ CHECK: pop.w {pc, r4, r5, r6, r7, r8, r9, r10, r11} @ encoding: [0xbd,0xe8,0xf0,0x8f]
@------------------------------------------------------------------------------
@@ -1139,6 +1148,18 @@ _func:
@ CHECK: mvn r3, #2 @ encoding: [0x6f,0xf0,0x02,0x03]
+@------------------------------------------------------------------------------
+@ MOV(shifted register)
+@------------------------------------------------------------------------------
+ mov r6, r2, lsl #16
+ mov r6, r2, lsr #16
+ movs r6, r2, asr #32
+ movs r6, r2, ror #5
+
+@ CHECK: lsl.w r6, r2, #16 @ encoding: [0x4f,0xea,0x02,0x46]
+@ CHECK: lsr.w r6, r2, #16 @ encoding: [0x4f,0xea,0x12,0x46]
+@ CHECK: asrs r6, r2, #32 @ encoding: [0x16,0x10]
+@ CHECK: rors.w r6, r2, #5 @ encoding: [0x5f,0xea,0x72,0x16]
@------------------------------------------------------------------------------
@@ -1230,7 +1251,7 @@ _func:
muleq r3, r4, r5
it le
mulle r4, r4, r8
- mul r6, r5
+ mul r5, r6
@ CHECK: muls r3, r4, r3 @ encoding: [0x63,0x43]
@ CHECK: mul r3, r4, r3 @ encoding: [0x04,0xfb,0x03,0xf3]
@@ -1239,7 +1260,7 @@ _func:
@ CHECK: muleq r3, r4, r5 @ encoding: [0x04,0xfb,0x05,0xf3]
@ CHECK: it le @ encoding: [0xd8,0xbf]
@ CHECK: mulle r4, r4, r8 @ encoding: [0x04,0xfb,0x08,0xf4]
-@ CHECK: mul r6, r6, r5 @ encoding: [0x06,0xfb,0x05,0xf6]
+@ CHECK: mul r5, r6, r5 @ encoding: [0x06,0xfb,0x05,0xf5]
@------------------------------------------------------------------------------
@@ -1286,6 +1307,16 @@ _func:
@ CHECK: mvneq r2, r3 @ encoding: [0xda,0x43]
@------------------------------------------------------------------------------
+@ NEG
+@------------------------------------------------------------------------------
+ neg r5, r2
+ neg r5, r8
+
+@ CHECK: rsb.w r5, r2, #0 @ encoding: [0xc2,0xf1,0x00,0x05]
+@ CHECK: rsb.w r5, r8, #0 @ encoding: [0xc8,0xf1,0x00,0x05]
+
+
+@------------------------------------------------------------------------------
@ NOP
@------------------------------------------------------------------------------
nop.w
@@ -1365,12 +1396,16 @@ _func:
pld [r6, #33]
pld [r6, #257]
pld [r7, #257]
+ pld [r1, #0]
+ pld [r1, #-0]
@ CHECK: pld [r5, #-4] @ encoding: [0x15,0xf8,0x04,0xfc]
@ CHECK: pld [r6, #32] @ encoding: [0x96,0xf8,0x20,0xf0]
@ CHECK: pld [r6, #33] @ encoding: [0x96,0xf8,0x21,0xf0]
@ CHECK: pld [r6, #257] @ encoding: [0x96,0xf8,0x01,0xf1]
@ CHECK: pld [r7, #257] @ encoding: [0x97,0xf8,0x01,0xf1]
+@ CHECK: pld [r1] @ encoding: [0x91,0xf8,0x00,0xf0]
+@ CHECK: pld [r1, #-0] @ encoding: [0x11,0xf8,0x00,0xfc]
@------------------------------------------------------------------------------
@@ -1646,11 +1681,19 @@ _func:
rsbs r3, r12, #0xf
rsb r1, #0xff
rsb r1, r1, #0xff
+ rsb r11, r11, #0
+ rsb r9, #0
+ rsbs r3, r1, #0
+ rsb r3, r1, #0
@ CHECK: rsb.w r2, r5, #1044480 @ encoding: [0xc5,0xf5,0x7f,0x22]
@ CHECK: rsbs.w r3, r12, #15 @ encoding: [0xdc,0xf1,0x0f,0x03]
@ CHECK: rsb.w r1, r1, #255 @ encoding: [0xc1,0xf1,0xff,0x01]
@ CHECK: rsb.w r1, r1, #255 @ encoding: [0xc1,0xf1,0xff,0x01]
+@ CHECK: rsb.w r11, r11, #0 @ encoding: [0xcb,0xf1,0x00,0x0b]
+@ CHECK: rsb.w r9, r9, #0 @ encoding: [0xc9,0xf1,0x00,0x09]
+@ CHECK: rsbs r3, r1, #0 @ encoding: [0x4b,0x42]
+@ CHECK: rsb.w r3, r1, #0 @ encoding: [0xc1,0xf1,0x00,0x03]
@------------------------------------------------------------------------------
@@ -2566,6 +2609,7 @@ _func:
subw r12, r6, #0x100
subs r1, r2, #0x1f0
sub r2, #1
+ sub r0, r0, #32
@ CHECK: itet eq @ encoding: [0x0a,0xbf]
@ CHECK: subeq r1, r2, #4 @ encoding: [0x11,0x1f]
@@ -2579,6 +2623,7 @@ _func:
@ CHECK: subw r12, r6, #256 @ encoding: [0xa6,0xf2,0x00,0x1c]
@ CHECK: subs.w r1, r2, #496 @ encoding: [0xb2,0xf5,0xf8,0x71]
@ CHECK: sub.w r2, r2, #1 @ encoding: [0xa2,0xf1,0x01,0x02]
+@ CHECK: sub.w r0, r0, #32 @ encoding: [0xa0,0xf1,0x20,0x00]
@------------------------------------------------------------------------------
diff --git a/test/MC/ARM/dot-req.s b/test/MC/ARM/dot-req.s
new file mode 100644
index 0000000..3b4cf5c
--- /dev/null
+++ b/test/MC/ARM/dot-req.s
@@ -0,0 +1,11 @@
+@ RUN: llvm-mc -triple=armv7-apple-darwin -show-encoding < %s | FileCheck %s
+ .syntax unified
+bar:
+fred .req r5
+ mov r11, fred
+.unreq fred
+fred .req r6
+ mov r1, fred
+
+@ CHECK: mov r11, r5 @ encoding: [0x05,0xb0,0xa0,0xe1]
+@ CHECK: mov r1, r6 @ encoding: [0x06,0x10,0xa0,0xe1]
diff --git a/test/MC/ARM/elf-reloc-01.ll b/test/MC/ARM/elf-reloc-01.ll
index e6efe7e..6899d92 100644
--- a/test/MC/ARM/elf-reloc-01.ll
+++ b/test/MC/ARM/elf-reloc-01.ll
@@ -42,12 +42,12 @@ entry:
]
bb: ; preds = %entry
- volatile store i32 11, i32* @var_tls, align 4
- volatile store double 2.200000e+01, double* @var_tls_double, align 8
- volatile store i32 33, i32* @var_static, align 4
- volatile store double 4.400000e+01, double* @var_static_double, align 8
- volatile store i32 55, i32* @var_global, align 4
- volatile store double 6.600000e+01, double* @var_global_double, align 8
+ store volatile i32 11, i32* @var_tls, align 4
+ store volatile double 2.200000e+01, double* @var_tls_double, align 8
+ store volatile i32 33, i32* @var_static, align 4
+ store volatile double 4.400000e+01, double* @var_static_double, align 8
+ store volatile i32 55, i32* @var_global, align 4
+ store volatile double 6.600000e+01, double* @var_global_double, align 8
br label %bb3
bb2: ; preds = %entry
diff --git a/test/MC/ARM/mode-switch.s b/test/MC/ARM/mode-switch.s
index 9d49954..afcc082 100644
--- a/test/MC/ARM/mode-switch.s
+++ b/test/MC/ARM/mode-switch.s
@@ -13,3 +13,14 @@
.code 16
adds r0, r0, r1
@ CHECK: adds r0, r0, r1 @ encoding: [0x40,0x18]
+
+.arm
+ add r0, r0, r1
+@ CHECK: add r0, r0, r1 @ encoding: [0x01,0x00,0x80,0xe0]
+
+.thumb
+ add.w r0, r0, r1
+ adds r0, r0, r1
+
+@ CHECK: add.w r0, r0, r1 @ encoding: [0x00,0xeb,0x01,0x00]
+@ CHECK: adds r0, r0, r1 @ encoding: [0x40,0x18]
diff --git a/test/MC/ARM/neon-add-encoding.s b/test/MC/ARM/neon-add-encoding.s
index e425397..1fdfa4c 100644
--- a/test/MC/ARM/neon-add-encoding.s
+++ b/test/MC/ARM/neon-add-encoding.s
@@ -90,39 +90,81 @@
@ CHECK: vrhadd.u32 q8, q8, q9 @ encoding: [0xe2,0x01,0x60,0xf3]
vrhadd.u32 q8, q8, q9
-@ CHECK: vqadd.s8 d16, d16, d17 @ encoding: [0xb1,0x00,0x40,0xf2]
vqadd.s8 d16, d16, d17
-@ CHECK: vqadd.s16 d16, d16, d17 @ encoding: [0xb1,0x00,0x50,0xf2]
vqadd.s16 d16, d16, d17
-@ CHECK: vqadd.s32 d16, d16, d17 @ encoding: [0xb1,0x00,0x60,0xf2]
vqadd.s32 d16, d16, d17
-@ CHECK: vqadd.s64 d16, d16, d17 @ encoding: [0xb1,0x00,0x70,0xf2]
vqadd.s64 d16, d16, d17
-@ CHECK: vqadd.u8 d16, d16, d17 @ encoding: [0xb1,0x00,0x40,0xf3]
vqadd.u8 d16, d16, d17
-@ CHECK: vqadd.u16 d16, d16, d17 @ encoding: [0xb1,0x00,0x50,0xf3]
vqadd.u16 d16, d16, d17
-@ CHECK: vqadd.u32 d16, d16, d17 @ encoding: [0xb1,0x00,0x60,0xf3]
vqadd.u32 d16, d16, d17
-@ CHECK: vqadd.u64 d16, d16, d17 @ encoding: [0xb1,0x00,0x70,0xf3]
vqadd.u64 d16, d16, d17
-@ CHECK: vqadd.s8 q8, q8, q9 @ encoding: [0xf2,0x00,0x40,0xf2]
+
+@ CHECK: vqadd.s8 d16, d16, d17 @ encoding: [0xb1,0x00,0x40,0xf2]
+@ CHECK: vqadd.s16 d16, d16, d17 @ encoding: [0xb1,0x00,0x50,0xf2]
+@ CHECK: vqadd.s32 d16, d16, d17 @ encoding: [0xb1,0x00,0x60,0xf2]
+@ CHECK: vqadd.s64 d16, d16, d17 @ encoding: [0xb1,0x00,0x70,0xf2]
+@ CHECK: vqadd.u8 d16, d16, d17 @ encoding: [0xb1,0x00,0x40,0xf3]
+@ CHECK: vqadd.u16 d16, d16, d17 @ encoding: [0xb1,0x00,0x50,0xf3]
+@ CHECK: vqadd.u32 d16, d16, d17 @ encoding: [0xb1,0x00,0x60,0xf3]
+@ CHECK: vqadd.u64 d16, d16, d17 @ encoding: [0xb1,0x00,0x70,0xf3]
+
vqadd.s8 q8, q8, q9
-@ CHECK: vqadd.s16 q8, q8, q9 @ encoding: [0xf2,0x00,0x50,0xf2]
vqadd.s16 q8, q8, q9
-@ CHECK: vqadd.s32 q8, q8, q9 @ encoding: [0xf2,0x00,0x60,0xf2]
vqadd.s32 q8, q8, q9
-@ CHECK: vqadd.s64 q8, q8, q9 @ encoding: [0xf2,0x00,0x70,0xf2]
vqadd.s64 q8, q8, q9
-@ CHECK: vqadd.u8 q8, q8, q9 @ encoding: [0xf2,0x00,0x40,0xf3]
vqadd.u8 q8, q8, q9
-@ CHECK: vqadd.u16 q8, q8, q9 @ encoding: [0xf2,0x00,0x50,0xf3]
vqadd.u16 q8, q8, q9
-@ CHECK: vqadd.u32 q8, q8, q9 @ encoding: [0xf2,0x00,0x60,0xf3]
vqadd.u32 q8, q8, q9
-@ CHECK: vqadd.u64 q8, q8, q9 @ encoding: [0xf2,0x00,0x70,0xf3]
vqadd.u64 q8, q8, q9
+@ CHECK: vqadd.s8 q8, q8, q9 @ encoding: [0xf2,0x00,0x40,0xf2]
+@ CHECK: vqadd.s16 q8, q8, q9 @ encoding: [0xf2,0x00,0x50,0xf2]
+@ CHECK: vqadd.s32 q8, q8, q9 @ encoding: [0xf2,0x00,0x60,0xf2]
+@ CHECK: vqadd.s64 q8, q8, q9 @ encoding: [0xf2,0x00,0x70,0xf2]
+@ CHECK: vqadd.u8 q8, q8, q9 @ encoding: [0xf2,0x00,0x40,0xf3]
+@ CHECK: vqadd.u16 q8, q8, q9 @ encoding: [0xf2,0x00,0x50,0xf3]
+@ CHECK: vqadd.u32 q8, q8, q9 @ encoding: [0xf2,0x00,0x60,0xf3]
+@ CHECK: vqadd.u64 q8, q8, q9 @ encoding: [0xf2,0x00,0x70,0xf3]
+
+
+@ two-operand variants.
+ vqadd.s8 d16, d17
+ vqadd.s16 d16, d17
+ vqadd.s32 d16, d17
+ vqadd.s64 d16, d17
+ vqadd.u8 d16, d17
+ vqadd.u16 d16, d17
+ vqadd.u32 d16, d17
+ vqadd.u64 d16, d17
+
+@ CHECK: vqadd.s8 d16, d16, d17 @ encoding: [0xb1,0x00,0x40,0xf2]
+@ CHECK: vqadd.s16 d16, d16, d17 @ encoding: [0xb1,0x00,0x50,0xf2]
+@ CHECK: vqadd.s32 d16, d16, d17 @ encoding: [0xb1,0x00,0x60,0xf2]
+@ CHECK: vqadd.s64 d16, d16, d17 @ encoding: [0xb1,0x00,0x70,0xf2]
+@ CHECK: vqadd.u8 d16, d16, d17 @ encoding: [0xb1,0x00,0x40,0xf3]
+@ CHECK: vqadd.u16 d16, d16, d17 @ encoding: [0xb1,0x00,0x50,0xf3]
+@ CHECK: vqadd.u32 d16, d16, d17 @ encoding: [0xb1,0x00,0x60,0xf3]
+@ CHECK: vqadd.u64 d16, d16, d17 @ encoding: [0xb1,0x00,0x70,0xf3]
+
+ vqadd.s8 q8, q9
+ vqadd.s16 q8, q9
+ vqadd.s32 q8, q9
+ vqadd.s64 q8, q9
+ vqadd.u8 q8, q9
+ vqadd.u16 q8, q9
+ vqadd.u32 q8, q9
+ vqadd.u64 q8, q9
+
+@ CHECK: vqadd.s8 q8, q8, q9 @ encoding: [0xf2,0x00,0x40,0xf2]
+@ CHECK: vqadd.s16 q8, q8, q9 @ encoding: [0xf2,0x00,0x50,0xf2]
+@ CHECK: vqadd.s32 q8, q8, q9 @ encoding: [0xf2,0x00,0x60,0xf2]
+@ CHECK: vqadd.s64 q8, q8, q9 @ encoding: [0xf2,0x00,0x70,0xf2]
+@ CHECK: vqadd.u8 q8, q8, q9 @ encoding: [0xf2,0x00,0x40,0xf3]
+@ CHECK: vqadd.u16 q8, q8, q9 @ encoding: [0xf2,0x00,0x50,0xf3]
+@ CHECK: vqadd.u32 q8, q8, q9 @ encoding: [0xf2,0x00,0x60,0xf3]
+@ CHECK: vqadd.u64 q8, q8, q9 @ encoding: [0xf2,0x00,0x70,0xf3]
+
+
@ CHECK: vaddhn.i16 d16, q8, q9 @ encoding: [0xa2,0x04,0xc0,0xf2]
vaddhn.i16 d16, q8, q9
@ CHECK: vaddhn.i32 d16, q8, q9 @ encoding: [0xa2,0x04,0xd0,0xf2]
@@ -135,3 +177,43 @@
vraddhn.i32 d16, q8, q9
@ CHECK: vraddhn.i64 d16, q8, q9 @ encoding: [0xa2,0x04,0xe0,0xf3]
vraddhn.i64 d16, q8, q9
+
+
+@ Two-operand variants
+
+ vadd.i8 d6, d5
+ vadd.i16 d7, d1
+ vadd.i32 d8, d2
+ vadd.i64 d9, d3
+
+ vadd.i8 q6, q5
+ vadd.i16 q7, q1
+ vadd.i32 q8, q2
+ vadd.i64 q9, q3
+
+@ CHECK: vadd.i8 d6, d6, d5 @ encoding: [0x05,0x68,0x06,0xf2]
+@ CHECK: vadd.i16 d7, d7, d1 @ encoding: [0x01,0x78,0x17,0xf2]
+@ CHECK: vadd.i32 d8, d8, d2 @ encoding: [0x02,0x88,0x28,0xf2]
+@ CHECK: vadd.i64 d9, d9, d3 @ encoding: [0x03,0x98,0x39,0xf2]
+
+@ CHECK: vadd.i8 q6, q6, q5 @ encoding: [0x4a,0xc8,0x0c,0xf2]
+@ CHECK: vadd.i16 q7, q7, q1 @ encoding: [0x42,0xe8,0x1e,0xf2]
+@ CHECK: vadd.i32 q8, q8, q2 @ encoding: [0xc4,0x08,0x60,0xf2]
+@ CHECK: vadd.i64 q9, q9, q3 @ encoding: [0xc6,0x28,0x72,0xf2]
+
+
+ vaddw.s8 q6, d5
+ vaddw.s16 q7, d1
+ vaddw.s32 q8, d2
+
+ vaddw.u8 q6, d5
+ vaddw.u16 q7, d1
+ vaddw.u32 q8, d2
+
+@ CHECK: vaddw.s8 q6, q6, d5 @ encoding: [0x05,0xc1,0x8c,0xf2]
+@ CHECK: vaddw.s16 q7, q7, d1 @ encoding: [0x01,0xe1,0x9e,0xf2]
+@ CHECK: vaddw.s32 q8, q8, d2 @ encoding: [0x82,0x01,0xe0,0xf2]
+
+@ CHECK: vaddw.u8 q6, q6, d5 @ encoding: [0x05,0xc1,0x8c,0xf3]
+@ CHECK: vaddw.u16 q7, q7, d1 @ encoding: [0x01,0xe1,0x9e,0xf3]
+@ CHECK: vaddw.u32 q8, q8, d2 @ encoding: [0x82,0x01,0xe0,0xf3]
diff --git a/test/MC/ARM/neon-bitwise-encoding.s b/test/MC/ARM/neon-bitwise-encoding.s
index 0922cac..2ce9bcc 100644
--- a/test/MC/ARM/neon-bitwise-encoding.s
+++ b/test/MC/ARM/neon-bitwise-encoding.s
@@ -230,3 +230,34 @@
@ CHECK: vorr q4, q7, q3 @ encoding: [0x56,0x81,0x2e,0xf2]
@ CHECK: vorr q4, q7, q3 @ encoding: [0x56,0x81,0x2e,0xf2]
+
+@ Two-operand aliases
+ vand.s8 q6, q5
+ vand.s16 q7, q1
+ vand.s32 q8, q2
+ vand.f64 q8, q2
+
+ veor.8 q6, q5
+ veor.p16 q7, q1
+ veor.u32 q8, q2
+ veor.d q8, q2
+
+ veor.i8 q6, q5
+ veor.16 q7, q1
+ veor.f q8, q2
+ veor.i64 q8, q2
+
+@ CHECK: vand q6, q6, q5 @ encoding: [0x5a,0xc1,0x0c,0xf2]
+@ CHECK: vand q7, q7, q1 @ encoding: [0x52,0xe1,0x0e,0xf2]
+@ CHECK: vand q8, q8, q2 @ encoding: [0xd4,0x01,0x40,0xf2]
+@ CHECK: vand q8, q8, q2 @ encoding: [0xd4,0x01,0x40,0xf2]
+
+@ CHECK: veor q6, q6, q5 @ encoding: [0x5a,0xc1,0x0c,0xf3]
+@ CHECK: veor q7, q7, q1 @ encoding: [0x52,0xe1,0x0e,0xf3]
+@ CHECK: veor q8, q8, q2 @ encoding: [0xd4,0x01,0x40,0xf3]
+@ CHECK: veor q8, q8, q2 @ encoding: [0xd4,0x01,0x40,0xf3]
+
+@ CHECK: veor q6, q6, q5 @ encoding: [0x5a,0xc1,0x0c,0xf3]
+@ CHECK: veor q7, q7, q1 @ encoding: [0x52,0xe1,0x0e,0xf3]
+@ CHECK: veor q8, q8, q2 @ encoding: [0xd4,0x01,0x40,0xf3]
+@ CHECK: veor q8, q8, q2 @ encoding: [0xd4,0x01,0x40,0xf3]
diff --git a/test/MC/ARM/neon-cmp-encoding.s b/test/MC/ARM/neon-cmp-encoding.s
index d94e2f7..b3aedb8 100644
--- a/test/MC/ARM/neon-cmp-encoding.s
+++ b/test/MC/ARM/neon-cmp-encoding.s
@@ -111,3 +111,66 @@
@ CHECK: vcle.s8 d16, d16, #0 @ encoding: [0xa0,0x01,0xf1,0xf3]
@ CHECK: vcgt.s8 d16, d16, #0 @ encoding: [0x20,0x00,0xf1,0xf3]
@ CHECK: vclt.s8 d16, d16, #0 @ encoding: [0x20,0x02,0xf1,0xf3]
+
+
+ vclt.s8 q12, q13, q3
+ vclt.s16 q12, q13, q3
+ vclt.s32 q12, q13, q3
+ vclt.u8 q12, q13, q3
+ vclt.u16 q12, q13, q3
+ vclt.u32 q12, q13, q3
+ vclt.f32 q12, q13, q3
+
+ vclt.s8 d12, d13, d3
+ vclt.s16 d12, d13, d3
+ vclt.s32 d12, d13, d3
+ vclt.u8 d12, d13, d3
+ vclt.u16 d12, d13, d3
+ vclt.u32 d12, d13, d3
+ vclt.f32 d12, d13, d3
+
+@ CHECK: vcgt.s8 q12, q3, q13 @ encoding: [0x6a,0x83,0x46,0xf2]
+@ CHECK: vcgt.s16 q12, q3, q13 @ encoding: [0x6a,0x83,0x56,0xf2]
+@ CHECK: vcgt.s32 q12, q3, q13 @ encoding: [0x6a,0x83,0x66,0xf2]
+@ CHECK: vcgt.u8 q12, q3, q13 @ encoding: [0x6a,0x83,0x46,0xf3]
+@ CHECK: vcgt.u16 q12, q3, q13 @ encoding: [0x6a,0x83,0x56,0xf3]
+@ CHECK: vcgt.u32 q12, q3, q13 @ encoding: [0x6a,0x83,0x66,0xf3]
+@ CHECK: vcgt.f32 q12, q3, q13 @ encoding: [0x6a,0x8e,0x66,0xf3]
+
+@ CHECK: vcgt.s8 d12, d3, d13 @ encoding: [0x0d,0xc3,0x03,0xf2]
+@ CHECK: vcgt.s16 d12, d3, d13 @ encoding: [0x0d,0xc3,0x13,0xf2]
+@ CHECK: vcgt.s32 d12, d3, d13 @ encoding: [0x0d,0xc3,0x23,0xf2]
+@ CHECK: vcgt.u8 d12, d3, d13 @ encoding: [0x0d,0xc3,0x03,0xf3]
+@ CHECK: vcgt.u16 d12, d3, d13 @ encoding: [0x0d,0xc3,0x13,0xf3]
+@ CHECK: vcgt.u32 d12, d3, d13 @ encoding: [0x0d,0xc3,0x23,0xf3]
+@ CHECK: vcgt.f32 d12, d3, d13 @ encoding: [0x0d,0xce,0x23,0xf3]
+
+ vcle.s8 d16, d16, d17
+ vcle.s16 d16, d16, d17
+ vcle.s32 d16, d16, d17
+ vcle.u8 d16, d16, d17
+ vcle.u16 d16, d16, d17
+ vcle.u32 d16, d16, d17
+ vcle.f32 d16, d16, d17
+ vcle.s8 q8, q8, q9
+ vcle.s16 q8, q8, q9
+ vcle.s32 q8, q8, q9
+ vcle.u8 q8, q8, q9
+ vcle.u16 q8, q8, q9
+ vcle.u32 q8, q8, q9
+ vcle.f32 q8, q8, q9
+
+@ CHECK: vcge.s8 d16, d17, d16 @ encoding: [0xb0,0x03,0x41,0xf2]
+@ CHECK: vcge.s16 d16, d17, d16 @ encoding: [0xb0,0x03,0x51,0xf2]
+@ CHECK: vcge.s32 d16, d17, d16 @ encoding: [0xb0,0x03,0x61,0xf2]
+@ CHECK: vcge.u8 d16, d17, d16 @ encoding: [0xb0,0x03,0x41,0xf3]
+@ CHECK: vcge.u16 d16, d17, d16 @ encoding: [0xb0,0x03,0x51,0xf3]
+@ CHECK: vcge.u32 d16, d17, d16 @ encoding: [0xb0,0x03,0x61,0xf3]
+@ CHECK: vcge.f32 d16, d17, d16 @ encoding: [0xa0,0x0e,0x41,0xf3]
+@ CHECK: vcge.s8 q8, q9, q8 @ encoding: [0xf0,0x03,0x42,0xf2]
+@ CHECK: vcge.s16 q8, q9, q8 @ encoding: [0xf0,0x03,0x52,0xf2]
+@ CHECK: vcge.s32 q8, q9, q8 @ encoding: [0xf0,0x03,0x62,0xf2]
+@ CHECK: vcge.u8 q8, q9, q8 @ encoding: [0xf0,0x03,0x42,0xf3]
+@ CHECK: vcge.u16 q8, q9, q8 @ encoding: [0xf0,0x03,0x52,0xf3]
+@ CHECK: vcge.u32 q8, q9, q8 @ encoding: [0xf0,0x03,0x62,0xf3]
+@ CHECK: vcge.f32 q8, q9, q8 @ encoding: [0xe0,0x0e,0x42,0xf3]
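The CHECK lines make the implementation strategy visible: vclt and vcle have
no encodings of their own, so the assembler emits vcgt and vcge with the two
source operands swapped (a < b is encoded as b > a). A minimal sketch
(illustrative, not part of the patch):

  vclt.s8 q12, q13, q3    @ emitted as: vcgt.s8 q12, q3, q13
  vcle.s8 d16, d16, d17   @ emitted as: vcge.s8 d16, d17, d16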
diff --git a/test/MC/ARM/neon-mul-encoding.s b/test/MC/ARM/neon-mul-encoding.s
index 1c7caf2..d6bc1f3 100644
--- a/test/MC/ARM/neon-mul-encoding.s
+++ b/test/MC/ARM/neon-mul-encoding.s
@@ -1,6 +1,5 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
-
vmul.i8 d16, d16, d17
vmul.i16 d16, d16, d17
vmul.i32 d16, d16, d17
@@ -13,6 +12,17 @@
vmul.p8 q8, q8, q9
vmul.i16 d18, d8, d0[3]
+ vmul.i8 d16, d17
+ vmul.i16 d16, d17
+ vmul.i32 d16, d17
+ vmul.f32 d16, d17
+ vmul.i8 q8, q9
+ vmul.i16 q8, q9
+ vmul.i32 q8, q9
+ vmul.f32 q8, q9
+ vmul.p8 d16, d17
+ vmul.p8 q8, q9
+
@ CHECK: vmul.i8 d16, d16, d17 @ encoding: [0xb1,0x09,0x40,0xf2]
@ CHECK: vmul.i16 d16, d16, d17 @ encoding: [0xb1,0x09,0x50,0xf2]
@ CHECK: vmul.i32 d16, d16, d17 @ encoding: [0xb1,0x09,0x60,0xf2]
@@ -25,17 +35,36 @@
@ CHECK: vmul.p8 q8, q8, q9 @ encoding: [0xf2,0x09,0x40,0xf3]
@ CHECK: vmul.i16 d18, d8, d0[3] @ encoding: [0x68,0x28,0xd8,0xf2]
+@ CHECK: vmul.i8 d16, d16, d17 @ encoding: [0xb1,0x09,0x40,0xf2]
+@ CHECK: vmul.i16 d16, d16, d17 @ encoding: [0xb1,0x09,0x50,0xf2]
+@ CHECK: vmul.i32 d16, d16, d17 @ encoding: [0xb1,0x09,0x60,0xf2]
+@ CHECK: vmul.f32 d16, d16, d17 @ encoding: [0xb1,0x0d,0x40,0xf3]
+@ CHECK: vmul.i8 q8, q8, q9 @ encoding: [0xf2,0x09,0x40,0xf2]
+@ CHECK: vmul.i16 q8, q8, q9 @ encoding: [0xf2,0x09,0x50,0xf2]
+@ CHECK: vmul.i32 q8, q8, q9 @ encoding: [0xf2,0x09,0x60,0xf2]
+@ CHECK: vmul.f32 q8, q8, q9 @ encoding: [0xf2,0x0d,0x40,0xf3]
+@ CHECK: vmul.p8 d16, d16, d17 @ encoding: [0xb1,0x09,0x40,0xf3]
+@ CHECK: vmul.p8 q8, q8, q9 @ encoding: [0xf2,0x09,0x40,0xf3]
+
vqdmulh.s16 d16, d16, d17
vqdmulh.s32 d16, d16, d17
vqdmulh.s16 q8, q8, q9
vqdmulh.s32 q8, q8, q9
+ vqdmulh.s16 d16, d17
+ vqdmulh.s32 d16, d17
+ vqdmulh.s16 q8, q9
+ vqdmulh.s32 q8, q9
vqdmulh.s16 d11, d2, d3[0]
@ CHECK: vqdmulh.s16 d16, d16, d17 @ encoding: [0xa1,0x0b,0x50,0xf2]
@ CHECK: vqdmulh.s32 d16, d16, d17 @ encoding: [0xa1,0x0b,0x60,0xf2]
@ CHECK: vqdmulh.s16 q8, q8, q9 @ encoding: [0xe2,0x0b,0x50,0xf2]
@ CHECK: vqdmulh.s32 q8, q8, q9 @ encoding: [0xe2,0x0b,0x60,0xf2]
+@ CHECK: vqdmulh.s16 d16, d16, d17 @ encoding: [0xa1,0x0b,0x50,0xf2]
+@ CHECK: vqdmulh.s32 d16, d16, d17 @ encoding: [0xa1,0x0b,0x60,0xf2]
+@ CHECK: vqdmulh.s16 q8, q8, q9 @ encoding: [0xe2,0x0b,0x50,0xf2]
+@ CHECK: vqdmulh.s32 q8, q8, q9 @ encoding: [0xe2,0x0b,0x60,0xf2]
@ CHECK: vqdmulh.s16 d11, d2, d3[0] @ encoding: [0x43,0xbc,0x92,0xf2]
@@ -72,3 +101,68 @@
@ CHECK: vqdmull.s16 q8, d16, d17 @ encoding: [0xa1,0x0d,0xd0,0xf2]
@ CHECK: vqdmull.s32 q8, d16, d17 @ encoding: [0xa1,0x0d,0xe0,0xf2]
+
+
+ vmul.i16 d0, d4[2]
+ vmul.s16 d1, d7[3]
+ vmul.u16 d2, d1[1]
+ vmul.i32 d3, d2[0]
+ vmul.s32 d4, d3[1]
+ vmul.u32 d5, d4[0]
+ vmul.f32 d6, d5[1]
+
+ vmul.i16 q0, d4[2]
+ vmul.s16 q1, d7[3]
+ vmul.u16 q2, d1[1]
+ vmul.i32 q3, d2[0]
+ vmul.s32 q4, d3[1]
+ vmul.u32 q5, d4[0]
+ vmul.f32 q6, d5[1]
+
+ vmul.i16 d9, d0, d4[2]
+ vmul.s16 d8, d1, d7[3]
+ vmul.u16 d7, d2, d1[1]
+ vmul.i32 d6, d3, d2[0]
+ vmul.s32 d5, d4, d3[1]
+ vmul.u32 d4, d5, d4[0]
+ vmul.f32 d3, d6, d5[1]
+
+ vmul.i16 q9, q0, d4[2]
+ vmul.s16 q8, q1, d7[3]
+ vmul.u16 q7, q2, d1[1]
+ vmul.i32 q6, q3, d2[0]
+ vmul.s32 q5, q4, d3[1]
+ vmul.u32 q4, q5, d4[0]
+ vmul.f32 q3, q6, d5[1]
+
+@ CHECK: vmul.i16 d0, d0, d4[2] @ encoding: [0x64,0x08,0x90,0xf2]
+@ CHECK: vmul.i16 d1, d1, d7[3] @ encoding: [0x6f,0x18,0x91,0xf2]
+@ CHECK: vmul.i16 d2, d2, d1[1] @ encoding: [0x49,0x28,0x92,0xf2]
+@ CHECK: vmul.i32 d3, d3, d2[0] @ encoding: [0x42,0x38,0xa3,0xf2]
+@ CHECK: vmul.i32 d4, d4, d3[1] @ encoding: [0x63,0x48,0xa4,0xf2]
+@ CHECK: vmul.i32 d5, d5, d4[0] @ encoding: [0x44,0x58,0xa5,0xf2]
+@ CHECK: vmul.f32 d6, d6, d5[1] @ encoding: [0x65,0x69,0xa6,0xf2]
+
+@ CHECK: vmul.i16 q0, q0, d4[2] @ encoding: [0x64,0x08,0x90,0xf3]
+@ CHECK: vmul.i16 q1, q1, d7[3] @ encoding: [0x6f,0x28,0x92,0xf3]
+@ CHECK: vmul.i16 q2, q2, d1[1] @ encoding: [0x49,0x48,0x94,0xf3]
+@ CHECK: vmul.i32 q3, q3, d2[0] @ encoding: [0x42,0x68,0xa6,0xf3]
+@ CHECK: vmul.i32 q4, q4, d3[1] @ encoding: [0x63,0x88,0xa8,0xf3]
+@ CHECK: vmul.i32 q5, q5, d4[0] @ encoding: [0x44,0xa8,0xaa,0xf3]
+@ CHECK: vmul.f32 q6, q6, d5[1] @ encoding: [0x65,0xc9,0xac,0xf3]
+
+@ CHECK: vmul.i16 d9, d0, d4[2] @ encoding: [0x64,0x98,0x90,0xf2]
+@ CHECK: vmul.i16 d8, d1, d7[3] @ encoding: [0x6f,0x88,0x91,0xf2]
+@ CHECK: vmul.i16 d7, d2, d1[1] @ encoding: [0x49,0x78,0x92,0xf2]
+@ CHECK: vmul.i32 d6, d3, d2[0] @ encoding: [0x42,0x68,0xa3,0xf2]
+@ CHECK: vmul.i32 d5, d4, d3[1] @ encoding: [0x63,0x58,0xa4,0xf2]
+@ CHECK: vmul.i32 d4, d5, d4[0] @ encoding: [0x44,0x48,0xa5,0xf2]
+@ CHECK: vmul.f32 d3, d6, d5[1] @ encoding: [0x65,0x39,0xa6,0xf2]
+
+@ CHECK: vmul.i16 q9, q0, d4[2] @ encoding: [0x64,0x28,0xd0,0xf3]
+@ CHECK: vmul.i16 q8, q1, d7[3] @ encoding: [0x6f,0x08,0xd2,0xf3]
+@ CHECK: vmul.i16 q7, q2, d1[1] @ encoding: [0x49,0xe8,0x94,0xf3]
+@ CHECK: vmul.i32 q6, q3, d2[0] @ encoding: [0x42,0xc8,0xa6,0xf3]
+@ CHECK: vmul.i32 q5, q4, d3[1] @ encoding: [0x63,0xa8,0xa8,0xf3]
+@ CHECK: vmul.i32 q4, q5, d4[0] @ encoding: [0x44,0x88,0xaa,0xf3]
+@ CHECK: vmul.f32 q3, q6, d5[1] @ encoding: [0x65,0x69,0xac,0xf3]
diff --git a/test/MC/ARM/neon-shift-encoding.s b/test/MC/ARM/neon-shift-encoding.s
index a7a1b83..7e4b543 100644
--- a/test/MC/ARM/neon-shift-encoding.s
+++ b/test/MC/ARM/neon-shift-encoding.s
@@ -1,70 +1,110 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
_foo:
-@ CHECK: vshl.u8 d16, d17, d16 @ encoding: [0xa1,0x04,0x40,0xf3]
vshl.u8 d16, d17, d16
-@ CHECK: vshl.u16 d16, d17, d16 @ encoding: [0xa1,0x04,0x50,0xf3]
vshl.u16 d16, d17, d16
-@ CHECK: vshl.u32 d16, d17, d16 @ encoding: [0xa1,0x04,0x60,0xf3]
vshl.u32 d16, d17, d16
-@ CHECK: vshl.u64 d16, d17, d16 @ encoding: [0xa1,0x04,0x70,0xf3]
vshl.u64 d16, d17, d16
-@ CHECK: vshl.i8 d16, d16, #7 @ encoding: [0x30,0x05,0xcf,0xf2]
vshl.i8 d16, d16, #7
-@ CHECK: vshl.i16 d16, d16, #15 @ encoding: [0x30,0x05,0xdf,0xf2]
vshl.i16 d16, d16, #15
-@ CHECK: vshl.i32 d16, d16, #31 @ encoding: [0x30,0x05,0xff,0xf2]
vshl.i32 d16, d16, #31
-@ CHECK: vshl.i64 d16, d16, #63 @ encoding: [0xb0,0x05,0xff,0xf2]
vshl.i64 d16, d16, #63
-@ CHECK: vshl.u8 q8, q9, q8 @ encoding: [0xe2,0x04,0x40,0xf3]
vshl.u8 q8, q9, q8
-@ CHECK: vshl.u16 q8, q9, q8 @ encoding: [0xe2,0x04,0x50,0xf3]
vshl.u16 q8, q9, q8
-@ CHECK: vshl.u32 q8, q9, q8 @ encoding: [0xe2,0x04,0x60,0xf3]
vshl.u32 q8, q9, q8
-@ CHECK: vshl.u64 q8, q9, q8 @ encoding: [0xe2,0x04,0x70,0xf3]
vshl.u64 q8, q9, q8
-@ CHECK: vshl.i8 q8, q8, #7 @ encoding: [0x70,0x05,0xcf,0xf2]
vshl.i8 q8, q8, #7
-@ CHECK: vshl.i16 q8, q8, #15 @ encoding: [0x70,0x05,0xdf,0xf2]
vshl.i16 q8, q8, #15
-@ CHECK: vshl.i32 q8, q8, #31 @ encoding: [0x70,0x05,0xff,0xf2]
vshl.i32 q8, q8, #31
-@ CHECK: vshl.i64 q8, q8, #63 @ encoding: [0xf0,0x05,0xff,0xf2]
vshl.i64 q8, q8, #63
-@ CHECK: vshr.u8 d16, d16, #7 @ encoding: [0x30,0x00,0xc9,0xf3]
+
+@ CHECK: vshl.u8 d16, d17, d16 @ encoding: [0xa1,0x04,0x40,0xf3]
+@ CHECK: vshl.u16 d16, d17, d16 @ encoding: [0xa1,0x04,0x50,0xf3]
+@ CHECK: vshl.u32 d16, d17, d16 @ encoding: [0xa1,0x04,0x60,0xf3]
+@ CHECK: vshl.u64 d16, d17, d16 @ encoding: [0xa1,0x04,0x70,0xf3]
+@ CHECK: vshl.i8 d16, d16, #7 @ encoding: [0x30,0x05,0xcf,0xf2]
+@ CHECK: vshl.i16 d16, d16, #15 @ encoding: [0x30,0x05,0xdf,0xf2]
+@ CHECK: vshl.i32 d16, d16, #31 @ encoding: [0x30,0x05,0xff,0xf2]
+@ CHECK: vshl.i64 d16, d16, #63 @ encoding: [0xb0,0x05,0xff,0xf2]
+@ CHECK: vshl.u8 q8, q9, q8 @ encoding: [0xe2,0x04,0x40,0xf3]
+@ CHECK: vshl.u16 q8, q9, q8 @ encoding: [0xe2,0x04,0x50,0xf3]
+@ CHECK: vshl.u32 q8, q9, q8 @ encoding: [0xe2,0x04,0x60,0xf3]
+@ CHECK: vshl.u64 q8, q9, q8 @ encoding: [0xe2,0x04,0x70,0xf3]
+@ CHECK: vshl.i8 q8, q8, #7 @ encoding: [0x70,0x05,0xcf,0xf2]
+@ CHECK: vshl.i16 q8, q8, #15 @ encoding: [0x70,0x05,0xdf,0xf2]
+@ CHECK: vshl.i32 q8, q8, #31 @ encoding: [0x70,0x05,0xff,0xf2]
+@ CHECK: vshl.i64 q8, q8, #63 @ encoding: [0xf0,0x05,0xff,0xf2]
+
+
vshr.u8 d16, d16, #7
-@ CHECK: vshr.u16 d16, d16, #15 @ encoding: [0x30,0x00,0xd1,0xf3]
vshr.u16 d16, d16, #15
-@ CHECK: vshr.u32 d16, d16, #31 @ encoding: [0x30,0x00,0xe1,0xf3]
vshr.u32 d16, d16, #31
-@ CHECK: vshr.u64 d16, d16, #63 @ encoding: [0xb0,0x00,0xc1,0xf3]
vshr.u64 d16, d16, #63
-@ CHECK: vshr.u8 q8, q8, #7 @ encoding: [0x70,0x00,0xc9,0xf3]
vshr.u8 q8, q8, #7
-@ CHECK: vshr.u16 q8, q8, #15 @ encoding: [0x70,0x00,0xd1,0xf3]
vshr.u16 q8, q8, #15
-@ CHECK: vshr.u32 q8, q8, #31 @ encoding: [0x70,0x00,0xe1,0xf3]
vshr.u32 q8, q8, #31
-@ CHECK: vshr.u64 q8, q8, #63 @ encoding: [0xf0,0x00,0xc1,0xf3]
vshr.u64 q8, q8, #63
-@ CHECK: vshr.s8 d16, d16, #7 @ encoding: [0x30,0x00,0xc9,0xf2]
vshr.s8 d16, d16, #7
-@ CHECK: vshr.s16 d16, d16, #15 @ encoding: [0x30,0x00,0xd1,0xf2]
vshr.s16 d16, d16, #15
-@ CHECK: vshr.s32 d16, d16, #31 @ encoding: [0x30,0x00,0xe1,0xf2]
vshr.s32 d16, d16, #31
-@ CHECK: vshr.s64 d16, d16, #63 @ encoding: [0xb0,0x00,0xc1,0xf2]
vshr.s64 d16, d16, #63
-@ CHECK: vshr.s8 q8, q8, #7 @ encoding: [0x70,0x00,0xc9,0xf2]
vshr.s8 q8, q8, #7
-@ CHECK: vshr.s16 q8, q8, #15 @ encoding: [0x70,0x00,0xd1,0xf2]
vshr.s16 q8, q8, #15
-@ CHECK: vshr.s32 q8, q8, #31 @ encoding: [0x70,0x00,0xe1,0xf2]
vshr.s32 q8, q8, #31
-@ CHECK: vshr.s64 q8, q8, #63 @ encoding: [0xf0,0x00,0xc1,0xf2]
vshr.s64 q8, q8, #63
+
+@ CHECK: vshr.u8 d16, d16, #7 @ encoding: [0x30,0x00,0xc9,0xf3]
+@ CHECK: vshr.u16 d16, d16, #15 @ encoding: [0x30,0x00,0xd1,0xf3]
+@ CHECK: vshr.u32 d16, d16, #31 @ encoding: [0x30,0x00,0xe1,0xf3]
+@ CHECK: vshr.u64 d16, d16, #63 @ encoding: [0xb0,0x00,0xc1,0xf3]
+@ CHECK: vshr.u8 q8, q8, #7 @ encoding: [0x70,0x00,0xc9,0xf3]
+@ CHECK: vshr.u16 q8, q8, #15 @ encoding: [0x70,0x00,0xd1,0xf3]
+@ CHECK: vshr.u32 q8, q8, #31 @ encoding: [0x70,0x00,0xe1,0xf3]
+@ CHECK: vshr.u64 q8, q8, #63 @ encoding: [0xf0,0x00,0xc1,0xf3]
+@ CHECK: vshr.s8 d16, d16, #7 @ encoding: [0x30,0x00,0xc9,0xf2]
+@ CHECK: vshr.s16 d16, d16, #15 @ encoding: [0x30,0x00,0xd1,0xf2]
+@ CHECK: vshr.s32 d16, d16, #31 @ encoding: [0x30,0x00,0xe1,0xf2]
+@ CHECK: vshr.s64 d16, d16, #63 @ encoding: [0xb0,0x00,0xc1,0xf2]
+@ CHECK: vshr.s8 q8, q8, #7 @ encoding: [0x70,0x00,0xc9,0xf2]
+@ CHECK: vshr.s16 q8, q8, #15 @ encoding: [0x70,0x00,0xd1,0xf2]
+@ CHECK: vshr.s32 q8, q8, #31 @ encoding: [0x70,0x00,0xe1,0xf2]
+@ CHECK: vshr.s64 q8, q8, #63 @ encoding: [0xf0,0x00,0xc1,0xf2]
+
+@ Implied destination operand variants.
+ vshr.u8 d16, #7
+ vshr.u16 d16, #15
+ vshr.u32 d16, #31
+ vshr.u64 d16, #63
+ vshr.u8 q8, #7
+ vshr.u16 q8, #15
+ vshr.u32 q8, #31
+ vshr.u64 q8, #63
+ vshr.s8 d16, #7
+ vshr.s16 d16, #15
+ vshr.s32 d16, #31
+ vshr.s64 d16, #63
+ vshr.s8 q8, #7
+ vshr.s16 q8, #15
+ vshr.s32 q8, #31
+ vshr.s64 q8, #63
+
+@ CHECK: vshr.u8 d16, d16, #7 @ encoding: [0x30,0x00,0xc9,0xf3]
+@ CHECK: vshr.u16 d16, d16, #15 @ encoding: [0x30,0x00,0xd1,0xf3]
+@ CHECK: vshr.u32 d16, d16, #31 @ encoding: [0x30,0x00,0xe1,0xf3]
+@ CHECK: vshr.u64 d16, d16, #63 @ encoding: [0xb0,0x00,0xc1,0xf3]
+@ CHECK: vshr.u8 q8, q8, #7 @ encoding: [0x70,0x00,0xc9,0xf3]
+@ CHECK: vshr.u16 q8, q8, #15 @ encoding: [0x70,0x00,0xd1,0xf3]
+@ CHECK: vshr.u32 q8, q8, #31 @ encoding: [0x70,0x00,0xe1,0xf3]
+@ CHECK: vshr.u64 q8, q8, #63 @ encoding: [0xf0,0x00,0xc1,0xf3]
+@ CHECK: vshr.s8 d16, d16, #7 @ encoding: [0x30,0x00,0xc9,0xf2]
+@ CHECK: vshr.s16 d16, d16, #15 @ encoding: [0x30,0x00,0xd1,0xf2]
+@ CHECK: vshr.s32 d16, d16, #31 @ encoding: [0x30,0x00,0xe1,0xf2]
+@ CHECK: vshr.s64 d16, d16, #63 @ encoding: [0xb0,0x00,0xc1,0xf2]
+@ CHECK: vshr.s8 q8, q8, #7 @ encoding: [0x70,0x00,0xc9,0xf2]
+@ CHECK: vshr.s16 q8, q8, #15 @ encoding: [0x70,0x00,0xd1,0xf2]
+@ CHECK: vshr.s32 q8, q8, #31 @ encoding: [0x70,0x00,0xe1,0xf2]
+@ CHECK: vshr.s64 q8, q8, #63 @ encoding: [0xf0,0x00,0xc1,0xf2]
+
@ CHECK: vsra.u8 d16, d16, #7 @ encoding: [0x30,0x01,0xc9,0xf3]
vsra.u8 d16, d16, #7
@ CHECK: vsra.u16 d16, d16, #15 @ encoding: [0x30,0x01,0xd1,0xf3]
@@ -235,3 +275,64 @@ _foo:
vqrshrn.u32 d16, q8, #13
@ CHECK: vqrshrn.u64 d16, q8, #13 @ encoding: [0x70,0x09,0xf3,0xf3]
vqrshrn.u64 d16, q8, #13
+
+@ Optional destination operand variants.
+ vshl.s8 q4, q5
+ vshl.s16 q4, q5
+ vshl.s32 q4, q5
+ vshl.s64 q4, q5
+
+ vshl.u8 q4, q5
+ vshl.u16 q4, q5
+ vshl.u32 q4, q5
+ vshl.u64 q4, q5
+
+ vshl.s8 d4, d5
+ vshl.s16 d4, d5
+ vshl.s32 d4, d5
+ vshl.s64 d4, d5
+
+ vshl.u8 d4, d5
+ vshl.u16 d4, d5
+ vshl.u32 d4, d5
+ vshl.u64 d4, d5
+
+@ CHECK: vshl.s8 q4, q4, q5 @ encoding: [0x48,0x84,0x0a,0xf2]
+@ CHECK: vshl.s16 q4, q4, q5 @ encoding: [0x48,0x84,0x1a,0xf2]
+@ CHECK: vshl.s32 q4, q4, q5 @ encoding: [0x48,0x84,0x2a,0xf2]
+@ CHECK: vshl.s64 q4, q4, q5 @ encoding: [0x48,0x84,0x3a,0xf2]
+
+@ CHECK: vshl.u8 q4, q4, q5 @ encoding: [0x48,0x84,0x0a,0xf3]
+@ CHECK: vshl.u16 q4, q4, q5 @ encoding: [0x48,0x84,0x1a,0xf3]
+@ CHECK: vshl.u32 q4, q4, q5 @ encoding: [0x48,0x84,0x2a,0xf3]
+@ CHECK: vshl.u64 q4, q4, q5 @ encoding: [0x48,0x84,0x3a,0xf3]
+
+@ CHECK: vshl.s8 d4, d4, d5 @ encoding: [0x04,0x44,0x05,0xf2]
+@ CHECK: vshl.s16 d4, d4, d5 @ encoding: [0x04,0x44,0x15,0xf2]
+@ CHECK: vshl.s32 d4, d4, d5 @ encoding: [0x04,0x44,0x25,0xf2]
+@ CHECK: vshl.s64 d4, d4, d5 @ encoding: [0x04,0x44,0x35,0xf2]
+
+@ CHECK: vshl.u8 d4, d4, d5 @ encoding: [0x04,0x44,0x05,0xf3]
+@ CHECK: vshl.u16 d4, d4, d5 @ encoding: [0x04,0x44,0x15,0xf3]
+@ CHECK: vshl.u32 d4, d4, d5 @ encoding: [0x04,0x44,0x25,0xf3]
+@ CHECK: vshl.u64 d4, d4, d5 @ encoding: [0x04,0x44,0x35,0xf3]
+
+ vshl.s8 q4, #2
+ vshl.s16 q4, #14
+ vshl.s32 q4, #27
+ vshl.s64 q4, #35
+
+ vshl.s8 d4, #6
+ vshl.u16 d4, #10
+ vshl.s32 d4, #17
+ vshl.u64 d4, #43
+
+@ CHECK: vshl.i8 q4, q4, #2 @ encoding: [0x58,0x85,0x8a,0xf2]
+@ CHECK: vshl.i16 q4, q4, #14 @ encoding: [0x58,0x85,0x9e,0xf2]
+@ CHECK: vshl.i32 q4, q4, #27 @ encoding: [0x58,0x85,0xbb,0xf2]
+@ CHECK: vshl.i64 q4, q4, #35 @ encoding: [0xd8,0x85,0xa3,0xf2]
+
+@ CHECK: vshl.i8 d4, d4, #6 @ encoding: [0x14,0x45,0x8e,0xf2]
+@ CHECK: vshl.i16 d4, d4, #10 @ encoding: [0x14,0x45,0x9a,0xf2]
+@ CHECK: vshl.i32 d4, d4, #17 @ encoding: [0x14,0x45,0xb1,0xf2]
+@ CHECK: vshl.i64 d4, d4, #43 @ encoding: [0x94,0x45,0xab,0xf2]
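Note how the CHECK lines normalize the data type here: for an immediate shift
left, signedness is irrelevant, so vshl.s8, vshl.u16 and friends are accepted
and printed in the canonical vshl.iN form, while the register-shift variants
above keep their signed/unsigned suffix because those two forms encode
differently ([...,0xf2] vs [...,0xf3]). Illustrative examples, not part of
the patch:

  vshl.s8  q4, #2     @ prints as: vshl.i8  q4, q4, #2
  vshl.u16 d4, #10    @ prints as: vshl.i16 d4, d4, #10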
diff --git a/test/MC/ARM/neon-shuffle-encoding.s b/test/MC/ARM/neon-shuffle-encoding.s
index ed209f7..26734c1 100644
--- a/test/MC/ARM/neon-shuffle-encoding.s
+++ b/test/MC/ARM/neon-shuffle-encoding.s
@@ -1,50 +1,76 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
-@ CHECK: vext.8 d16, d17, d16, #3 @ encoding: [0xa0,0x03,0xf1,0xf2]
vext.8 d16, d17, d16, #3
-@ CHECK: vext.8 d16, d17, d16, #5 @ encoding: [0xa0,0x05,0xf1,0xf2]
vext.8 d16, d17, d16, #5
-@ CHECK: vext.8 q8, q9, q8, #3 @ encoding: [0xe0,0x03,0xf2,0xf2]
vext.8 q8, q9, q8, #3
-@ CHECK: vext.8 q8, q9, q8, #7 @ encoding: [0xe0,0x07,0xf2,0xf2]
vext.8 q8, q9, q8, #7
-@ CHECK: vext.16 d16, d17, d16, #3 @ encoding: [0xa0,0x06,0xf1,0xf2]
vext.16 d16, d17, d16, #3
-@ CHECK: vext.32 q8, q9, q8, #3 @ encoding: [0xe0,0x0c,0xf2,0xf2]
vext.32 q8, q9, q8, #3
-@ CHECK: vtrn.8 d17, d16 @ encoding: [0xa0,0x10,0xf2,0xf3]
+ vext.64 q8, q9, q8, #1
+
+ vext.8 d17, d16, #3
+ vext.8 d7, d11, #5
+ vext.8 q3, q8, #3
+ vext.8 q9, q4, #7
+ vext.16 d1, d26, #3
+ vext.32 q5, q8, #3
+ vext.64 q5, q8, #1
+
+
+@ CHECK: vext.8 d16, d17, d16, #3 @ encoding: [0xa0,0x03,0xf1,0xf2]
+@ CHECK: vext.8 d16, d17, d16, #5 @ encoding: [0xa0,0x05,0xf1,0xf2]
+@ CHECK: vext.8 q8, q9, q8, #3 @ encoding: [0xe0,0x03,0xf2,0xf2]
+@ CHECK: vext.8 q8, q9, q8, #7 @ encoding: [0xe0,0x07,0xf2,0xf2]
+@ CHECK: vext.16 d16, d17, d16, #3 @ encoding: [0xa0,0x06,0xf1,0xf2]
+@ CHECK: vext.32 q8, q9, q8, #3 @ encoding: [0xe0,0x0c,0xf2,0xf2]
+@ CHECK: vext.64 q8, q9, q8, #1 @ encoding: [0xe0,0x08,0xf2,0xf2]
+
+@ CHECK: vext.8 d17, d17, d16, #3 @ encoding: [0xa0,0x13,0xf1,0xf2]
+@ CHECK: vext.8 d7, d7, d11, #5 @ encoding: [0x0b,0x75,0xb7,0xf2]
+@ CHECK: vext.8 q3, q3, q8, #3 @ encoding: [0x60,0x63,0xb6,0xf2]
+@ CHECK: vext.8 q9, q9, q4, #7 @ encoding: [0xc8,0x27,0xf2,0xf2]
+@ CHECK: vext.16 d1, d1, d26, #3 @ encoding: [0x2a,0x16,0xb1,0xf2]
+@ CHECK: vext.32 q5, q5, q8, #3 @ encoding: [0x60,0xac,0xba,0xf2]
+@ CHECK: vext.64 q5, q5, q8, #1 @ encoding: [0x60,0xa8,0xba,0xf2]
+
+
vtrn.8 d17, d16
-@ CHECK: vtrn.16 d17, d16 @ encoding: [0xa0,0x10,0xf6,0xf3]
vtrn.16 d17, d16
-@ CHECK: vtrn.32 d17, d16 @ encoding: [0xa0,0x10,0xfa,0xf3]
vtrn.32 d17, d16
-@ CHECK: vtrn.8 q9, q8 @ encoding: [0xe0,0x20,0xf2,0xf3]
vtrn.8 q9, q8
-@ CHECK: vtrn.16 q9, q8 @ encoding: [0xe0,0x20,0xf6,0xf3]
vtrn.16 q9, q8
-@ CHECK: vtrn.32 q9, q8 @ encoding: [0xe0,0x20,0xfa,0xf3]
vtrn.32 q9, q8
-@ CHECK: vuzp.8 d17, d16 @ encoding: [0x20,0x11,0xf2,0xf3]
+
+@ CHECK: vtrn.8 d17, d16 @ encoding: [0xa0,0x10,0xf2,0xf3]
+@ CHECK: vtrn.16 d17, d16 @ encoding: [0xa0,0x10,0xf6,0xf3]
+@ CHECK: vtrn.32 d17, d16 @ encoding: [0xa0,0x10,0xfa,0xf3]
+@ CHECK: vtrn.8 q9, q8 @ encoding: [0xe0,0x20,0xf2,0xf3]
+@ CHECK: vtrn.16 q9, q8 @ encoding: [0xe0,0x20,0xf6,0xf3]
+@ CHECK: vtrn.32 q9, q8 @ encoding: [0xe0,0x20,0xfa,0xf3]
+
+
vuzp.8 d17, d16
-@ CHECK: vuzp.16 d17, d16 @ encoding: [0x20,0x11,0xf6,0xf3]
vuzp.16 d17, d16
-@ CHECK: vuzp.8 q9, q8 @ encoding: [0x60,0x21,0xf2,0xf3]
vuzp.8 q9, q8
-@ CHECK: vuzp.16 q9, q8 @ encoding: [0x60,0x21,0xf6,0xf3]
vuzp.16 q9, q8
-@ CHECK: vuzp.32 q9, q8 @ encoding: [0x60,0x21,0xfa,0xf3]
vuzp.32 q9, q8
-@ CHECK: vzip.8 d17, d16 @ encoding: [0xa0,0x11,0xf2,0xf3]
vzip.8 d17, d16
-@ CHECK: vzip.16 d17, d16 @ encoding: [0xa0,0x11,0xf6,0xf3]
vzip.16 d17, d16
-@ CHECK: vzip.8 q9, q8 @ encoding: [0xe0,0x21,0xf2,0xf3]
vzip.8 q9, q8
-@ CHECK: vzip.16 q9, q8 @ encoding: [0xe0,0x21,0xf6,0xf3]
vzip.16 q9, q8
-@ CHECK: vzip.32 q9, q8 @ encoding: [0xe0,0x21,0xfa,0xf3]
vzip.32 q9, q8
+@ CHECK: vuzp.8 d17, d16 @ encoding: [0x20,0x11,0xf2,0xf3]
+@ CHECK: vuzp.16 d17, d16 @ encoding: [0x20,0x11,0xf6,0xf3]
+@ CHECK: vuzp.8 q9, q8 @ encoding: [0x60,0x21,0xf2,0xf3]
+@ CHECK: vuzp.16 q9, q8 @ encoding: [0x60,0x21,0xf6,0xf3]
+@ CHECK: vuzp.32 q9, q8 @ encoding: [0x60,0x21,0xfa,0xf3]
+@ CHECK: vzip.8 d17, d16 @ encoding: [0xa0,0x11,0xf2,0xf3]
+@ CHECK: vzip.16 d17, d16 @ encoding: [0xa0,0x11,0xf6,0xf3]
+@ CHECK: vzip.8 q9, q8 @ encoding: [0xe0,0x21,0xf2,0xf3]
+@ CHECK: vzip.16 q9, q8 @ encoding: [0xe0,0x21,0xf6,0xf3]
+@ CHECK: vzip.32 q9, q8 @ encoding: [0xe0,0x21,0xfa,0xf3]
+
 @ VTRN alternate size suffixes
diff --git a/test/MC/ARM/neon-sub-encoding.s b/test/MC/ARM/neon-sub-encoding.s
index 241a01f..0622e19 100644
--- a/test/MC/ARM/neon-sub-encoding.s
+++ b/test/MC/ARM/neon-sub-encoding.s
@@ -1,25 +1,51 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
-@ CHECK: vsub.i8 d16, d17, d16 @ encoding: [0xa0,0x08,0x41,0xf3]
vsub.i8 d16, d17, d16
-@ CHECK: vsub.i16 d16, d17, d16 @ encoding: [0xa0,0x08,0x51,0xf3]
vsub.i16 d16, d17, d16
-@ CHECK: vsub.i32 d16, d17, d16 @ encoding: [0xa0,0x08,0x61,0xf3]
vsub.i32 d16, d17, d16
-@ CHECK: vsub.i64 d16, d17, d16 @ encoding: [0xa0,0x08,0x71,0xf3]
vsub.i64 d16, d17, d16
-@ CHECK: vsub.f32 d16, d16, d17 @ encoding: [0xa1,0x0d,0x60,0xf2]
vsub.f32 d16, d16, d17
-@ CHECK: vsub.i8 q8, q8, q9 @ encoding: [0xe2,0x08,0x40,0xf3]
vsub.i8 q8, q8, q9
-@ CHECK: vsub.i16 q8, q8, q9 @ encoding: [0xe2,0x08,0x50,0xf3]
vsub.i16 q8, q8, q9
-@ CHECK: vsub.i32 q8, q8, q9 @ encoding: [0xe2,0x08,0x60,0xf3]
vsub.i32 q8, q8, q9
-@ CHECK: vsub.i64 q8, q8, q9 @ encoding: [0xe2,0x08,0x70,0xf3]
vsub.i64 q8, q8, q9
-@ CHECK: vsub.f32 q8, q8, q9 @ encoding: [0xe2,0x0d,0x60,0xf2]
vsub.f32 q8, q8, q9
+
+ vsub.i8 d13, d21
+ vsub.i16 d14, d22
+ vsub.i32 d15, d23
+ vsub.i64 d16, d24
+ vsub.f32 d17, d25
+ vsub.i8 q1, q10
+ vsub.i16 q2, q9
+ vsub.i32 q3, q8
+ vsub.i64 q4, q7
+ vsub.f32 q5, q6
+
+@ CHECK: vsub.i8 d16, d17, d16 @ encoding: [0xa0,0x08,0x41,0xf3]
+@ CHECK: vsub.i16 d16, d17, d16 @ encoding: [0xa0,0x08,0x51,0xf3]
+@ CHECK: vsub.i32 d16, d17, d16 @ encoding: [0xa0,0x08,0x61,0xf3]
+@ CHECK: vsub.i64 d16, d17, d16 @ encoding: [0xa0,0x08,0x71,0xf3]
+@ CHECK: vsub.f32 d16, d16, d17 @ encoding: [0xa1,0x0d,0x60,0xf2]
+@ CHECK: vsub.i8 q8, q8, q9 @ encoding: [0xe2,0x08,0x40,0xf3]
+@ CHECK: vsub.i16 q8, q8, q9 @ encoding: [0xe2,0x08,0x50,0xf3]
+@ CHECK: vsub.i32 q8, q8, q9 @ encoding: [0xe2,0x08,0x60,0xf3]
+@ CHECK: vsub.i64 q8, q8, q9 @ encoding: [0xe2,0x08,0x70,0xf3]
+@ CHECK: vsub.f32 q8, q8, q9 @ encoding: [0xe2,0x0d,0x60,0xf2]
+
+@ CHECK: vsub.i8 d13, d13, d21 @ encoding: [0x25,0xd8,0x0d,0xf3]
+@ CHECK: vsub.i16 d14, d14, d22 @ encoding: [0x26,0xe8,0x1e,0xf3]
+@ CHECK: vsub.i32 d15, d15, d23 @ encoding: [0x27,0xf8,0x2f,0xf3]
+@ CHECK: vsub.i64 d16, d16, d24 @ encoding: [0xa8,0x08,0x70,0xf3]
+@ CHECK: vsub.f32 d17, d17, d25 @ encoding: [0xa9,0x1d,0x61,0xf2]
+@ CHECK: vsub.i8 q1, q1, q10 @ encoding: [0x64,0x28,0x02,0xf3]
+@ CHECK: vsub.i16 q2, q2, q9 @ encoding: [0x62,0x48,0x14,0xf3]
+@ CHECK: vsub.i32 q3, q3, q8 @ encoding: [0x60,0x68,0x26,0xf3]
+@ CHECK: vsub.i64 q4, q4, q7 @ encoding: [0x4e,0x88,0x38,0xf3]
+@ CHECK: vsub.f32 q5, q5, q6 @ encoding: [0x4c,0xad,0x2a,0xf2]
+
+
+
@ CHECK: vsubl.s8 q8, d17, d16 @ encoding: [0xa0,0x02,0xc1,0xf2]
vsubl.s8 q8, d17, d16
@ CHECK: vsubl.s16 q8, d17, d16 @ encoding: [0xa0,0x02,0xd1,0xf2]
diff --git a/test/MC/ARM/neon-table-encoding.s b/test/MC/ARM/neon-table-encoding.s
index 0b3fe40..343ae83 100644
--- a/test/MC/ARM/neon-table-encoding.s
+++ b/test/MC/ARM/neon-table-encoding.s
@@ -1,22 +1,22 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
vtbl.8 d16, {d17}, d16
-@ vtbl.8 d16, {d16, d17}, d18
-@ vtbl.8 d16, {d16, d17, d18}, d20
-@ vtbl.8 d16, {d16, d17, d18, d19}, d20
+ vtbl.8 d16, {d16, d17}, d18
+ vtbl.8 d16, {d16, d17, d18}, d20
+ vtbl.8 d16, {d16, d17, d18, d19}, d20
@ CHECK: vtbl.8 d16, {d17}, d16 @ encoding: [0xa0,0x08,0xf1,0xf3]
-@ FIXME: vtbl.8 d16, {d16, d17}, d18 @ encoding: [0xa2,0x09,0xf0,0xf3]
-@ FIXME: vtbl.8 d16, {d16, d17, d18}, d20 @ encoding: [0xa4,0x0a,0xf0,0xf3]
-@ FIXME: vtbl.8 d16, {d16, d17, d18, d19}, d20 @ encoding: [0xa4,0x0b,0xf0,0xf3]
+@ CHECK: vtbl.8 d16, {d16, d17}, d18 @ encoding: [0xa2,0x09,0xf0,0xf3]
+@ CHECK: vtbl.8 d16, {d16, d17, d18}, d20 @ encoding: [0xa4,0x0a,0xf0,0xf3]
+@ CHECK: vtbl.8 d16, {d16, d17, d18, d19}, d20 @ encoding: [0xa4,0x0b,0xf0,0xf3]
vtbx.8 d18, {d16}, d17
-@ vtbx.8 d19, {d16, d17}, d18
-@ vtbx.8 d20, {d16, d17, d18}, d21
-@ vtbx.8 d20, {d16, d17, d18, d19}, d21
+ vtbx.8 d19, {d16, d17}, d18
+ vtbx.8 d20, {d16, d17, d18}, d21
+ vtbx.8 d20, {d16, d17, d18, d19}, d21
@ CHECK: vtbx.8 d18, {d16}, d17 @ encoding: [0xe1,0x28,0xf0,0xf3]
-@ FIXME: vtbx.8 d19, {d16, d17}, d18 @ encoding: [0xe2,0x39,0xf0,0xf3]
-@ FIXME: vtbx.8 d20, {d16, d17, d18}, d21 @ encoding: [0xe5,0x4a,0xf0,0xf3]
-@ FIXME: vtbx.8 d20, {d16, d17, d18, d19}, d21 @ encoding: [0xe5,0x4b,0xf0,0xf3]
+@ CHECK: vtbx.8 d19, {d16, d17}, d18 @ encoding: [0xe2,0x39,0xf0,0xf3]
+@ CHECK: vtbx.8 d20, {d16, d17, d18}, d21 @ encoding: [0xe5,0x4a,0xf0,0xf3]
+@ CHECK: vtbx.8 d20, {d16, d17, d18, d19}, d21 @ encoding: [0xe5,0x4b,0xf0,0xf3]
diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s
index 503b1ec..736e953 100644
--- a/test/MC/ARM/neon-vld-encoding.s
+++ b/test/MC/ARM/neon-vld-encoding.s
@@ -118,6 +118,20 @@
vld2.16 {d16, d17, d18, d19}, [r0, :128]
vld2.32 {d16, d17, d18, d19}, [r0, :256]
+ vld2.8 {d19, d20}, [r0, :64]!
+ vld2.16 {d16, d17}, [r0, :128]!
+ vld2.32 {q10}, [r0]!
+ vld2.8 {d4-d7}, [r0, :64]!
+ vld2.16 {d1, d2, d3, d4}, [r0, :128]!
+ vld2.32 {q7, q8}, [r0, :256]!
+
+ vld2.8 {d19, d20}, [r0, :64], r6
+ vld2.16 {d16, d17}, [r0, :128], r6
+ vld2.32 {q10}, [r0], r6
+ vld2.8 {d4-d7}, [r0, :64], r6
+ vld2.16 {d1, d2, d3, d4}, [r0, :128], r6
+ vld2.32 {q7, q8}, [r0, :256], r6
+
@ CHECK: vld2.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x08,0x60,0xf4]
@ CHECK: vld2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x60,0xf4]
@ CHECK: vld2.32 {d16, d17}, [r0] @ encoding: [0x8f,0x08,0x60,0xf4]
@@ -125,6 +139,20 @@
@ CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x60,0xf4]
@ CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x60,0xf4]
+@ CHECK: vld2.8 {d19, d20}, [r0, :64]! @ encoding: [0x1d,0x38,0x60,0xf4]
+@ CHECK: vld2.16 {d16, d17}, [r0, :128]! @ encoding: [0x6d,0x08,0x60,0xf4]
+@ CHECK: vld2.32 {d20, d21}, [r0]! @ encoding: [0x8d,0x48,0x60,0xf4]
+@ CHECK: vld2.8 {d4, d5, d6, d7}, [r0, :64]! @ encoding: [0x1d,0x43,0x20,0xf4]
+@ CHECK: vld2.16 {d1, d2, d3, d4}, [r0, :128]! @ encoding: [0x6d,0x13,0x20,0xf4]
+@ CHECK: vld2.32 {d14, d15, d16, d17}, [r0, :256]! @ encoding: [0xbd,0xe3,0x20,0xf4]
+
+@ CHECK: vld2.8 {d19, d20}, [r0, :64], r6 @ encoding: [0x16,0x38,0x60,0xf4]
+@ CHECK: vld2.16 {d16, d17}, [r0, :128], r6 @ encoding: [0x66,0x08,0x60,0xf4]
+@ CHECK: vld2.32 {d20, d21}, [r0], r6 @ encoding: [0x86,0x48,0x60,0xf4]
+@ CHECK: vld2.8 {d4, d5, d6, d7}, [r0, :64], r6 @ encoding: [0x16,0x43,0x20,0xf4]
+@ CHECK: vld2.16 {d1, d2, d3, d4}, [r0, :128], r6 @ encoding: [0x66,0x13,0x20,0xf4]
+@ CHECK: vld2.32 {d14, d15, d16, d17}, [r0, :256], r6 @ encoding: [0xb6,0xe3,0x20,0xf4]
+
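The addressing modes can be read straight off these bytes: the low nibble of
the first encoding byte is the Rm field, and the three forms differ only
there. 0xf means no writeback ([r0, :64]), 0xd means post-increment by the
access size ([r0, :64]!), and any other value names the increment register
(r6 gives 0x6, hence 0x16, 0x66, 0x86 above). Illustrative reading of the
first pair:

  vld2.8 {d19, d20}, [r0, :64]!      @ [0x1d,...]  Rm = 0xd, writeback
  vld2.8 {d19, d20}, [r0, :64], r6   @ [0x16,...]  Rm = 0x6, post-index by r6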
@ vld3.8 {d16, d17, d18}, [r0, :64]
@ vld3.16 {d16, d17, d18}, [r0]
@@ -168,26 +196,55 @@
@ FIXME: vld4.32 {d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x60,0xf4]
-@ vld1.8 {d16[3]}, [r0]
+ vld1.8 {d4[]}, [r1]
+ vld1.8 {d4[]}, [r1]!
+ vld1.8 {d4[]}, [r1], r3
+ vld1.8 {d4[], d5[]}, [r1]
+ vld1.8 {d4[], d5[]}, [r1]!
+ vld1.8 {d4[], d5[]}, [r1], r3
+
+@ CHECK: vld1.8 {d4[]}, [r1] @ encoding: [0x0f,0x4c,0xa1,0xf4]
+@ CHECK: vld1.8 {d4[]}, [r1]! @ encoding: [0x0d,0x4c,0xa1,0xf4]
+@ CHECK: vld1.8 {d4[]}, [r1], r3 @ encoding: [0x03,0x4c,0xa1,0xf4]
+@ CHECK: vld1.8 {d4[], d5[]}, [r1] @ encoding: [0x2f,0x4c,0xa1,0xf4]
+@ CHECK: vld1.8 {d4[], d5[]}, [r1]! @ encoding: [0x2d,0x4c,0xa1,0xf4]
+@ CHECK: vld1.8 {d4[], d5[]}, [r1], r3 @ encoding: [0x23,0x4c,0xa1,0xf4]
+
+ vld1.8 {d16[3]}, [r0]
@ vld1.16 {d16[2]}, [r0, :16]
@ vld1.32 {d16[1]}, [r0, :32]
+ vld1.p8 d12[6], [r2]!
+ vld1.i8 d12[6], [r2], r2
+ vld1.u16 d12[3], [r2]!
+ vld1.16 d12[2], [r2], r2
-@ FIXME: vld1.8 {d16[3]}, [r0] @ encoding: [0x6f,0x00,0xe0,0xf4]
+@ CHECK: vld1.8 {d16[3]}, [r0] @ encoding: [0x6f,0x00,0xe0,0xf4]
@ FIXME: vld1.16 {d16[2]}, [r0, :16] @ encoding: [0x9f,0x04,0xe0,0xf4]
@ FIXME: vld1.32 {d16[1]}, [r0, :32] @ encoding: [0xbf,0x08,0xe0,0xf4]
+@ CHECK: vld1.8 {d12[6]}, [r2]! @ encoding: [0xcd,0xc0,0xa2,0xf4]
+@ CHECK: vld1.8 {d12[6]}, [r2], r2 @ encoding: [0xc2,0xc0,0xa2,0xf4]
+@ CHECK: vld1.16 {d12[3]}, [r2]! @ encoding: [0xcd,0xc4,0xa2,0xf4]
+@ CHECK: vld1.16 {d12[2]}, [r2], r2 @ encoding: [0x82,0xc4,0xa2,0xf4]
@ vld2.8 {d16[1], d17[1]}, [r0, :16]
@ vld2.16 {d16[1], d17[1]}, [r0, :32]
-@ vld2.32 {d16[1], d17[1]}, [r0]
+ vld2.32 {d16[1], d17[1]}, [r0]
@ vld2.16 {d17[1], d19[1]}, [r0]
@ vld2.32 {d17[0], d19[0]}, [r0, :64]
+ vld2.8 {d2[4], d3[4]}, [r2], r3
+ vld2.8 {d2[4], d3[4]}, [r2]!
+ vld2.8 {d2[4], d3[4]}, [r2]
@ FIXME: vld2.8 {d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xe0,0xf4]
@ FIXME: vld2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xe0,0xf4]
-@ FIXME: vld2.32 {d16[1], d17[1]}, [r0] @ encoding: [0x8f,0x09,0xe0,0xf4]
+@ CHECK: vld2.32 {d16[1], d17[1]}, [r0] @ encoding: [0x8f,0x09,0xe0,0xf4]
@ FIXME: vld2.16 {d17[1], d19[1]}, [r0] @ encoding: [0x6f,0x15,0xe0,0xf4]
@ FIXME: vld2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xe0,0xf4]
+@ CHECK: vld2.8 {d2[4], d3[4]}, [r2], r3 @ encoding: [0x83,0x21,0xa2,0xf4]
+@ CHECK: vld2.8 {d2[4], d3[4]}, [r2]! @ encoding: [0x8d,0x21,0xa2,0xf4]
+@ CHECK: vld2.8 {d2[4], d3[4]}, [r2] @ encoding: [0x8f,0x21,0xa2,0xf4]
+
@ vld3.8 {d16[1], d17[1], d18[1]}, [r0]
diff --git a/test/MC/ARM/neon-vst-encoding.s b/test/MC/ARM/neon-vst-encoding.s
index bcfe8cf..3a4cb87 100644
--- a/test/MC/ARM/neon-vst-encoding.s
+++ b/test/MC/ARM/neon-vst-encoding.s
@@ -1,37 +1,61 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-apple-darwin -show-encoding < %s | FileCheck %s
vst1.8 {d16}, [r0, :64]
-@ vst1.16 {d16}, [r0]
-@ vst1.32 {d16}, [r0]
-@ vst1.64 {d16}, [r0]
-@ vst1.8 {d16, d17}, [r0, :64]
-@ vst1.16 {d16, d17}, [r0, :128]
-@ vst1.32 {d16, d17}, [r0]
-@ vst1.64 {d16, d17}, [r0]
+ vst1.16 {d16}, [r0]
+ vst1.32 {d16}, [r0]
+ vst1.64 {d16}, [r0]
+ vst1.8 {d16, d17}, [r0, :64]
+ vst1.16 {d16, d17}, [r0, :128]
+ vst1.32 {d16, d17}, [r0]
+ vst1.64 {d16, d17}, [r0]
+ vst1.8 {d16, d17, d18}, [r0, :64]
+ vst1.8 {d16, d17, d18}, [r0, :64]!
+ vst1.8 {d16, d17, d18}, [r0], r3
+ vst1.8 {d16, d17, d18, d19}, [r0, :64]
+ vst1.16 {d16, d17, d18, d19}, [r1, :64]!
+ vst1.64 {d16, d17, d18, d19}, [r3], r2
@ CHECK: vst1.8 {d16}, [r0, :64] @ encoding: [0x1f,0x07,0x40,0xf4]
-@ FIXME: vst1.16 {d16}, [r0] @ encoding: [0x4f,0x07,0x40,0xf4]
-@ FIXME: vst1.32 {d16}, [r0] @ encoding: [0x8f,0x07,0x40,0xf4]
-@ FIXME: vst1.64 {d16}, [r0] @ encoding: [0xcf,0x07,0x40,0xf4]
-@ FIXME: vst1.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x0a,0x40,0xf4]
-@ FIXME: vst1.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x0a,0x40,0xf4]
-@ FIXME: vst1.32 {d16, d17}, [r0] @ encoding: [0x8f,0x0a,0x40,0xf4]
-@ FIXME: vst1.64 {d16, d17}, [r0] @ encoding: [0xcf,0x0a,0x40,0xf4]
-
-
-@ vst2.8 {d16, d17}, [r0, :64]
-@ vst2.16 {d16, d17}, [r0, :128]
-@ vst2.32 {d16, d17}, [r0]
-@ vst2.8 {d16, d17, d18, d19}, [r0, :64]
-@ vst2.16 {d16, d17, d18, d19}, [r0, :128]
-@ vst2.32 {d16, d17, d18, d19}, [r0, :256]
-
-@ FIXME: vst2.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x08,0x40,0xf4]
-@ FIXME: vst2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x40,0xf4]
-@ FIXME: vst2.32 {d16, d17}, [r0] @ encoding: [0x8f,0x08,0x40,0xf4]
-@ FIXME: vst2.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x40,0xf4]
-@ FIXME: vst2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x40,0xf4]
-@ FIXME: vst2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x40,0xf4]
+@ CHECK: vst1.16 {d16}, [r0] @ encoding: [0x4f,0x07,0x40,0xf4]
+@ CHECK: vst1.32 {d16}, [r0] @ encoding: [0x8f,0x07,0x40,0xf4]
+@ CHECK: vst1.64 {d16}, [r0] @ encoding: [0xcf,0x07,0x40,0xf4]
+@ CHECK: vst1.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x0a,0x40,0xf4]
+@ CHECK: vst1.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x0a,0x40,0xf4]
+@ CHECK: vst1.32 {d16, d17}, [r0] @ encoding: [0x8f,0x0a,0x40,0xf4]
+@ CHECK: vst1.64 {d16, d17}, [r0] @ encoding: [0xcf,0x0a,0x40,0xf4]
+@ CHECK: vst1.8 {d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x06,0x40,0xf4]
+@ CHECK: vst1.8 {d16, d17, d18}, [r0, :64]! @ encoding: [0x1d,0x06,0x40,0xf4]
+@ CHECK: vst1.8 {d16, d17, d18}, [r0], r3 @ encoding: [0x03,0x06,0x40,0xf4]
+@ CHECK: vst1.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x02,0x40,0xf4]
+@ CHECK: vst1.16 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x5d,0x02,0x41,0xf4]
+@ CHECK: vst1.64 {d16, d17, d18, d19}, [r3], r2 @ encoding: [0xc2,0x02,0x43,0xf4]
+
+
+ vst2.8 {d16, d17}, [r0, :64]
+ vst2.16 {d16, d17}, [r0, :128]
+ vst2.32 {d16, d17}, [r0]
+ vst2.8 {d16, d17, d18, d19}, [r0, :64]
+ vst2.16 {d16, d17, d18, d19}, [r0, :128]
+ vst2.32 {d16, d17, d18, d19}, [r0, :256]
+ vst2.8 {d16, d17}, [r0, :64]!
+ vst2.16 {q15}, [r0, :128]!
+ vst2.32 {d14, d15}, [r0]!
+ vst2.8 {d16, d17, d18, d19}, [r0, :64]!
+ vst2.16 {d18-d21}, [r0, :128]!
+ vst2.32 {q4, q5}, [r0, :256]!
+
+@ CHECK: vst2.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x08,0x40,0xf4]
+@ CHECK: vst2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x40,0xf4]
+@ CHECK: vst2.32 {d16, d17}, [r0] @ encoding: [0x8f,0x08,0x40,0xf4]
+@ CHECK: vst2.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x40,0xf4]
+@ CHECK: vst2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x40,0xf4]
+@ CHECK: vst2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x40,0xf4]
+@ CHECK: vst2.8 {d16, d17}, [r0, :64]! @ encoding: [0x1d,0x08,0x40,0xf4]
+@ CHECK: vst2.16 {d30, d31}, [r0, :128]! @ encoding: [0x6d,0xe8,0x40,0xf4]
+@ CHECK: vst2.32 {d14, d15}, [r0]! @ encoding: [0x8d,0xe8,0x00,0xf4]
+@ CHECK: vst2.8 {d16, d17, d18, d19}, [r0, :64]! @ encoding: [0x1d,0x03,0x40,0xf4]
+@ CHECK: vst2.16 {d18, d19, d20, d21}, [r0, :128]! @ encoding: [0x6d,0x23,0x40,0xf4]
+@ CHECK: vst2.32 {d8, d9, d10, d11}, [r0, :256]! @ encoding: [0xbd,0x83,0x00,0xf4]
@ vst3.8 {d16, d17, d18}, [r0, :64]
@@ -76,16 +100,24 @@
@ vst2.8 {d16[1], d17[1]}, [r0, :16]
@ vst2.16 {d16[1], d17[1]}, [r0, :32]
-@ vst2.32 {d16[1], d17[1]}, [r0]
+ vst2.32 {d16[1], d17[1]}, [r0]
@ vst2.16 {d17[1], d19[1]}, [r0]
@ vst2.32 {d17[0], d19[0]}, [r0, :64]
+ vst2.8 {d2[4], d3[4]}, [r2], r3
+ vst2.8 {d2[4], d3[4]}, [r2]!
+ vst2.8 {d2[4], d3[4]}, [r2]
+
@ FIXME: vst2.8 {d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xc0,0xf4]
@ FIXME: vst2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xc0,0xf4]
-@ FIXME: vst2.32 {d16[1], d17[1]}, [r0] @ encoding: [0x8f,0x09,0xc0,0xf4]
+@ CHECK: vst2.32 {d16[1], d17[1]}, [r0] @ encoding: [0x8f,0x09,0xc0,0xf4]
@ FIXME: vst2.16 {d17[1], d19[1]}, [r0] @ encoding: [0x6f,0x15,0xc0,0xf4]
@ FIXME: vst2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf4]
+@ CHECK: vst2.8 {d2[4], d3[4]}, [r2], r3 @ encoding: [0x83,0x21,0x82,0xf4]
+@ CHECK: vst2.8 {d2[4], d3[4]}, [r2]! @ encoding: [0x8d,0x21,0x82,0xf4]
+@ CHECK: vst2.8 {d2[4], d3[4]}, [r2] @ encoding: [0x8f,0x21,0x82,0xf4]
+
@ vst3.8 {d16[1], d17[1], d18[1]}, [r0]
@ vst3.16 {d16[1], d17[1], d18[1]}, [r0]
diff --git a/test/MC/ARM/neont2-table-encoding.s b/test/MC/ARM/neont2-table-encoding.s
index a22e955..9bfcc74 100644
--- a/test/MC/ARM/neont2-table-encoding.s
+++ b/test/MC/ARM/neont2-table-encoding.s
@@ -1,5 +1,4 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
-@ XFAIL: *
.code 16
@@ -8,10 +7,10 @@
vtbl.8 d16, {d16, d17, d18}, d20
vtbl.8 d16, {d16, d17, d18, d19}, d20
-@ CHECK: vtbl.8 d16, {d17}, d16 @ encoding: [0xa0,0x08,0xf1,0xff]
-@ CHECK: vtbl.8 d16, {d16, d17}, d18 @ encoding: [0xa2,0x09,0xf0,0xff]
-@ CHECK: vtbl.8 d16, {d16, d17, d18}, d20 @ encoding: [0xa4,0x0a,0xf0,0xff]
-@ CHECK: vtbl.8 d16, {d16, d17, d18, d19}, d20 @ encoding: [0xa4,0x0b,0xf0,0xff]
+@ CHECK: vtbl.8 d16, {d17}, d16 @ encoding: [0xf1,0xff,0xa0,0x08]
+@ CHECK: vtbl.8 d16, {d16, d17}, d18 @ encoding: [0xf0,0xff,0xa2,0x09]
+@ CHECK: vtbl.8 d16, {d16, d17, d18}, d20 @ encoding: [0xf0,0xff,0xa4,0x0a]
+@ CHECK: vtbl.8 d16, {d16, d17, d18, d19}, d20 @ encoding: [0xf0,0xff,0xa4,0x0b]
vtbx.8 d18, {d16}, d17
@@ -19,7 +18,7 @@
vtbx.8 d20, {d16, d17, d18}, d21
vtbx.8 d20, {d16, d17, d18, d19}, d21
-@ CHECK: vtbx.8 d18, {d16}, d17 @ encoding: [0xe1,0x28,0xf0,0xff]
-@ CHECK: vtbx.8 d19, {d16, d17}, d18 @ encoding: [0xe2,0x39,0xf0,0xff]
-@ CHECK: vtbx.8 d20, {d16, d17, d18}, d21 @ encoding: [0xe5,0x4a,0xf0,0xff]
-@ CHECK: vtbx.8 d20, {d16, d17, d18, d19}, d21 @ encoding: [0xe5,0x4b,0xf0,0xff]
+@ CHECK: vtbx.8 d18, {d16}, d17 @ encoding: [0xf0,0xff,0xe1,0x28]
+@ CHECK: vtbx.8 d19, {d16, d17}, d18 @ encoding: [0xf0,0xff,0xe2,0x39]
+@ CHECK: vtbx.8 d20, {d16, d17, d18}, d21 @ encoding: [0xf0,0xff,0xe5,0x4a]
+@ CHECK: vtbx.8 d20, {d16, d17, d18, d19}, d21 @ encoding: [0xf0,0xff,0xe5,0x4b]
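The re-ordered bytes encode the same instructions: a 32-bit Thumb2 encoding is
stored as two little-endian halfwords, so the leading halfword's bytes are now
listed first. A worked example for the first vtbl.8 above:

  halfwords: 0xfff1 0x08a0
  bytes:     [0xf1,0xff,0xa0,0x08]   @ previously listed word-style as [0xa0,0x08,0xf1,0xff]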
diff --git a/test/MC/AsmParser/directive_incbin.s b/test/MC/AsmParser/directive_incbin.s
new file mode 100644
index 0000000..55f9c79
--- /dev/null
+++ b/test/MC/AsmParser/directive_incbin.s
@@ -0,0 +1,6 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s -I %p | FileCheck %s
+
+.data
+.incbin "incbin_abcd"
+
+# CHECK: .ascii "abcd\n"
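.incbin splices the raw bytes of the named file into the current section at
the point of the directive. With incbin_abcd holding the five bytes "abcd\n"
(the file is added in the next hunk), the directive above is equivalent to
writing:

  .ascii "abcd\n"

which is exactly what the CHECK line asserts.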
diff --git a/test/MC/AsmParser/incbin_abcd b/test/MC/AsmParser/incbin_abcd
new file mode 100644
index 0000000..acbe86c
--- /dev/null
+++ b/test/MC/AsmParser/incbin_abcd
@@ -0,0 +1 @@
+abcd
diff --git a/test/MC/COFF/symbol-mangling.ll b/test/MC/COFF/symbol-mangling.ll
new file mode 100644
index 0000000..f1b4b4b
--- /dev/null
+++ b/test/MC/COFF/symbol-mangling.ll
@@ -0,0 +1,17 @@
+; The purpose of this test is to see if the MC layer properly handles symbol
+; names needing quoting on MS/Windows. This code is generated by clang when
+; using -cxx-abi microsoft.
+
+; RUN: llc -filetype=asm -mtriple i686-pc-win32 %s -o - | FileCheck %s
+
+; CHECK: ?sayhi@A@@QBEXXZ
+
+%struct.A = type {}
+
+define i32 @main() {
+entry:
+ tail call void @"\01?sayhi@A@@QBEXXZ"(%struct.A* null)
+ ret i32 0
+}
+
+declare void @"\01?sayhi@A@@QBEXXZ"(%struct.A*)
diff --git a/test/MC/Disassembler/ARM/neont2.txt b/test/MC/Disassembler/ARM/neont2.txt
index 6f01f52..ff1838e 100644
--- a/test/MC/Disassembler/ARM/neont2.txt
+++ b/test/MC/Disassembler/ARM/neont2.txt
@@ -1586,5 +1586,5 @@
# CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
0x63 0xf9 0x37 0xc9
-# CHECK: vld2.8 {d28, d29}, [r3, :256], r7
+# CHECK: vld2.8 {d28, d30}, [r3, :256], r7
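The corrected disassembly reflects that this encoding selects the
double-spaced form of vld2, whose register list is {Dd, Dd+2} rather than a
consecutive pair, so the bytes now decode to {d28, d30}.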
diff --git a/test/MC/Disassembler/MBlaze/mblaze_mbar.txt b/test/MC/Disassembler/MBlaze/mblaze_mbar.txt
new file mode 100644
index 0000000..6beba86
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/mblaze_mbar.txt
@@ -0,0 +1,14 @@
+# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
+
+################################################################################
+# Memory Barrier instructions
+################################################################################
+
+# CHECK: mbar 0
+0xB8 0x02 0x00 0x04
+
+# CHECK: mbar 1
+0xB8 0x22 0x00 0x04
+
+# CHECK: mbar 2
+0xB8 0x42 0x00 0x04
diff --git a/test/MC/Disassembler/MBlaze/mblaze_pattern.txt b/test/MC/Disassembler/MBlaze/mblaze_pattern.txt
index 1268378..cb19ee0 100644
--- a/test/MC/Disassembler/MBlaze/mblaze_pattern.txt
+++ b/test/MC/Disassembler/MBlaze/mblaze_pattern.txt
@@ -12,3 +12,6 @@
# CHECK: pcmpeq r0, r1, r2
0x88 0x01 0x14 0x00
+
+# CHECK: clz r0, r1
+0x90 0x01 0x00 0xE0
diff --git a/test/MC/ELF/global-offset.s b/test/MC/ELF/global-offset.s
index 8cc5dbb..81ae5d7 100644
--- a/test/MC/ELF/global-offset.s
+++ b/test/MC/ELF/global-offset.s
@@ -6,6 +6,10 @@
addl $_GLOBAL_OFFSET_TABLE_, %ebx
leal _GLOBAL_OFFSET_TABLE_(%ebx), %ebx
+// But not in this case
+foo:
+ addl _GLOBAL_OFFSET_TABLE_-foo,%ebx
+
// CHECK: ('sh_name', 0x00000005) # '.text'
// CHECK-NEXT: ('sh_type',
// CHECK-NEXT: ('sh_flags',
@@ -16,4 +20,4 @@
// CHECK-NEXT: ('sh_info',
// CHECK-NEXT: ('sh_addralign',
// CHECK-NEXT: ('sh_entsize',
-// CHECK-NEXT: ('_section_data', '81c30200 00008d9b 02000000')
+// CHECK-NEXT: ('_section_data', '81c30200 00008d9b 02000000 031d0200 0000')
diff --git a/test/MC/ELF/relocation-386.s b/test/MC/ELF/relocation-386.s
index 4421763..85da2eb 100644
--- a/test/MC/ELF/relocation-386.s
+++ b/test/MC/ELF/relocation-386.s
@@ -160,6 +160,18 @@
// CHECK-NEXT: ('r_sym', 0x00000d)
// CHECK-NEXT: ('r_type', 0x21)
// CHECK-NEXT: ),
+// Relocation 25 (_GLOBAL_OFFSET_TABLE_-bar2) is of type R_386_GOTPC.
+// CHECK-NEXT: Relocation 25
+// CHECK-NEXT: (('r_offset', 0x00000094)
+// CHECK-NEXT: ('r_sym', 0x00000b)
+// CHECK-NEXT: ('r_type', 0x0a)
+// CHECK-NEXT: ),
+// Relocation 26 (und_symbol-bar2) is of type R_386_PC32.
+// CHECK-NEXT: Relocation 26
+// CHECK-NEXT: (('r_offset', 0x0000009a)
+// CHECK-NEXT: ('r_sym', 0x00000e)
+// CHECK-NEXT: ('r_type', 0x02)
+// CHECK-NEXT: ),
// Section 4 is bss
// CHECK: # Section 4
@@ -225,6 +237,8 @@ bar2:
movl zed@DTPOFF(%eax), %eax
pushl $bar
addl foo@GOTTPOFF(%edx), %eax
+ subl _GLOBAL_OFFSET_TABLE_-bar2, %ebx
+ leal und_symbol-bar2(%edx),%ecx
.section zedsec,"awT",@progbits
zed:
diff --git a/test/MC/ELF/type.s b/test/MC/ELF/type.s
index 2b25a6b..ec53e4f 100644
--- a/test/MC/ELF/type.s
+++ b/test/MC/ELF/type.s
@@ -12,6 +12,10 @@ bar:
// Test that gnu_unique_object is accepted.
.type zed,@gnu_unique_object
+ifunc:
+ .global ifunc
+ .type ifunc,@gnu_indirect_function
+
// CHECK: # Symbol 4
// CHECK-NEXT: (('st_name', 0x00000005) # 'bar'
// CHECK-NEXT: ('st_bind', 0x1)
@@ -30,3 +34,13 @@ bar:
// CHECK-NEXT: ('st_value', 0x0000000000000000)
// CHECK-NEXT: ('st_size', 0x0000000000000000)
// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 6
+// CHECK-NEXT: (('st_name', 0x00000009) # 'ifunc'
+// CHECK-NEXT: ('st_bind', 0x1)
+// CHECK-NEXT: ('st_type', 0xa)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0001)
+// CHECK-NEXT: ('st_value', 0x0000000000000000)
+// CHECK-NEXT: ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+
diff --git a/test/MC/ARM/darwin-ARM-reloc.s b/test/MC/MachO/ARM/darwin-ARM-reloc.s
index 86b45e0..fe69a94 100644
--- a/test/MC/ARM/darwin-ARM-reloc.s
+++ b/test/MC/MachO/ARM/darwin-ARM-reloc.s
@@ -19,12 +19,14 @@ Ld0_0:
Lsc0_0:
.long 0
+ .subsections_via_symbols
+
@ CHECK: ('cputype', 12)
@ CHECK: ('cpusubtype', 9)
@ CHECK: ('filetype', 1)
@ CHECK: ('num_load_commands', 3)
@ CHECK: ('load_commands_size', 364)
-@ CHECK: ('flag', 0)
+@ CHECK: ('flag', 8192)
@ CHECK: ('load_commands', [
@ CHECK: # Load Command 0
@ CHECK: (('command', 1)
diff --git a/test/MC/ARM/darwin-Thumb-reloc.s b/test/MC/MachO/ARM/darwin-Thumb-reloc.s
index 567573d..567573d 100644
--- a/test/MC/ARM/darwin-Thumb-reloc.s
+++ b/test/MC/MachO/ARM/darwin-Thumb-reloc.s
diff --git a/test/MC/MachO/ARM/dg.exp b/test/MC/MachO/ARM/dg.exp
new file mode 100644
index 0000000..055fa25
--- /dev/null
+++ b/test/MC/MachO/ARM/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target ARM] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
+}
diff --git a/test/MC/ARM/nop-armv4-padding.s b/test/MC/MachO/ARM/nop-armv4-padding.s
index 8f646db..8f646db 100644
--- a/test/MC/ARM/nop-armv4-padding.s
+++ b/test/MC/MachO/ARM/nop-armv4-padding.s
diff --git a/test/MC/ARM/nop-armv6t2-padding.s b/test/MC/MachO/ARM/nop-armv6t2-padding.s
index c38ad2d..c38ad2d 100644
--- a/test/MC/ARM/nop-armv6t2-padding.s
+++ b/test/MC/MachO/ARM/nop-armv6t2-padding.s
diff --git a/test/MC/ARM/nop-thumb-padding.s b/test/MC/MachO/ARM/nop-thumb-padding.s
index 1e173f1..1e173f1 100644
--- a/test/MC/ARM/nop-thumb-padding.s
+++ b/test/MC/MachO/ARM/nop-thumb-padding.s
diff --git a/test/MC/ARM/nop-thumb2-padding.s b/test/MC/MachO/ARM/nop-thumb2-padding.s
index a8aa3a1..a8aa3a1 100644
--- a/test/MC/ARM/nop-thumb2-padding.s
+++ b/test/MC/MachO/ARM/nop-thumb2-padding.s
diff --git a/test/MC/MachO/ARM/relax-thumb2-branches.s b/test/MC/MachO/ARM/relax-thumb2-branches.s
new file mode 100644
index 0000000..7916d42
--- /dev/null
+++ b/test/MC/MachO/ARM/relax-thumb2-branches.s
@@ -0,0 +1,14 @@
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -show-encoding %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+ ble Lfoo @ wide encoding
+
+ .space 258
+Lfoo:
+ nop
+
+ ble Lbaz @ narrow encoding
+ .space 256
+Lbaz:
+
+@ CHECK: '_section_data', '40f38180
+@ CHECK: 000000bf 7fdd
diff --git a/test/MC/ARM/thumb2-movt-fixup.s b/test/MC/MachO/ARM/thumb2-movt-fixup.s
index ddd95b5..ddd95b5 100644
--- a/test/MC/ARM/thumb2-movt-fixup.s
+++ b/test/MC/MachO/ARM/thumb2-movt-fixup.s
diff --git a/test/MC/MachO/gen-dwarf.s b/test/MC/MachO/gen-dwarf.s
new file mode 100644
index 0000000..a443d75
--- /dev/null
+++ b/test/MC/MachO/gen-dwarf.s
@@ -0,0 +1,113 @@
+// RUN: llvm-mc -g -triple i386-apple-darwin10 %s -filetype=obj -o %t
+// RUN: llvm-dwarfdump %t | FileCheck %s
+
+.globl _bar
+_bar:
+ movl $0, %eax
+L1: leave
+ ret
+_foo:
+ nop
+.data
+_x: .long 1
+
+// CHECK: file format Mach-O 32-bit i386
+
+// CHECK: .debug_abbrev contents:
+// CHECK: Abbrev table for offset: 0x00000000
+// CHECK: [1] DW_TAG_compile_unit DW_CHILDREN_yes
+// CHECK: DW_AT_stmt_list DW_FORM_data4
+// CHECK: DW_AT_low_pc DW_FORM_addr
+// CHECK: DW_AT_high_pc DW_FORM_addr
+// CHECK: DW_AT_name DW_FORM_string
+// CHECK: DW_AT_comp_dir DW_FORM_string
+// CHECK: DW_AT_producer DW_FORM_string
+// CHECK: DW_AT_language DW_FORM_data2
+
+// CHECK: [2] DW_TAG_subprogram DW_CHILDREN_yes
+// CHECK: DW_AT_name DW_FORM_string
+// CHECK: DW_AT_decl_file DW_FORM_data4
+// CHECK: DW_AT_decl_line DW_FORM_data4
+// CHECK: DW_AT_low_pc DW_FORM_addr
+// CHECK: DW_AT_high_pc DW_FORM_addr
+// CHECK: DW_AT_prototyped DW_FORM_flag
+
+// CHECK: [3] DW_TAG_unspecified_parameters DW_CHILDREN_no
+
+
+// CHECK: .debug_info contents:
+
+// We don't check the leading addresses that these entries are at.
+// CHECK: DW_TAG_compile_unit [1] *
+// CHECK: DW_AT_stmt_list [DW_FORM_data4] (0x00000000)
+// CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
+// CHECK: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000008)
+// We don't check the file name as it is in a temp directory
+// CHECK: DW_AT_name [DW_FORM_string]
+// We don't check the DW_AT_comp_dir, which is the current working directory
+// CHECK: DW_AT_producer [DW_FORM_string] ("llvm-mc (based on {{.*}})")
+// CHECK: DW_AT_language [DW_FORM_data2] (0x8001)
+
+// CHECK: DW_TAG_subprogram [2] *
+// CHECK: DW_AT_name [DW_FORM_string] ("bar")
+// CHECK: DW_AT_decl_file [DW_FORM_data4] (0x00000001)
+// CHECK: DW_AT_decl_line [DW_FORM_data4] (0x00000005)
+// CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
+// CHECK: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000007)
+// CHECK: DW_AT_prototyped [DW_FORM_flag] (0x00)
+
+// CHECK: DW_TAG_unspecified_parameters [3]
+
+// CHECK: NULL
+
+// CHECK: DW_TAG_subprogram [2] *
+// CHECK: DW_AT_name [DW_FORM_string] ("foo")
+// CHECK: DW_AT_decl_file [DW_FORM_data4] (0x00000001)
+// CHECK: DW_AT_decl_line [DW_FORM_data4] (0x00000009)
+// CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000007)
+// CHECK: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000008)
+// CHECK: DW_AT_prototyped [DW_FORM_flag] (0x00)
+
+// CHECK: DW_TAG_unspecified_parameters [3]
+
+// CHECK: NULL
+
+// CHECK: NULL
+
+// CHECK: .debug_aranges contents:
+// CHECK: Address Range Header: length = 0x0000001c, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00
+
+// CHECK: .debug_lines contents:
+// CHECK: Line table prologue:
+// We don't check the total_length as it includes lengths of temp paths
+// CHECK: version: 2
+// We don't check the prologue_length as it too includes lengths of temp paths
+// CHECK: min_inst_length: 1
+// CHECK: default_is_stmt: 1
+// CHECK: line_base: -5
+// CHECK: line_range: 14
+// CHECK: opcode_base: 13
+// CHECK: standard_opcode_lengths[DW_LNS_copy] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_advance_pc] = 1
+// CHECK: standard_opcode_lengths[DW_LNS_advance_line] = 1
+// CHECK: standard_opcode_lengths[DW_LNS_set_file] = 1
+// CHECK: standard_opcode_lengths[DW_LNS_set_column] = 1
+// CHECK: standard_opcode_lengths[DW_LNS_negate_stmt] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_set_basic_block] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_const_add_pc] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_fixed_advance_pc] = 1
+// CHECK: standard_opcode_lengths[DW_LNS_set_prologue_end] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_set_epilogue_begin] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_set_isa] = 1
+// We don't check include_directories as it has a temp path
+// CHECK: Dir Mod Time File Len File Name
+// CHECK: ---- ---------- ---------- ---------------------------
+// CHECK: file_names[ 1] 1 0x00000000 0x00000000 gen-dwarf.s
+
+// CHECK: Address Line Column File ISA Flags
+// CHECK: ------------------ ------ ------ ------ --- -------------
+// CHECK: 0x0000000000000000 6 0 1 0 is_stmt
+// CHECK: 0x0000000000000005 7 0 1 0 is_stmt
+// CHECK: 0x0000000000000006 8 0 1 0 is_stmt
+// CHECK: 0x0000000000000007 10 0 1 0 is_stmt
+// CHECK: 0x0000000000000008 10 0 1 0 is_stmt end_sequence
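Taken together, the checks describe what llvm-mc -g synthesizes for
hand-written assembly: one compile unit, one DW_TAG_subprogram per text-section
label (_bar at decl_line 5, _foo at decl_line 9; _x in .data gets none), and a
line table with one row per instruction. Reading the rows back against the
source above: 0x0 is the movl on line 6, 0x5 the leave on line 7, 0x6 the ret
on line 8, 0x7 the nop on line 10, and the end_sequence row at 0x8 sits one
byte past the end of the text section, matching the compile unit's high_pc.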
diff --git a/test/MC/MachO/reloc-pcrel-offset.s b/test/MC/MachO/reloc-pcrel-offset.s
index e0f12bf..bc611d7 100644
--- a/test/MC/MachO/reloc-pcrel-offset.s
+++ b/test/MC/MachO/reloc-pcrel-offset.s
@@ -12,3 +12,5 @@
.text
_a:
call _a
+
+ .subsections_via_symbols
diff --git a/test/MC/MachO/reloc-pcrel.s b/test/MC/MachO/reloc-pcrel.s
index fff7cc0..2684408 100644
--- a/test/MC/MachO/reloc-pcrel.s
+++ b/test/MC/MachO/reloc-pcrel.s
@@ -60,3 +60,5 @@ L1:
call _c + 1
// call _a - L0
call _b - L0
+
+ .subsections_via_symbols
diff --git a/test/MC/Mips/dg.exp b/test/MC/Mips/dg.exp
new file mode 100644
index 0000000..e469402
--- /dev/null
+++ b/test/MC/Mips/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target Mips] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
+}
diff --git a/test/MC/Mips/elf-relsym.ll b/test/MC/Mips/elf-relsym.ll
new file mode 100644
index 0000000..0f74437
--- /dev/null
+++ b/test/MC/Mips/elf-relsym.ll
@@ -0,0 +1,29 @@
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data | FileCheck %s
+
+; Check that the appropriate symbols were created.
+
+; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$.str'
+; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$.str1'
+; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$CPI0_0'
+; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$CPI0_1'
+
+@.str = private unnamed_addr constant [6 x i8] c"abcde\00", align 1
+@gc1 = external global i8*
+@.str1 = private unnamed_addr constant [5 x i8] c"fghi\00", align 1
+@gc2 = external global i8*
+@gd1 = external global double
+@gd2 = external global double
+
+define void @foo1() nounwind {
+entry:
+ store i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i8** @gc1, align 4
+ store i8* getelementptr inbounds ([5 x i8]* @.str1, i32 0, i32 0), i8** @gc2, align 4
+ %0 = load double* @gd1, align 8
+ %add = fadd double %0, 2.500000e+00
+ store double %add, double* @gd1, align 8
+ %1 = load double* @gd2, align 8
+ %add1 = fadd double %1, 4.500000e+00
+ store double %add1, double* @gd2, align 8
+ ret void
+}
+
diff --git a/test/MC/X86/x86_64-avx-encoding.s b/test/MC/X86/x86_64-avx-encoding.s
index d3b226f..990ba40 100644
--- a/test/MC/X86/x86_64-avx-encoding.s
+++ b/test/MC/X86/x86_64-avx-encoding.s
@@ -3346,3 +3346,10 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11
_foo:
nop
vpshufb _foo(%rip), %xmm0, %xmm0
+
+// CHECK: vblendvps %ymm1, _foo2(%rip), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x4a,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: _foo2-5
+_foo2:
+ nop
+ vblendvps %ymm1, _foo2(%rip), %ymm0, %ymm0
diff --git a/test/MC/X86/x86_64-fma4-encoding.s b/test/MC/X86/x86_64-fma4-encoding.s
new file mode 100644
index 0000000..805fc23
--- /dev/null
+++ b/test/MC/X86/x86_64-fma4-encoding.s
@@ -0,0 +1,391 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// vfmadd
+// CHECK: vfmaddss (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6a,0x01,0x10]
+ vfmaddss (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddss %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6a,0x01,0x10]
+ vfmaddss %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10]
+ vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0x01,0x10]
+ vfmaddsd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6b,0x01,0x10]
+ vfmaddsd %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
+ vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddps (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x68,0x01,0x10]
+ vfmaddps (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddps %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x68,0x01,0x10]
+ vfmaddps %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
+ vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddpd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x69,0x01,0x10]
+ vfmaddpd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddpd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x69,0x01,0x10]
+ vfmaddpd %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
+ vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddps (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x68,0x01,0x10]
+ vfmaddps (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddps %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x68,0x01,0x10]
+ vfmaddps %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
+ vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddpd (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x69,0x01,0x10]
+ vfmaddpd (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddpd %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x69,0x01,0x10]
+ vfmaddpd %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
+ vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+
+// vfmsub
+// CHECK: vfmsubss (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6e,0x01,0x10]
+ vfmsubss (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubss %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6e,0x01,0x10]
+ vfmsubss %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6e,0xc2,0x10]
+ vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubsd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6f,0x01,0x10]
+ vfmsubsd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubsd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6f,0x01,0x10]
+ vfmsubsd %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6f,0xc2,0x10]
+ vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubps (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6c,0x01,0x10]
+ vfmsubps (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubps %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6c,0x01,0x10]
+ vfmsubps %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
+ vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubpd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6d,0x01,0x10]
+ vfmsubpd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubpd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6d,0x01,0x10]
+ vfmsubpd %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
+ vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubps (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x6c,0x01,0x10]
+ vfmsubps (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubps %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x6c,0x01,0x10]
+ vfmsubps %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
+ vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubpd (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x6d,0x01,0x10]
+ vfmsubpd (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubpd %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x6d,0x01,0x10]
+ vfmsubpd %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
+ vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+
+// vfnmadd
+// CHECK: vfnmaddss (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7a,0x01,0x10]
+ vfnmaddss (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddss %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7a,0x01,0x10]
+ vfnmaddss %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7a,0xc2,0x10]
+ vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddsd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7b,0x01,0x10]
+ vfnmaddsd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddsd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7b,0x01,0x10]
+ vfnmaddsd %xmm1, (%rcx), %xmm0, %xmm0
+
+// CHECK: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7b,0xc2,0x10]
+ vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddps (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x78,0x01,0x10]
+ vfnmaddps (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddps %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x78,0x01,0x10]
+ vfnmaddps %xmm1, (%rcx), %xmm0, %xmm0
+
+// CHECK: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
+ vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddpd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x79,0x01,0x10]
+ vfnmaddpd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddpd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x79,0x01,0x10]
+ vfnmaddpd %xmm1, (%rcx), %xmm0, %xmm0
+
+// CHECK: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
+ vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddps (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x78,0x01,0x10]
+ vfnmaddps (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmaddps %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x78,0x01,0x10]
+ vfnmaddps %ymm1, (%rcx), %ymm0, %ymm0
+
+// CHECK: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
+ vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmaddpd (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x79,0x01,0x10]
+ vfnmaddpd (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmaddpd %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x79,0x01,0x10]
+ vfnmaddpd %ymm1, (%rcx), %ymm0, %ymm0
+
+// CHECK: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
+ vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+
+// vfnmsub
+// CHECK: vfnmsubss (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7e,0x01,0x10]
+ vfnmsubss (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubss %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7e,0x01,0x10]
+ vfnmsubss %xmm1, (%rcx), %xmm0, %xmm0
+
+// CHECK: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7e,0xc2,0x10]
+ vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubsd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7f,0x01,0x10]
+ vfnmsubsd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubsd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7f,0x01,0x10]
+ vfnmsubsd %xmm1, (%rcx), %xmm0, %xmm0
+
+// CHECK: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7f,0xc2,0x10]
+ vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubps (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7c,0x01,0x10]
+ vfnmsubps (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubps %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7c,0x01,0x10]
+ vfnmsubps %xmm1, (%rcx), %xmm0, %xmm0
+
+// CHECK: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
+ vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubpd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7d,0x01,0x10]
+ vfnmsubpd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubpd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7d,0x01,0x10]
+ vfnmsubpd %xmm1, (%rcx), %xmm0, %xmm0
+
+// CHECK: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
+ vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubps (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x7c,0x01,0x10]
+ vfnmsubps (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmsubps %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x7c,0x01,0x10]
+ vfnmsubps %ymm1, (%rcx), %ymm0, %ymm0
+
+// CHECK: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
+ vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmsubpd (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x7d,0x01,0x10]
+ vfnmsubpd (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmsubpd %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x7d,0x01,0x10]
+ vfnmsubpd %ymm1, (%rcx), %ymm0, %ymm0
+
+// CHECK: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
+ vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+
+// vfmaddsub
+// CHECK: vfmaddsubps (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5c,0x01,0x10]
+ vfmaddsubps (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsubps %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x5c,0x01,0x10]
+ vfmaddsubps %xmm1, (%rcx), %xmm0, %xmm0
+
+// CHECK: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
+ vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsubpd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5d,0x01,0x10]
+ vfmaddsubpd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsubpd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x5d,0x01,0x10]
+ vfmaddsubpd %xmm1, (%rcx), %xmm0, %xmm0
+
+// CHECK: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
+ vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsubps (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5c,0x01,0x10]
+ vfmaddsubps (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddsubps %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x5c,0x01,0x10]
+ vfmaddsubps %ymm1, (%rcx), %ymm0, %ymm0
+
+// CHECK: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
+ vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddsubpd (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5d,0x01,0x10]
+ vfmaddsubpd (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddsubpd %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x5d,0x01,0x10]
+ vfmaddsubpd %ymm1, (%rcx), %ymm0, %ymm0
+
+// CHECK: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
+ vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
+
+// vfmsubadd
+// CHECK: vfmsubaddps (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5e,0x01,0x10]
+ vfmsubaddps (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubaddps %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x5e,0x01,0x10]
+ vfmsubaddps %xmm1, (%rcx), %xmm0, %xmm0
+
+// CHECK: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
+ vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubaddpd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5f,0x01,0x10]
+ vfmsubaddpd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubaddpd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x5f,0x01,0x10]
+ vfmsubaddpd %xmm1, (%rcx), %xmm0, %xmm0
+
+// CHECK: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
+ vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubaddps (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5e,0x01,0x10]
+ vfmsubaddps (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubaddps %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x5e,0x01,0x10]
+ vfmsubaddps %ymm1, (%rcx), %ymm0, %ymm0
+
+// CHECK: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
+ vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubaddpd (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5f,0x01,0x10]
+ vfmsubaddpd (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubaddpd %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x5f,0x01,0x10]
+ vfmsubaddpd %ymm1, (%rcx), %ymm0, %ymm0
+
+// CHECK: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
+ vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
diff --git a/test/MC/X86/x86_64-xop-encoding.s b/test/MC/X86/x86_64-xop-encoding.s
new file mode 100644
index 0000000..1137b71
--- /dev/null
+++ b/test/MC/X86/x86_64-xop-encoding.s
@@ -0,0 +1,584 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
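+// Note: XOP instructions are encoded with AMD's 0x8f escape byte in place
+// of the VEX 0xc4/0xc5 escapes, as the first byte of every XOP encoding
+// below shows. (The vpermil2ps/vpermil2pd tests at the end are the
+// exception: they are VEX-encoded and start with 0xc4.)
+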
+//////////////////////////
+// 2 operand instructions
+//////////////////////////
+
+// vphsubwd
+// CHECK: vphsubwd (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe2,0x0c,0x01]
+ vphsubwd (%rcx,%rax), %xmm1
+// CHECK: vphsubwd %xmm0, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe2,0xc8]
+ vphsubwd %xmm0, %xmm1
+
+// vphsubdq
+// CHECK: vphsubdq (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe3,0x0c,0x01]
+ vphsubdq (%rcx,%rax), %xmm1
+// CHECK: vphsubdq %xmm0, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe3,0xc8]
+ vphsubdq %xmm0, %xmm1
+
+// vphsubbw
+// CHECK: vphsubbw (%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe1,0x08]
+ vphsubbw (%rax), %xmm1
+// CHECK: vphsubbw %xmm2, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe1,0xca]
+ vphsubbw %xmm2, %xmm1
+
+// vphaddwq
+// CHECK: vphaddwq (%rcx), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc7,0x21]
+ vphaddwq (%rcx), %xmm4
+// CHECK: vphaddwq %xmm6, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc7,0xd6]
+ vphaddwq %xmm6, %xmm2
+
+// vphaddwd
+// CHECK: vphaddwd (%rdx,%rax), %xmm7
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc6,0x3c,0x02]
+ vphaddwd (%rdx,%rax), %xmm7
+// CHECK: vphaddwd %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc6,0xe3]
+ vphaddwd %xmm3, %xmm4
+
+// vphadduwq
+// CHECK: vphadduwq (%rcx,%rax), %xmm6
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd7,0x34,0x01]
+ vphadduwq (%rcx,%rax), %xmm6
+// CHECK: vphadduwq %xmm7, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd7,0xc7]
+ vphadduwq %xmm7, %xmm0
+
+// vphadduwd
+// CHECK: vphadduwd (%rax), %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd6,0x28]
+ vphadduwd (%rax), %xmm5
+// CHECK: vphadduwd %xmm2, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd6,0xca]
+ vphadduwd %xmm2, %xmm1
+
+// vphaddudq
+// CHECK: vphaddudq 8(%rcx,%rax), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x78,0xdb,0x64,0x01,0x08]
+ vphaddudq 8(%rcx,%rax), %xmm4
+// CHECK: vphaddudq %xmm6, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0xdb,0xd6]
+ vphaddudq %xmm6, %xmm2
+
+// vphaddubw
+// CHECK: vphaddubw (%rcx), %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd1,0x19]
+ vphaddubw (%rcx), %xmm3
+// CHECK: vphaddubw %xmm5, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd1,0xc5]
+ vphaddubw %xmm5, %xmm0
+
+// vphaddubq
+// CHECK: vphaddubq (%rcx), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd3,0x21]
+ vphaddubq (%rcx), %xmm4
+// CHECK: vphaddubq %xmm2, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd3,0xd2]
+ vphaddubq %xmm2, %xmm2
+
+// vphaddubd
+// CHECK: vphaddubd (%rax), %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd2,0x28]
+ vphaddubd (%rax), %xmm5
+// CHECK: vphaddubd %xmm5, %xmm7
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd2,0xfd]
+ vphaddubd %xmm5, %xmm7
+
+// vphadddq
+// CHECK: vphadddq (%rdx), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x78,0xcb,0x22]
+ vphadddq (%rdx), %xmm4
+// CHECK: vphadddq %xmm4, %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x78,0xcb,0xec]
+ vphadddq %xmm4, %xmm5
+
+// vphaddbw
+// CHECK: vphaddbw (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc1,0x0c,0x01]
+ vphaddbw (%rcx,%rax), %xmm1
+// CHECK: vphaddbw %xmm5, %xmm6
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc1,0xf5]
+ vphaddbw %xmm5, %xmm6
+
+// vphaddbq
+// CHECK: vphaddbq (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc3,0x0c,0x01]
+ vphaddbq (%rcx,%rax), %xmm1
+// CHECK: vphaddbq %xmm2, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc3,0xc2]
+ vphaddbq %xmm2, %xmm0
+
+// vphaddbd
+// CHECK: vphaddbd (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc2,0x0c,0x01]
+ vphaddbd (%rcx,%rax), %xmm1
+// CHECK: vphaddbd %xmm1, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc2,0xd9]
+ vphaddbd %xmm1, %xmm3
+
+// vfrczss
+// CHECK: vfrczss (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0x82,0x0c,0x01]
+ vfrczss (%rcx,%rax), %xmm1
+// CHECK: vfrczss %xmm5, %xmm7
+// CHECK: encoding: [0x8f,0xe9,0x78,0x82,0xfd]
+ vfrczss %xmm5, %xmm7
+
+// vfrczsd
+// CHECK: vfrczsd (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0x83,0x0c,0x01]
+ vfrczsd (%rcx,%rax), %xmm1
+// CHECK: vfrczsd %xmm7, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x78,0x83,0xc7]
+ vfrczsd %xmm7, %xmm0
+
+// vfrczps
+// CHECK: vfrczps 4(%rax), %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x78,0x80,0x58,0x04]
+ vfrczps 4(%rax), %xmm3
+// CHECK: vfrczps %xmm6, %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x78,0x80,0xee]
+ vfrczps %xmm6, %xmm5
+// CHECK: vfrczps (%rcx), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0x80,0x09]
+ vfrczps (%rcx), %xmm1
+// CHECK: vfrczps %ymm2, %ymm4
+// CHECK: encoding: [0x8f,0xe9,0x7c,0x80,0xe2]
+ vfrczps %ymm2, %ymm4
+
+// vfrczpd
+// CHECK: vfrczpd (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0x81,0x0c,0x01]
+ vfrczpd (%rcx,%rax), %xmm1
+// CHECK: vfrczpd %xmm7, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x78,0x81,0xc7]
+ vfrczpd %xmm7, %xmm0
+// CHECK: vfrczpd (%rcx,%rax), %ymm2
+// CHECK: encoding: [0x8f,0xe9,0x7c,0x81,0x14,0x01]
+ vfrczpd (%rcx,%rax), %ymm2
+// CHECK: vfrczpd %ymm5, %ymm3
+// CHECK: encoding: [0x8f,0xe9,0x7c,0x81,0xdd]
+ vfrczpd %ymm5, %ymm3
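+
+// Note: the 128-/256-bit choice shows up as the L bit (0x04) in the third
+// encoding byte: 0x78 for the xmm forms vs. 0x7c for the ymm forms above.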
+
+
+//////////////////////////
+// 3 operand instructions
+//////////////////////////
+
+// vpshlw
+// CHECK: vpshlw %xmm0, %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0x95,0xd1]
+ vpshlw %xmm0, %xmm1, %xmm2
+// CHECK: vpshlw (%rax), %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0xf0,0x95,0x10]
+ vpshlw (%rax), %xmm1, %xmm2
+// CHECK: vpshlw %xmm0, (%rax,%rcx), %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0x95,0x14,0x08]
+ vpshlw %xmm0, (%rax,%rcx), %xmm2
+
+// vpshlq
+// CHECK: vpshlq %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0x8f,0xe9,0x68,0x97,0xf4]
+ vpshlq %xmm2, %xmm4, %xmm6
+// CHECK: vpshlq (%rcx), %xmm2, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0xe8,0x97,0x09]
+ vpshlq (%rcx), %xmm2, %xmm1
+// CHECK: vpshlq %xmm5, (%rdx,%rcx), %xmm6
+// CHECK: encoding: [0x8f,0xe9,0x50,0x97,0x34,0x0a]
+ vpshlq %xmm5, (%rdx,%rcx), %xmm6
+
+// vpshld
+// CHECK: vpshld %xmm7, %xmm5, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x40,0x96,0xdd]
+ vpshld %xmm7, %xmm5, %xmm3
+// CHECK: vpshld 4(%rax), %xmm3, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0xe0,0x96,0x58,0x04]
+ vpshld 4(%rax), %xmm3, %xmm3
+// CHECK: vpshld %xmm1, (%rax,%rcx), %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x70,0x96,0x2c,0x08]
+ vpshld %xmm1, (%rax,%rcx), %xmm5
+
+// vpshlb
+// CHECK: vpshlb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x70,0x94,0xda]
+ vpshlb %xmm1, %xmm2, %xmm3
+// CHECK: vpshlb (%rcx), %xmm0, %xmm7
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x94,0x39]
+ vpshlb (%rcx), %xmm0, %xmm7
+// CHECK: vpshlb %xmm2, (%rax,%rdx), %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x68,0x94,0x1c,0x10]
+ vpshlb %xmm2, (%rax,%rdx), %xmm3
+
+// vpshaw
+// CHECK: vpshaw %xmm7, %xmm5, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x40,0x99,0xdd]
+ vpshaw %xmm7, %xmm5, %xmm3
+// CHECK: vpshaw (%rax), %xmm2, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0xe8,0x99,0x08]
+ vpshaw (%rax), %xmm2, %xmm1
+// CHECK: vpshaw %xmm0, 8(%rax,%rcx), %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x78,0x99,0x5c,0x08,0x08]
+ vpshaw %xmm0, 8(%rax,%rcx), %xmm3
+
+// vpshaq
+// CHECK: vpshaq %xmm4, %xmm4, %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x58,0x9b,0xe4]
+ vpshaq %xmm4, %xmm4, %xmm4
+// CHECK: vpshaq (%rcx), %xmm2, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0xe8,0x9b,0x01]
+ vpshaq (%rcx), %xmm2, %xmm0
+// CHECK: vpshaq %xmm6, (%rax,%rcx), %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x48,0x9b,0x2c,0x08]
+ vpshaq %xmm6, (%rax,%rcx), %xmm5
+
+// vpshad
+// CHECK: vpshad %xmm5, %xmm4, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x50,0x9a,0xc4]
+ vpshad %xmm5, %xmm4, %xmm0
+// CHECK: vpshad (%rax), %xmm2, %xmm5
+// CHECK: encoding: [0x8f,0xe9,0xe8,0x9a,0x28]
+ vpshad (%rax), %xmm2, %xmm5
+// CHECK: vpshad %xmm2, (%rax), %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x68,0x9a,0x28]
+ vpshad %xmm2, (%rax), %xmm5
+
+// vpshab
+// CHECK: vpshab %xmm1, %xmm1, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x70,0x98,0xc1]
+ vpshab %xmm1, %xmm1, %xmm0
+// CHECK: vpshab (%rcx), %xmm4, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0xd8,0x98,0x01]
+ vpshab (%rcx), %xmm4, %xmm0
+// CHECK: vpshab %xmm5, (%rcx), %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x50,0x98,0x19]
+ vpshab %xmm5, (%rcx), %xmm3
+
+// vprotw
+// CHECK: vprotw (%rax), %xmm3, %xmm6
+// CHECK: encoding: [0x8f,0xe9,0xe0,0x91,0x30]
+ vprotw (%rax), %xmm3, %xmm6
+// CHECK: vprotw %xmm5, (%rax,%rcx), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x50,0x91,0x0c,0x08]
+ vprotw %xmm5, (%rax,%rcx), %xmm1
+// CHECK: vprotw %xmm0, %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0x91,0xd1]
+ vprotw %xmm0, %xmm1, %xmm2
+// CHECK: vprotw $42, (%rcx), %xmm1
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc1,0x09,0x2a]
+ vprotw $42, (%rcx), %xmm1
+// CHECK: vprotw $41, (%rax), %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc1,0x20,0x29]
+ vprotw $41, (%rax), %xmm4
+// CHECK: vprotw $40, %xmm1, %xmm3
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc1,0xd9,0x28]
+ vprotw $40, %xmm1, %xmm3
+
+// vprotq
+// CHECK: vprotq (%rax), %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0xf0,0x93,0x10]
+ vprotq (%rax), %xmm1, %xmm2
+// CHECK: vprotq (%rax,%rcx), %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0xf0,0x93,0x14,0x08]
+ vprotq (%rax,%rcx), %xmm1, %xmm2
+// CHECK: vprotq %xmm0, %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0x93,0xd1]
+ vprotq %xmm0, %xmm1, %xmm2
+// CHECK: vprotq $42, (%rax), %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc3,0x10,0x2a]
+ vprotq $42, (%rax), %xmm2
+// CHECK: vprotq $42, (%rax,%rcx), %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc3,0x14,0x08,0x2a]
+ vprotq $42, (%rax,%rcx), %xmm2
+// CHECK: vprotq $42, %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc3,0xd1,0x2a]
+ vprotq $42, %xmm1, %xmm2
+
+// vprotd
+// CHECK: vprotd (%rax), %xmm0, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x92,0x18]
+ vprotd (%rax), %xmm0, %xmm3
+// CHECK: vprotd %xmm2, (%rax,%rcx), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x68,0x92,0x24,0x08]
+ vprotd %xmm2, (%rax,%rcx), %xmm4
+// CHECK: vprotd %xmm5, %xmm3, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x50,0x92,0xd3]
+ vprotd %xmm5, %xmm3, %xmm2
+// CHECK: vprotd $43, (%rcx), %xmm6
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc2,0x31,0x2b]
+ vprotd $43, (%rcx), %xmm6
+// CHECK: vprotd $44, (%rax,%rcx), %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc2,0x3c,0x08,0x2c]
+ vprotd $44, (%rax,%rcx), %xmm7
+// CHECK: vprotd $45, %xmm4, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc2,0xe4,0x2d]
+ vprotd $45, %xmm4, %xmm4
+
+// vprotb
+// CHECK: vprotb (%rcx), %xmm2, %xmm5
+// CHECK: encoding: [0x8f,0xe9,0xe8,0x90,0x29]
+ vprotb (%rcx), %xmm2, %xmm5
+// CHECK: vprotb %xmm5, (%rax,%rcx), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x50,0x90,0x24,0x08]
+ vprotb %xmm5, (%rax,%rcx), %xmm4
+// CHECK: vprotb %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x58,0x90,0xd3]
+ vprotb %xmm4, %xmm3, %xmm2
+// CHECK: vprotb $46, (%rax), %xmm3
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc0,0x18,0x2e]
+ vprotb $46, (%rax), %xmm3
+// CHECK: vprotb $47, (%rax,%rcx), %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc0,0x3c,0x08,0x2f]
+ vprotb $47, (%rax,%rcx), %xmm7
+// CHECK: vprotb $48, %xmm5, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc0,0xed,0x30]
+ vprotb $48, %xmm5, %xmm5
+
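+// Note: the variable-count shift and rotate forms above sit in XOP opcode
+// map 9 (second byte 0xe9), while the immediate-count rotate forms switch
+// to map 8 (second byte 0xe8) and append the count as the final byte.
+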
+//////////////////////////
+// 4 operand instructions
+//////////////////////////
+
+// vpmadcswd
+// CHECK: vpmadcswd %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xb6,0xe2,0x10]
+ vpmadcswd %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: vpmadcswd %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xb6,0x20,0x10]
+ vpmadcswd %xmm1, (%rax), %xmm3, %xmm4
+
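+// Note: in these 4-operand encodings (XOP map 8) the ModRM byte carries the
+// destination and one source, XOP.vvvv a second source, and the first AT&T
+// operand is encoded in the high nibble of the trailing immediate byte
+// (0x10 = %xmm1 above, 0x40 = %xmm4 in vpmacswd below).
+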
+// vpmadcsswd
+// CHECK: vpmadcsswd %xmm1, %xmm4, %xmm6, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x48,0xa6,0xe4,0x10]
+ vpmadcsswd %xmm1, %xmm4, %xmm6, %xmm4
+// CHECK: vpmadcsswd %xmm1, (%rax,%rcx), %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xa6,0x24,0x08,0x10]
+ vpmadcsswd %xmm1, (%rax,%rcx), %xmm3, %xmm4
+
+// vpmacsww
+// CHECK: vpmacsww %xmm0, %xmm2, %xmm5, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x50,0x95,0xe2,0x00]
+ vpmacsww %xmm0, %xmm2, %xmm5, %xmm4
+// CHECK: vpmacsww %xmm1, (%rax), %xmm6, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x48,0x95,0x20,0x10]
+ vpmacsww %xmm1, (%rax), %xmm6, %xmm4
+
+// vpmacswd
+// CHECK: vpmacswd %xmm4, %xmm5, %xmm6, %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x48,0x96,0xfd,0x40]
+ vpmacswd %xmm4, %xmm5, %xmm6, %xmm7
+// CHECK: vpmacswd %xmm0, (%rax), %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x70,0x96,0x10,0x00]
+ vpmacswd %xmm0, (%rax), %xmm1, %xmm2
+
+// vpmacssww
+// CHECK: vpmacssww %xmm4, %xmm3, %xmm2, %xmm1
+// CHECK: encoding: [0x8f,0xe8,0x68,0x85,0xcb,0x40]
+ vpmacssww %xmm4, %xmm3, %xmm2, %xmm1
+// CHECK: vpmacssww %xmm6, (%rcx), %xmm7, %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x40,0x85,0x39,0x60]
+ vpmacssww %xmm6, (%rcx), %xmm7, %xmm7
+
+// vpmacsswd
+// CHECK: vpmacsswd %xmm4, %xmm2, %xmm4, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x58,0x86,0xd2,0x40]
+ vpmacsswd %xmm4, %xmm2, %xmm4, %xmm2
+// CHECK: vpmacsswd %xmm0, 8(%rax,%rcx), %xmm1, %xmm0
+// CHECK: encoding: [0x8f,0xe8,0x70,0x86,0x44,0x08,0x08,0x00]
+ vpmacsswd %xmm0, 8(%rax,%rcx), %xmm1, %xmm0
+
+// vpmacssdql
+// CHECK: vpmacssdql %xmm1, %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x68,0x87,0xe1,0x10]
+ vpmacssdql %xmm1, %xmm1, %xmm2, %xmm4
+// CHECK: vpmacssdql %xmm7, (%rcx), %xmm6, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x48,0x87,0x29,0x70]
+ vpmacssdql %xmm7, (%rcx), %xmm6, %xmm5
+
+// vpmacssdqh
+// CHECK: vpmacssdqh %xmm3, %xmm2, %xmm0, %xmm1
+// CHECK: encoding: [0x8f,0xe8,0x78,0x8f,0xca,0x30]
+ vpmacssdqh %xmm3, %xmm2, %xmm0, %xmm1
+// CHECK: vpmacssdqh %xmm7, (%rax,%rcx), %xmm2, %xmm3
+// CHECK: encoding: [0x8f,0xe8,0x68,0x8f,0x1c,0x08,0x70]
+ vpmacssdqh %xmm7, (%rax,%rcx), %xmm2, %xmm3
+
+// vpmacssdd
+// CHECK: vpmacssdd %xmm2, %xmm2, %xmm3, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x60,0x8e,0xea,0x20]
+ vpmacssdd %xmm2, %xmm2, %xmm3, %xmm5
+// CHECK: vpmacssdd %xmm4, (%rax), %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x70,0x8e,0x10,0x40]
+ vpmacssdd %xmm4, (%rax), %xmm1, %xmm2
+
+// vpmacsdql
+// CHECK: vpmacsdql %xmm3, %xmm0, %xmm6, %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x48,0x97,0xf8,0x30]
+ vpmacsdql %xmm3, %xmm0, %xmm6, %xmm7
+// CHECK: vpmacsdql %xmm5, 8(%rcx), %xmm3, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x60,0x97,0x69,0x08,0x50]
+ vpmacsdql %xmm5, 8(%rcx), %xmm3, %xmm5
+
+// vpmacsdqh
+// CHECK: vpmacsdqh %xmm7, %xmm5, %xmm3, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x60,0x9f,0xd5,0x70]
+ vpmacsdqh %xmm7, %xmm5, %xmm3, %xmm2
+// CHECK: vpmacsdqh %xmm5, 4(%rax), %xmm2, %xmm0
+// CHECK: encoding: [0x8f,0xe8,0x68,0x9f,0x40,0x04,0x50]
+ vpmacsdqh %xmm5, 4(%rax), %xmm2, %xmm0
+
+// vpmacsdd
+// CHECK: vpmacsdd %xmm4, %xmm6, %xmm4, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x58,0x9e,0xd6,0x40]
+ vpmacsdd %xmm4, %xmm6, %xmm4, %xmm2
+// CHECK: vpmacsdd %xmm4, (%rax,%rcx), %xmm4, %xmm3
+// CHECK: encoding: [0x8f,0xe8,0x58,0x9e,0x1c,0x08,0x40]
+ vpmacsdd %xmm4, (%rax,%rcx), %xmm4, %xmm3
+
+// vpcomw
+// CHECK: vpcomw $42, %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xcd,0xe2,0x2a]
+ vpcomw $42, %xmm2, %xmm3, %xmm4
+// CHECK: vpcomw $42, (%rax), %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xcd,0x20,0x2a]
+ vpcomw $42, (%rax), %xmm3, %xmm4
+
+// vpcomuw
+// CHECK: vpcomuw $43, %xmm1, %xmm3, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x60,0xed,0xe9,0x2b]
+ vpcomuw $43, %xmm1, %xmm3, %xmm5
+// CHECK: vpcomuw $44, (%rax,%rcx), %xmm0, %xmm6
+// CHECK: encoding: [0x8f,0xe8,0x78,0xed,0x34,0x08,0x2c]
+ vpcomuw $44, (%rax,%rcx), %xmm0, %xmm6
+
+// vpcomuq
+// CHECK: vpcomuq $45, %xmm3, %xmm3, %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x60,0xef,0xfb,0x2d]
+ vpcomuq $45, %xmm3, %xmm3, %xmm7
+// CHECK: vpcomuq $46, (%rax), %xmm3, %xmm1
+// CHECK: encoding: [0x8f,0xe8,0x60,0xef,0x08,0x2e]
+ vpcomuq $46, (%rax), %xmm3, %xmm1
+
+// vpcomud
+// CHECK: vpcomud $47, %xmm0, %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x70,0xee,0xd0,0x2f]
+ vpcomud $47, %xmm0, %xmm1, %xmm2
+// CHECK: vpcomud $48, 4(%rax), %xmm6, %xmm3
+// CHECK: encoding: [0x8f,0xe8,0x48,0xee,0x58,0x04,0x30]
+ vpcomud $48, 4(%rax), %xmm6, %xmm3
+
+// vpcomub
+// CHECK: vpcomub $49, %xmm3, %xmm4, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x58,0xec,0xeb,0x31]
+ vpcomub $49, %xmm3, %xmm4, %xmm5
+// CHECK: vpcomub $50, (%rcx), %xmm6, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x48,0xec,0x11,0x32]
+ vpcomub $50, (%rcx), %xmm6, %xmm2
+
+// vpcomq
+// CHECK: vpcomq $51, %xmm3, %xmm0, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x78,0xcf,0xeb,0x33]
+ vpcomq $51, %xmm3, %xmm0, %xmm5
+// CHECK: vpcomq $52, (%rax), %xmm1, %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x70,0xcf,0x38,0x34]
+ vpcomq $52, (%rax), %xmm1, %xmm7
+
+// vpcomd
+// CHECK: vpcomd $53, %xmm3, %xmm3, %xmm0
+// CHECK: encoding: [0x8f,0xe8,0x60,0xce,0xc3,0x35]
+ vpcomd $53, %xmm3, %xmm3, %xmm0
+// CHECK: vpcomd $54, (%rcx), %xmm2, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x68,0xce,0x11,0x36]
+ vpcomd $54, (%rcx), %xmm2, %xmm2
+
+// vpcomb
+// CHECK: vpcomb $55, %xmm6, %xmm4, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x58,0xcc,0xd6,0x37]
+ vpcomb $55, %xmm6, %xmm4, %xmm2
+// CHECK: vpcomb $56, 8(%rax), %xmm3, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x60,0xcc,0x50,0x08,0x38]
+ vpcomb $56, 8(%rax), %xmm3, %xmm2
+
+
+// vpperm
+// CHECK: vpperm %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xa3,0xe2,0x10]
+ vpperm %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: vpperm (%rax), %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0xe0,0xa3,0x20,0x20]
+ vpperm (%rax), %xmm2, %xmm3, %xmm4
+// CHECK: vpperm %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xa3,0x20,0x10]
+ vpperm %xmm1, (%rax), %xmm3, %xmm4
+
+// vpcmov
+// CHECK: vpcmov %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xa2,0xe2,0x10]
+ vpcmov %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: vpcmov (%rax), %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0xe0,0xa2,0x20,0x20]
+ vpcmov (%rax), %xmm2, %xmm3, %xmm4
+// CHECK: vpcmov %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xa2,0x20,0x10]
+ vpcmov %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: vpcmov %ymm1, %ymm2, %ymm3, %ymm4
+// CHECK: encoding: [0x8f,0xe8,0x64,0xa2,0xe2,0x10]
+ vpcmov %ymm1, %ymm2, %ymm3, %ymm4
+// CHECK: vpcmov (%rax), %ymm2, %ymm3, %ymm4
+// CHECK: encoding: [0x8f,0xe8,0xe4,0xa2,0x20,0x20]
+ vpcmov (%rax), %ymm2, %ymm3, %ymm4
+// CHECK: vpcmov %ymm1, (%rax), %ymm3, %ymm4
+// CHECK: encoding: [0x8f,0xe8,0x64,0xa2,0x20,0x10]
+ vpcmov %ymm1, (%rax), %ymm3, %ymm4
+
+
+//////////////////////////
+// 5 operand instructions
+//////////////////////////
+// vpermil2pd
+// CHECK: vpermil2pd $1, %xmm5, %xmm2, %xmm1, %xmm7
+// CHECK: encoding: [0xc4,0xe3,0x71,0x49,0xfa,0x51]
+ vpermil2pd $1, %xmm5, %xmm2, %xmm1, %xmm7
+// CHECK: vpermil2pd $2, (%rax), %xmm3, %xmm3, %xmm4
+// CHECK: encoding: [0xc4,0xe3,0xe1,0x49,0x20,0x32]
+ vpermil2pd $2, (%rax), %xmm3, %xmm3, %xmm4
+// CHECK: vpermil2pd $3, 8(%rax), %ymm0, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xe3,0xdd,0x49,0x70,0x08,0x03]
+ vpermil2pd $3, 8(%rax), %ymm0, %ymm4, %ymm6
+// CHECK: vpermil2pd $0, %xmm3, (%rax,%rcx), %xmm1, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x71,0x49,0x04,0x08,0x30]
+ vpermil2pd $0, %xmm3, (%rax,%rcx), %xmm1, %xmm0
+// CHECK: vpermil2pd $1, %ymm1, %ymm2, %ymm3, %ymm4
+// CHECK: encoding: [0xc4,0xe3,0x65,0x49,0xe2,0x11]
+ vpermil2pd $1, %ymm1, %ymm2, %ymm3, %ymm4
+// CHECK: vpermil2pd $2, %ymm1, (%rax), %ymm3, %ymm4
+// CHECK: encoding: [0xc4,0xe3,0x65,0x49,0x20,0x12]
+ vpermil2pd $2, %ymm1, (%rax), %ymm3, %ymm4
+
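+// Note: for vpermil2ps/vpermil2pd the final immediate byte does double
+// duty: its high nibble names the extra register source and its low two
+// bits hold the $N selector (e.g. 0x51 above = %xmm5 with selector 1).
+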
+// vpermil2ps
+// CHECK: vpermil2ps $0, %xmm4, %xmm3, %xmm2, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x69,0x48,0xcb,0x40]
+ vpermil2ps $0, %xmm4, %xmm3, %xmm2, %xmm1
+// CHECK: vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xe1,0x48,0x40,0x04,0x21]
+ vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0
+// CHECK: vpermil2ps $2, (%rax), %ymm1, %ymm5, %ymm6
+// CHECK: encoding: [0xc4,0xe3,0xd5,0x48,0x30,0x12]
+ vpermil2ps $2, (%rax), %ymm1, %ymm5, %ymm6
+// CHECK: vpermil2ps $3, %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: encoding: [0xc4,0xe3,0x61,0x48,0x20,0x13]
+ vpermil2ps $3, %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: vpermil2ps $0, %ymm4, %ymm4, %ymm2, %ymm2
+// CHECK: encoding: [0xc4,0xe3,0x6d,0x48,0xd4,0x40]
+ vpermil2ps $0, %ymm4, %ymm4, %ymm2, %ymm2
+// CHECK: vpermil2pd $1, %ymm1, 4(%rax), %ymm1, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x75,0x49,0x40,0x04,0x11]
+ vpermil2pd $1, %ymm1, 4(%rax), %ymm1, %ymm0
+
diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test
index 3f418d7..e5635ab 100644
--- a/test/Object/nm-trivial-object.test
+++ b/test/Object/nm-trivial-object.test
@@ -6,6 +6,10 @@ RUN: llvm-nm %p/Inputs/trivial-object-test.elf-i386 \
RUN: | FileCheck %s -check-prefix ELF
RUN: llvm-nm %p/Inputs/trivial-object-test.elf-x86-64 \
RUN: | FileCheck %s -check-prefix ELF
+RUN: llvm-nm %p/Inputs/trivial-object-test.macho-i386 \
+RUN: | FileCheck %s -check-prefix macho
+RUN: llvm-nm %p/Inputs/trivial-object-test.macho-x86-64 \
+RUN: | FileCheck %s -check-prefix macho64
COFF: 00000000 d .data
COFF: 00000000 t .text
@@ -17,3 +21,13 @@ COFF: U {{_?}}puts
ELF: U SomeOtherFunction
ELF: 00000000 T main
ELF: U puts
+
+
+macho: 00000000 U _SomeOtherFunction
+macho: 00000000 s _main
+macho: 00000000 U _puts
+
+macho64: 00000028 s L_.str
+macho64: 00000000 u _SomeOtherFunction
+macho64: 00000000 s _main
+macho64: 00000000 u _puts
\ No newline at end of file
diff --git a/test/Object/objdump-symbol-table.test b/test/Object/objdump-symbol-table.test
index e9d4cba..8a0f440 100644
--- a/test/Object/objdump-symbol-table.test
+++ b/test/Object/objdump-symbol-table.test
@@ -2,6 +2,8 @@ RUN: llvm-objdump -t %p/Inputs/trivial-object-test.coff-i386 \
RUN: | FileCheck %s -check-prefix COFF-i386
RUN: llvm-objdump -t %p/Inputs/trivial-object-test.elf-i386 \
RUN: | FileCheck %s -check-prefix ELF-i386
+RUN: llvm-objdump -t %p/Inputs/trivial-object-test.macho-i386 \
+RUN: | FileCheck %s -check-prefix macho-i386
COFF-i386: trivial-object-test.coff-i386: file format
COFF-i386: SYMBOL TABLE:
@@ -23,3 +25,9 @@ ELF-i386: 00000031 l d .note.GNU-stack 00000000 .note.GNU-stack
ELF-i386: 00000000 g F .text 00000024 main
ELF-i386: 00000000 *UND* 00000000 SomeOtherFunction
ELF-i386: 00000000 *UND* 00000000 puts
+
+macho-i386: trivial-object-test.macho-i386: file format Mach-O 32-bit i386
+macho-i386: SYMBOL TABLE:
+macho-i386: 00000000 g F __TEXT,__text 00000024 _main
+macho-i386: 00000000 *UND* 00000000 _SomeOtherFunction
+macho-i386: 00000000 *UND* 00000000 _puts
\ No newline at end of file
diff --git a/test/Other/constant-fold-gep.ll b/test/Other/constant-fold-gep.ll
index e4521d5..d28c178 100644
--- a/test/Other/constant-fold-gep.ll
+++ b/test/Other/constant-fold-gep.ll
@@ -102,14 +102,17 @@
@N = constant i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1)
@O = constant i64* getelementptr ([2 x i64]* null, i32 0, i32 1)
-; Fold GEP of a GEP. Theoretically some of these cases could be folded
-; without using targetdata, however that's not implemented yet.
+; Fold GEP of a GEP. Very simple cases are folded without targetdata.
+; PLAIN: @Y = global [3 x { i32, i32 }]* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 2)
; PLAIN: @Z = global i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
+; OPT: @Y = global [3 x { i32, i32 }]* getelementptr ([3 x { i32, i32 }]* @ext, i64 2)
; OPT: @Z = global i32* getelementptr (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
+; TO: @Y = global [3 x { i32, i32 }]* getelementptr ([3 x { i32, i32 }]* @ext, i64 2)
; TO: @Z = global i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 1)
@ext = external global [3 x { i32, i32 }]
+@Y = global [3 x { i32, i32 }]* getelementptr inbounds ([3 x { i32, i32 }]* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 1), i64 1)
@Z = global i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
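+; (@Y makes the fold concrete: two successive +1-element inbounds GEPs over
+; @ext collapse into the single +2 GEP checked above.)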
; Duplicate all of the above as function return values rather than
diff --git a/test/Other/lint.ll b/test/Other/lint.ll
index 4aa984e..ca2b1a3 100644
--- a/test/Other/lint.ll
+++ b/test/Other/lint.ll
@@ -151,7 +151,7 @@ entry:
exit:
%t3 = phi i32* [ %t4, %exit ]
%t4 = bitcast i32* %t3 to i32*
- %x = volatile load i32* %t3
+ %x = load volatile i32* %t3
br label %exit
}
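+
+; Note: this and the later hunks migrate from the old "volatile load" /
+; "volatile store" spelling to the current "load volatile" / "store volatile"
+; form; the volatile qualifier now follows the instruction keyword.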
diff --git a/test/Transforms/ConstProp/2007-11-23-cttz.ll b/test/Transforms/ConstProp/2007-11-23-cttz.ll
index 37cda30..a28c9b0 100644
--- a/test/Transforms/ConstProp/2007-11-23-cttz.ll
+++ b/test/Transforms/ConstProp/2007-11-23-cttz.ll
@@ -1,8 +1,8 @@
; RUN: opt < %s -constprop -S | grep {ret i13 13}
; PR1816
-declare i13 @llvm.cttz.i13(i13)
+declare i13 @llvm.cttz.i13(i13, i1)
define i13 @test() {
- %X = call i13 @llvm.cttz.i13(i13 0)
+ %X = call i13 @llvm.cttz.i13(i13 0, i1 true)
ret i13 %X
}
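+
+; Note: llvm.cttz and llvm.ctlz now take a second i1 flag; when it is true,
+; the result is undefined for a zero input, which lets the intrinsic lower
+; to instructions (such as x86's BSF) that leave the destination undefined
+; when the source is zero.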
diff --git a/test/Transforms/ConstProp/calls.ll b/test/Transforms/ConstProp/calls.ll
index 3b6010a..7a405a5 100644
--- a/test/Transforms/ConstProp/calls.ll
+++ b/test/Transforms/ConstProp/calls.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -constprop -S | FileCheck %s
+; RUN: opt < %s -constprop -disable-simplify-libcalls -S | FileCheck %s --check-prefix=FNOBUILTIN
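+; -disable-simplify-libcalls models -fno-builtin here: under the FNOBUILTIN
+; prefix the libm calls below must survive -constprop unfolded.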
declare double @cos(double)
@@ -59,3 +60,90 @@ declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
+
+define double @test_intrinsic_pow() nounwind uwtable ssp {
+entry:
+; CHECK: @test_intrinsic_pow
+; CHECK-NOT: call
+ %0 = call double @llvm.pow.f64(double 1.500000e+00, double 3.000000e+00)
+ ret double %0
+}
+declare double @llvm.pow.f64(double, double) nounwind readonly
+
+; Shouldn't fold because of -fno-builtin
+define double @sin_() nounwind uwtable ssp {
+; FNOBUILTIN: @sin_
+; FNOBUILTIN: %1 = call double @sin(double 3.000000e+00)
+ %1 = call double @sin(double 3.000000e+00)
+ ret double %1
+}
+
+; Shouldn't fold because of -fno-builtin
+define double @sqrt_() nounwind uwtable ssp {
+; FNOBUILTIN: @sqrt_
+; FNOBUILTIN: %1 = call double @sqrt(double 3.000000e+00)
+ %1 = call double @sqrt(double 3.000000e+00)
+ ret double %1
+}
+
+; Shouldn't fold because of -fno-builtin
+define float @sqrtf_() nounwind uwtable ssp {
+; FNOBUILTIN: @sqrtf_
+; FNOBUILTIN: %1 = call float @sqrtf(float 3.000000e+00)
+ %1 = call float @sqrtf(float 3.000000e+00)
+ ret float %1
+}
+declare float @sqrtf(float)
+
+; Shouldn't fold because of -fno-builtin
+define float @sinf_() nounwind uwtable ssp {
+; FNOBUILTIN: @sinf_
+; FNOBUILTIN: %1 = call float @sinf(float 3.000000e+00)
+ %1 = call float @sinf(float 3.000000e+00)
+ ret float %1
+}
+declare float @sinf(float)
+
+; Shouldn't fold because of -fno-builtin
+define double @tan_() nounwind uwtable ssp {
+; FNOBUILTIN: @tan_
+; FNOBUILTIN: %1 = call double @tan(double 3.000000e+00)
+ %1 = call double @tan(double 3.000000e+00)
+ ret double %1
+}
+
+; Shouldn't fold because of -fno-builtin
+define double @tanh_() nounwind uwtable ssp {
+; FNOBUILTIN: @tanh_
+; FNOBUILTIN: %1 = call double @tanh(double 3.000000e+00)
+ %1 = call double @tanh(double 3.000000e+00)
+ ret double %1
+}
+declare double @tanh(double)
+
+; Shouldn't fold because of -fno-builtin
+define double @pow_() nounwind uwtable ssp {
+; FNOBUILTIN: @pow_
+; FNOBUILTIN: %1 = call double @pow(double 3.000000e+00, double 3.000000e+00)
+ %1 = call double @pow(double 3.000000e+00, double 3.000000e+00)
+ ret double %1
+}
+declare double @pow(double, double)
+
+; Shouldn't fold because of -fno-builtin
+define double @fmod_() nounwind uwtable ssp {
+; FNOBUILTIN: @fmod_
+; FNOBUILTIN: %1 = call double @fmod(double 3.000000e+00, double 3.000000e+00)
+ %1 = call double @fmod(double 3.000000e+00, double 3.000000e+00)
+ ret double %1
+}
+declare double @fmod(double, double)
+
+; Shouldn't fold because of -fno-builtin
+define double @atan2_() nounwind uwtable ssp {
+; FNOBUILTIN: @atan2_
+; FNOBUILTIN: %1 = call double @atan2(double 3.000000e+00, double 3.000000e+00)
+ %1 = call double @atan2(double 3.000000e+00, double 3.000000e+00)
+ ret double %1
+}
+declare double @atan2(double, double)
diff --git a/test/Transforms/DeadArgElim/deadexternal.ll b/test/Transforms/DeadArgElim/deadexternal.ll
index b2d63ec..e3fe1bb 100644
--- a/test/Transforms/DeadArgElim/deadexternal.ll
+++ b/test/Transforms/DeadArgElim/deadexternal.ll
@@ -30,10 +30,10 @@ entry:
define void @h() {
entry:
%i = alloca i32, align 4
- volatile store i32 10, i32* %i, align 4
+ store volatile i32 10, i32* %i, align 4
; CHECK: %tmp = load volatile i32* %i, align 4
 ; CHECK-NEXT: call void @f(i32 undef)
- %tmp = volatile load i32* %i, align 4
+ %tmp = load volatile i32* %i, align 4
call void @f(i32 %tmp)
ret void
}
diff --git a/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll b/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
index 828ccc5..ed53eb5 100644
--- a/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
+++ b/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
@@ -76,3 +76,20 @@ entry:
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+%struct.trapframe = type { i64, i64, i64 }
+
+; Bugzilla 11455 - make sure negative GEPs don't break this optimization
+; CHECK: @cpu_lwp_fork
+define void @cpu_lwp_fork(%struct.trapframe* %md_regs, i64 %pcb_rsp0) nounwind uwtable noinline ssp {
+entry:
+ %0 = inttoptr i64 %pcb_rsp0 to %struct.trapframe*
+ %add.ptr = getelementptr inbounds %struct.trapframe* %0, i64 -1
+ %1 = bitcast %struct.trapframe* %add.ptr to i8*
+ %2 = bitcast %struct.trapframe* %md_regs to i8*
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i32 1, i1 false)
+ %tf_trapno = getelementptr inbounds %struct.trapframe* %0, i64 -1, i32 1
+ store i64 3, i64* %tf_trapno, align 8
+ ret void
+}
diff --git a/test/Transforms/DeadStoreElimination/free.ll b/test/Transforms/DeadStoreElimination/free.ll
index 168bd95..a5fbdc7 100644
--- a/test/Transforms/DeadStoreElimination/free.ll
+++ b/test/Transforms/DeadStoreElimination/free.ll
@@ -58,3 +58,13 @@ skipinit1:
tail call void @free(i8* %alloc1) nounwind
ret void
}
+
+; CHECK: @test5
+define void @test5() {
+ br label %bb
+
+bb:
+ tail call void @free(i8* undef) nounwind
+ br label %bb
+}
+
diff --git a/test/Transforms/EarlyCSE/basic.ll b/test/Transforms/EarlyCSE/basic.ll
index 57b1697..32c302c 100644
--- a/test/Transforms/EarlyCSE/basic.ll
+++ b/test/Transforms/EarlyCSE/basic.ll
@@ -10,22 +10,22 @@ define void @test1(i8 %V, i32 *%P) {
%C = zext i8 %V to i32
%D = zext i8 %V to i32 ;; CSE
- volatile store i32 %C, i32* %P
- volatile store i32 %D, i32* %P
+ store volatile i32 %C, i32* %P
+ store volatile i32 %D, i32* %P
; CHECK-NEXT: %C = zext i8 %V to i32
; CHECK-NEXT: store volatile i32 %C
; CHECK-NEXT: store volatile i32 %C
%E = add i32 %C, %C
%F = add i32 %C, %C
- volatile store i32 %E, i32* %P
- volatile store i32 %F, i32* %P
+ store volatile i32 %E, i32* %P
+ store volatile i32 %F, i32* %P
; CHECK-NEXT: %E = add i32 %C, %C
; CHECK-NEXT: store volatile i32 %E
; CHECK-NEXT: store volatile i32 %E
%G = add nuw i32 %C, %C ;; not a CSE with E
- volatile store i32 %G, i32* %P
+ store volatile i32 %G, i32* %P
; CHECK-NEXT: %G = add nuw i32 %C, %C
; CHECK-NEXT: store volatile i32 %G
ret void
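+
+; Note: the volatile stores must all survive, but they do not block CSE of
+; the pure values feeding them (%D folds into %C, %F into %E), while the
+; nuw-flagged add stays distinct from the plain one.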
diff --git a/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll b/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll
index 85df09e..b7e4d1f 100644
--- a/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll
@@ -4,6 +4,6 @@
@g = global i32 0 ; <i32*> [#uses=1]
define i32 @f() {
- %t = volatile load i32* @g ; <i32> [#uses=1]
+ %t = load volatile i32* @g ; <i32> [#uses=1]
ret i32 %t
}
diff --git a/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll b/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
index f21fabc..93991d2 100644
--- a/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
+++ b/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
@@ -5,6 +5,6 @@
define void @foo() {
; CHECK: void @foo() {
- %tmp = volatile load i32* @g
+ %tmp = load volatile i32* @g
ret void
}
diff --git a/test/Transforms/GlobalDCE/2009-09-03-MDNode.ll b/test/Transforms/GlobalDCE/2009-09-03-MDNode.ll
deleted file mode 100644
index 29864f8..0000000
--- a/test/Transforms/GlobalDCE/2009-09-03-MDNode.ll
+++ /dev/null
@@ -1,264 +0,0 @@
-; RUN: opt < %s -globaldce | llc -O0 -o /dev/null
-
-%struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t }
-%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>" = type { i32 }
-%struct.__pthread_list_t = type { %struct.__pthread_list_t*, %struct.__pthread_list_t* }
-%struct.pthread_attr_t = type { i64, [48 x i8] }
-%struct.pthread_mutex_t = type { %struct..0__pthread_mutex_s }
-
-@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
-@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific ; <i8* (i32)*> [#uses=0]
-@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific ; <i32 (i32, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = alias weak i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; <i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i64)* @pthread_cancel ; <i32 (i64)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%struct.pthread_mutex_t*, %"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)* @pthread_mutex_init ; <i32 (%struct.pthread_mutex_t*, %"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create ; <i32 (i32*, void (i8*)*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete ; <i32 (i32)*> [#uses=0]
-@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)* @pthread_mutexattr_init ; <i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*, i32)* @pthread_mutexattr_settype ; <i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*, i32)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)* @pthread_mutexattr_destroy ; <i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)*> [#uses=0]
-
-define weak void @_ZN9__gnu_cxx26__aux_require_boolean_exprIbEEvRKT_(i8* %__t) {
-entry:
- tail call void @llvm.dbg.func.start(metadata !0)
- tail call void @llvm.dbg.stoppoint(i32 240, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !0)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_19_ConvertibleConceptIjjEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !8)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !8)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPcEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !11)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !11)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPKcEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !12)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !12)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPwEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !13)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !13)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPKwEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !14)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !14)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIPwEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !15)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !15)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIPcEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !16)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !16)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIiEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !17)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !17)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIlEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !18)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !18)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIxEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !19)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !19)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIjEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !20)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !20)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_22_OutputIteratorConceptISt19ostreambuf_iteratorIcSt11char_traitsIcEEcEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !21)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !21)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_22_OutputIteratorConceptISt19ostreambuf_iteratorIwSt11char_traitsIwEEwEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !22)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !22)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPcEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !23)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !23)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPKcEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !24)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !24)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPKcSsEEEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !25)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !25)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPcSsEEEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !26)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !26)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPKwSbIwSt11char_traitsIwESaIwEEEEEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !27)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !27)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPwSbIwSt11char_traitsIwESaIwEEEEEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !28)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !28)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPwEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !29)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !29)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPKwEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !30)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !30)
- ret void
-}
-
-declare void @llvm.dbg.func.start(metadata) nounwind readnone
-
-declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone
-
-declare void @llvm.dbg.region.end(metadata) nounwind readnone
-
-declare extern_weak i32 @pthread_once(i32*, void ()*)
-
-declare extern_weak i8* @pthread_getspecific(i32)
-
-declare extern_weak i32 @pthread_setspecific(i32, i8*)
-
-declare extern_weak i32 @pthread_create(i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)
-
-declare extern_weak i32 @pthread_cancel(i64)
-
-declare extern_weak i32 @pthread_mutex_lock(%struct.pthread_mutex_t*)
-
-declare extern_weak i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*)
-
-declare extern_weak i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*)
-
-declare extern_weak i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)
-
-declare extern_weak i32 @pthread_key_create(i32*, void (i8*)*)
-
-declare extern_weak i32 @pthread_key_delete(i32)
-
-declare extern_weak i32 @pthread_mutexattr_init(%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)
-
-declare extern_weak i32 @pthread_mutexattr_settype(%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*, i32)
-
-declare extern_weak i32 @pthread_mutexattr_destroy(%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)
-
-!0 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__aux_require_boolean_expr<bool>", metadata !"__aux_require_boolean_expr<bool>", metadata !"_ZN9__gnu_cxx26__aux_require_boolean_exprIbEEvRKT_", metadata !2, i32 239, metadata !3, i1 false, i1 true}
-!1 = metadata !{i32 458769, i32 0, i32 4, metadata !"concept-inst.cc", metadata !"/home/buildbot/buildslave/llvm-x86_64-linux-selfhost/llvm-gcc.obj/x86_64-unknown-linux-gnu/libstdc++-v3/src/../../../../llvm-gcc.src/libstdc++-v3/src", metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build)", i1 true, i1 true, metadata !"", i32 0}
-!2 = metadata !{i32 458769, i32 0, i32 4, metadata !"boost_concept_check.h", metadata !"/home/buildbot/buildslave/llvm-x86_64-linux-selfhost/llvm-gcc.obj/x86_64-unknown-linux-gnu/libstdc++-v3/include/bits", metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build)", i1 false, i1 true, metadata !"", i32 0}
-!3 = metadata !{i32 458773, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0}
-!4 = metadata !{null, metadata !5}
-!5 = metadata !{i32 458768, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !6}
-!6 = metadata !{i32 458790, metadata !1, metadata !"", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !7}
-!7 = metadata !{i32 458788, metadata !1, metadata !"bool", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2}
-!8 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_ConvertibleConcept<unsigned int, unsigned int> >", metadata !"__function_requires<__gnu_cxx::_ConvertibleConcept<unsigned int, unsigned int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_19_ConvertibleConceptIjjEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!9 = metadata !{i32 458773, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0}
-!10 = metadata !{null}
-!11 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<char*> >", metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!12 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<const char*> >", metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<const char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPKcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!13 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!14 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<const wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<const wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPKwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!15 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIPwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!16 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<char*> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIPcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!17 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<int> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIiEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!18 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<long int> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<long int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIlEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!19 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<long long int> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<long long int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIxEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!20 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<unsigned int> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<unsigned int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIjEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!21 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_OutputIteratorConcept<std::ostreambuf_iterator<char, std::char_traits<char> >, char> >", metadata !"__function_requires<__gnu_cxx::_OutputIteratorConcept<std::ostreambuf_iterator<char, std::char_traits<char> >, char> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_22_OutputIteratorConceptISt19ostreambuf_iteratorIcSt11char_traitsIcEEcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!22 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_OutputIteratorConcept<std::ostreambuf_iterator<wchar_t, std::char_traits<wchar_t> >, wchar_t> >", metadata !"__function_requires<__gnu_cxx::_OutputIteratorConcept<std::ostreambuf_iterator<wchar_t, std::char_traits<wchar_t> >, wchar_t> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_22_OutputIteratorConceptISt19ostreambuf_iteratorIwSt11char_traitsIwEEwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!23 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<char*> >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!24 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<const char*> >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<const char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPKcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!25 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<const char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<const char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPKcSsEEEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!26 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPcSsEEEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!27 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<const wchar_t*, std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > > > >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<const wchar_t*, std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > > > >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPKwSbIwSt11char_traitsIwESaIwEEEEEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!28 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<wchar_t*, std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > > > >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<wchar_t*, std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > > > >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPwSbIwSt11char_traitsIwESaIwEEEEEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!29 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!30 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<const wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<const wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPKwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
diff --git a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
index a6803ab..588d5c9 100644
--- a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
+++ b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
@@ -3,7 +3,7 @@
define double @foo() nounwind {
entry:
- %tmp1 = volatile load double* @t0.1441, align 8 ; <double> [#uses=2]
+ %tmp1 = load volatile double* @t0.1441, align 8 ; <double> [#uses=2]
%tmp4 = fmul double %tmp1, %tmp1 ; <double> [#uses=1]
ret double %tmp4
}
diff --git a/test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll b/test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll
deleted file mode 100644
index 0e70c49..0000000
--- a/test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: opt < %s -globalopt -S | grep { nest } | count 1
- %struct.FRAME.nest = type { i32, i32 (i32)* }
- %struct.__builtin_trampoline = type { [10 x i8] }
-@.str = internal constant [7 x i8] c"%d %d\0A\00" ; <[7 x i8]*> [#uses=1]
-
-define i32 @process(i32 (i32)* %func) nounwind {
-entry:
- %tmp2 = tail call i32 %func( i32 1 ) nounwind ; <i32> [#uses=1]
- ret i32 %tmp2
-}
-
-define internal fastcc i32 @g.1478(%struct.FRAME.nest* nest %CHAIN.1, i32 %m) nounwind {
-entry:
- %tmp3 = getelementptr %struct.FRAME.nest* %CHAIN.1, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp4 = load i32* %tmp3, align 4 ; <i32> [#uses=1]
- %tmp7 = icmp eq i32 %tmp4, %m ; <i1> [#uses=1]
- %tmp78 = zext i1 %tmp7 to i32 ; <i32> [#uses=1]
- ret i32 %tmp78
-}
-
-define internal i32 @f.1481(%struct.FRAME.nest* nest %CHAIN.2, i32 %m) nounwind {
-entry:
- %tmp4 = tail call fastcc i32 @g.1478( %struct.FRAME.nest* nest %CHAIN.2, i32 %m ) nounwind ; <i32> [#uses=1]
- %tmp6 = getelementptr %struct.FRAME.nest* %CHAIN.2, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp7 = load i32* %tmp6, align 4 ; <i32> [#uses=1]
- %tmp9 = icmp eq i32 %tmp4, %tmp7 ; <i1> [#uses=1]
- %tmp910 = zext i1 %tmp9 to i32 ; <i32> [#uses=1]
- ret i32 %tmp910
-}
-
-define i32 @nest(i32 %n) nounwind {
-entry:
- %TRAMP.316 = alloca [10 x i8] ; <[10 x i8]*> [#uses=1]
- %FRAME.0 = alloca %struct.FRAME.nest ; <%struct.FRAME.nest*> [#uses=3]
- %TRAMP.316.sub = getelementptr [10 x i8]* %TRAMP.316, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp3 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 %n, i32* %tmp3, align 8
- %FRAME.06 = bitcast %struct.FRAME.nest* %FRAME.0 to i8* ; <i8*> [#uses=1]
- %tramp = call i8* @llvm.init.trampoline( i8* %TRAMP.316.sub, i8* bitcast (i32 (%struct.FRAME.nest*, i32)* @f.1481 to i8*), i8* %FRAME.06 ) ; <i8*> [#uses=1]
- %tmp7 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 1 ; <i32 (i32)**> [#uses=1]
- %tmp89 = bitcast i8* %tramp to i32 (i32)* ; <i32 (i32)*> [#uses=2]
- store i32 (i32)* %tmp89, i32 (i32)** %tmp7, align 4
- %tmp13 = call i32 @process( i32 (i32)* %tmp89 ) nounwind ; <i32> [#uses=1]
- ret i32 %tmp13
-}
-
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind
-
-define i32 @main() nounwind {
-entry:
- %tmp = tail call i32 @nest( i32 2 ) nounwind ; <i32> [#uses=1]
- %tmp1 = tail call i32 @nest( i32 1 ) nounwind ; <i32> [#uses=1]
- %tmp3 = tail call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([7 x i8]* @.str, i32 0, i32 0), i32 %tmp1, i32 %tmp ) nounwind ; <i32> [#uses=0]
- ret i32 undef
-}
-
-declare i32 @printf(i8*, ...) nounwind
diff --git a/test/Transforms/IPConstantProp/dangling-block-address.ll b/test/Transforms/IPConstantProp/dangling-block-address.ll
index 0489dfa..bb10133 100644
--- a/test/Transforms/IPConstantProp/dangling-block-address.ll
+++ b/test/Transforms/IPConstantProp/dangling-block-address.ll
@@ -12,7 +12,7 @@
define void @foo(i32 %x) nounwind readnone {
entry:
%b = alloca i32, align 4 ; <i32*> [#uses=1]
- volatile store i32 -1, i32* %b
+ store volatile i32 -1, i32* %b
ret void
}
diff --git a/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll b/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
index a8b706e..4ad63aa 100644
--- a/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
+++ b/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
@@ -3,9 +3,6 @@
; add219 should be extended to i64 because it is nsw, even though its
; sext cannot be hoisted outside the loop.
-; FIXME: GetExtendedOperandRecurrence has problems with the nsw bit on add exprs
-; XFAIL: *
-
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
define void @test() nounwind {
@@ -22,7 +19,7 @@ for.body153: ; preds = %for.body153, %for.b
; CHECK: add nsw i64 %indvars.iv, 1
for.body170: ; preds = %for.body170, %for.body153
- %i2.19 = phi i32 [ %add249, %for.body170 ], [ undef, %for.body153 ]
+ %i2.19 = phi i32 [ %add249, %for.body170 ], [ 0, %for.body153 ]
%add219 = add nsw i32 %i2.19, 1
%idxprom220 = sext i32 %add219 to i64
%add249 = add nsw i32 %i2.19, %shl132
diff --git a/test/Transforms/IndVarSimplify/avoid-i0.ll b/test/Transforms/IndVarSimplify/avoid-i0.ll
index 59661fa..22f2e4b 100644
--- a/test/Transforms/IndVarSimplify/avoid-i0.ll
+++ b/test/Transforms/IndVarSimplify/avoid-i0.ll
@@ -90,7 +90,7 @@ entry:
br label %bb4
bb: ; preds = %bb4
- %0 = volatile load i32* @x, align 4 ; <i32> [#uses=1]
+ %0 = load volatile i32* @x, align 4 ; <i32> [#uses=1]
store i32 %0, i32* %vol.0, align 4
store i32 0, i32* %l_52, align 4
br label %bb2
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll b/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
index 2f3100f..a62943d 100644
--- a/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
+++ b/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
@@ -14,7 +14,7 @@ loop:
%i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
%ip = add i64 %i, 1
%p.2.ip.1 = getelementptr [3 x [3 x double]]* %p, i64 2, i64 %ip, i64 1
- volatile store double 0.0, double* %p.2.ip.1
+ store volatile double 0.0, double* %p.2.ip.1
%i.next = add i64 %i, 1
br label %loop
}
diff --git a/test/Transforms/IndVarSimplify/sink-alloca.ll b/test/Transforms/IndVarSimplify/sink-alloca.ll
index e7d642c..64207d8 100644
--- a/test/Transforms/IndVarSimplify/sink-alloca.ll
+++ b/test/Transforms/IndVarSimplify/sink-alloca.ll
@@ -18,8 +18,8 @@ while.cond: ; preds = %while.cond, %entry
br i1 %tobool, label %while.end, label %while.cond
while.end: ; preds = %while.cond
- volatile store i32 0, i32* %result.i
- %tmp.i = volatile load i32* %result.i ; <i32> [#uses=0]
+ store volatile i32 0, i32* %result.i
+ %tmp.i = load volatile i32* %result.i ; <i32> [#uses=0]
ret i32 0
}
declare i32 @bar()
diff --git a/test/Transforms/Inline/noinline-recursive-fn.ll b/test/Transforms/Inline/noinline-recursive-fn.ll
index 1d5ebbb..d56b390 100644
--- a/test/Transforms/Inline/noinline-recursive-fn.ll
+++ b/test/Transforms/Inline/noinline-recursive-fn.ll
@@ -17,7 +17,7 @@ entry:
bb: ; preds = %entry
%1 = sub nsw i32 %x, 1 ; <i32> [#uses=1]
call void @foo(i32 %1) nounwind ssp
- volatile store i32 1, i32* @g, align 4
+ store volatile i32 1, i32* @g, align 4
ret void
return: ; preds = %entry
@@ -42,7 +42,7 @@ entry:
%0 = bitcast i8* %Bar to void (i32, i8*, i8*)*
%1 = sub nsw i32 %x, 1
call void %0(i32 %1, i8* %Foo, i8* %Bar) nounwind
- volatile store i32 42, i32* @g, align 4
+ store volatile i32 42, i32* @g, align 4
ret void
}
@@ -54,7 +54,7 @@ entry:
bb: ; preds = %entry
%1 = bitcast i8* %Foo to void (i32, i8*, i8*)* ; <void (i32, i8*, i8*)*> [#uses=1]
call void %1(i32 %x, i8* %Foo, i8* %Bar) nounwind
- volatile store i32 13, i32* @g, align 4
+ store volatile i32 13, i32* @g, align 4
ret void
return: ; preds = %entry
diff --git a/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll b/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
index 3297919..7f73908 100644
--- a/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
+++ b/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
@@ -2,6 +2,6 @@
define void @test(i32* %P) {
; Dead but not deletable!
- %X = volatile load i32* %P ; <i32> [#uses=0]
+ %X = load volatile i32* %P ; <i32> [#uses=0]
ret void
}
diff --git a/test/Transforms/InstCombine/2007-09-11-Trampoline.ll b/test/Transforms/InstCombine/2007-09-11-Trampoline.ll
deleted file mode 100644
index 6190aa9..0000000
--- a/test/Transforms/InstCombine/2007-09-11-Trampoline.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep {call i32 @f}
-
- %struct.FRAME.nest = type { i32, i32 (i32)* }
- %struct.__builtin_trampoline = type { [10 x i8] }
-
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*)
-
-declare i32 @f(%struct.FRAME.nest* nest , i32 )
-
-define i32 @nest(i32 %n) {
-entry:
- %FRAME.0 = alloca %struct.FRAME.nest, align 8 ; <%struct.FRAME.nest*> [#uses=3]
- %TRAMP.216 = alloca [10 x i8], align 16 ; <[10 x i8]*> [#uses=1]
- %TRAMP.216.sub = getelementptr [10 x i8]* %TRAMP.216, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp3 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 %n, i32* %tmp3, align 8
- %FRAME.06 = bitcast %struct.FRAME.nest* %FRAME.0 to i8* ; <i8*> [#uses=1]
- %tramp = call i8* @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest* , i32)* @f to i8*), i8* %FRAME.06 ) ; <i8*> [#uses=1]
- %tmp7 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 1 ; <i32 (i32)**> [#uses=1]
- %tmp89 = bitcast i8* %tramp to i32 (i32)* ; <i32 (i32)*> [#uses=2]
- store i32 (i32)* %tmp89, i32 (i32)** %tmp7, align 8
- %tmp2.i = call i32 %tmp89( i32 1 ) ; <i32> [#uses=1]
- ret i32 %tmp2.i
-}
diff --git a/test/Transforms/InstCombine/2007-10-28-stacksave.ll b/test/Transforms/InstCombine/2007-10-28-stacksave.ll
index 76bceb6..4c5c367 100644
--- a/test/Transforms/InstCombine/2007-10-28-stacksave.ll
+++ b/test/Transforms/InstCombine/2007-10-28-stacksave.ll
@@ -26,7 +26,7 @@ lab: ; preds = %cleanup31, %entry
%tmp21 = getelementptr i32* %tmp1819, i32 0 ; <i32*> [#uses=1]
store i32 1, i32* %tmp21, align 4
%tmp2223 = bitcast i32* %tmp1819 to i8* ; <i8*> [#uses=1]
- volatile store i8* %tmp2223, i8** @p, align 4
+ store volatile i8* %tmp2223, i8** @p, align 4
%tmp25 = add i32 %n.0, 1 ; <i32> [#uses=2]
%tmp27 = icmp sle i32 %tmp25, 999999 ; <i1> [#uses=1]
%tmp2728 = zext i1 %tmp27 to i8 ; <i8> [#uses=1]
diff --git a/test/Transforms/InstCombine/2008-01-14-DoubleNest.ll b/test/Transforms/InstCombine/2008-01-14-DoubleNest.ll
deleted file mode 100644
index 6401dfd..0000000
--- a/test/Transforms/InstCombine/2008-01-14-DoubleNest.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: opt < %s -instcombine -disable-output
-
- %struct.FRAME.nest = type { i32, i32 (i32*)* }
- %struct.__builtin_trampoline = type { [10 x i8] }
-
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind
-
-declare i32 @f(%struct.FRAME.nest* nest , i32*)
-
-define i32 @nest(i32 %n) {
-entry:
- %FRAME.0 = alloca %struct.FRAME.nest, align 8 ; <%struct.FRAME.nest*> [#uses=3]
- %TRAMP.216 = alloca [10 x i8], align 16 ; <[10 x i8]*> [#uses=1]
- %TRAMP.216.sub = getelementptr [10 x i8]* %TRAMP.216, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp3 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 %n, i32* %tmp3, align 8
- %FRAME.06 = bitcast %struct.FRAME.nest* %FRAME.0 to i8* ; <i8*> [#uses=1]
- %tramp = call i8* @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest*, i32*)* @f to i8*), i8* %FRAME.06 ) ; <i8*> [#uses=1]
- %tmp7 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 1 ; <i32 (i32*)**> [#uses=1]
- %tmp89 = bitcast i8* %tramp to i32 (i32*)* ; <i32 (i32*)*> [#uses=2]
- store i32 (i32*)* %tmp89, i32 (i32*)** %tmp7, align 8
- %tmp2.i = call i32 %tmp89( i32* nest null ) ; <i32> [#uses=1]
- ret i32 %tmp2.i
-}
diff --git a/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll b/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
index 6847f5e..de08c32 100644
--- a/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
+++ b/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
@@ -2,7 +2,7 @@
define void @test() {
%votf = alloca <4 x float> ; <<4 x float>*> [#uses=1]
- volatile store <4 x float> zeroinitializer, <4 x float>* %votf, align 16
+ store volatile <4 x float> zeroinitializer, <4 x float>* %votf, align 16
ret void
}
diff --git a/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll b/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
index a24f307..1286e3d 100644
--- a/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
+++ b/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
@@ -6,17 +6,17 @@ target triple = "i386-apple-darwin8"
define i32 @main() nounwind {
entry:
%tmp93 = icmp slt i32 0, 10 ; <i1> [#uses=0]
- %tmp34 = volatile load i32* @g_1, align 4 ; <i32> [#uses=1]
+ %tmp34 = load volatile i32* @g_1, align 4 ; <i32> [#uses=1]
br label %bb
bb: ; preds = %bb, %entry
%b.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %tmp6, %bb ] ; <i32> [#uses=1]
%tmp3.reg2mem.0 = phi i32 [ %tmp34, %entry ], [ %tmp3, %bb ] ; <i32> [#uses=1]
%tmp4 = add i32 %tmp3.reg2mem.0, 5 ; <i32> [#uses=1]
- volatile store i32 %tmp4, i32* @g_1, align 4
+ store volatile i32 %tmp4, i32* @g_1, align 4
%tmp6 = add i32 %b.0.reg2mem.0, 1 ; <i32> [#uses=2]
%tmp9 = icmp slt i32 %tmp6, 10 ; <i1> [#uses=1]
- %tmp3 = volatile load i32* @g_1, align 4 ; <i32> [#uses=1]
+ %tmp3 = load volatile i32* @g_1, align 4 ; <i32> [#uses=1]
br i1 %tmp9, label %bb, label %bb11
bb11: ; preds = %bb
diff --git a/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll b/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
index 5fb11ff..ebbd3a7 100644
--- a/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
+++ b/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
@@ -7,11 +7,11 @@ target triple = "i386-apple-darwin8"
define i32 @main(i32 %i) nounwind {
entry:
%tmp93 = icmp slt i32 %i, 10 ; <i1> [#uses=0]
- %tmp34 = volatile load i32* @g_1, align 4 ; <i32> [#uses=1]
+ %tmp34 = load volatile i32* @g_1, align 4 ; <i32> [#uses=1]
br i1 %tmp93, label %bb11, label %bb
bb: ; preds = %bb, %entry
- %tmp3 = volatile load i32* @g_1, align 4 ; <i32> [#uses=1]
+ %tmp3 = load volatile i32* @g_1, align 4 ; <i32> [#uses=1]
br label %bb11
bb11: ; preds = %bb
diff --git a/test/Transforms/InstCombine/2008-06-24-StackRestore.ll b/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
index 8307834..4f4709b 100644
--- a/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
+++ b/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
@@ -10,7 +10,7 @@ entry:
%tmp2752 = alloca i32 ; <i32*> [#uses=2]
%tmpcast53 = bitcast i32* %tmp2752 to i8* ; <i8*> [#uses=1]
store i32 2, i32* %tmp2752, align 4
- volatile store i8* %tmpcast53, i8** @p, align 4
+ store volatile i8* %tmpcast53, i8** @p, align 4
br label %bb44
bb: ; preds = %bb44
@@ -29,7 +29,7 @@ bb44: ; preds = %bb44, %entry
store i32 1, i32* %tmp27, align 4
%tmp34 = getelementptr i32* %tmp27, i32 %tmp4 ; <i32*> [#uses=1]
store i32 2, i32* %tmp34, align 4
- volatile store i8* %tmpcast, i8** @p, align 4
+ store volatile i8* %tmpcast, i8** @p, align 4
%exitcond = icmp eq i32 %tmp3857, 999999 ; <i1> [#uses=1]
br i1 %exitcond, label %bb, label %bb44
}
diff --git a/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll b/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
index 8104408..1ed5323 100644
--- a/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
+++ b/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
@@ -7,17 +7,17 @@ target triple = "i386-apple-darwin8"
define i32 @main() nounwind {
entry:
%tmp93 = icmp slt i32 0, 10 ; <i1> [#uses=0]
- %tmp34 = volatile load i32* @g_1, align 4 ; <i32> [#uses=1]
+ %tmp34 = load volatile i32* @g_1, align 4 ; <i32> [#uses=1]
br label %bb
bb: ; preds = %bb, %entry
%b.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %tmp6, %bb ] ; <i32> [#uses=1]
%tmp3.reg2mem.0 = phi i32 [ %tmp3, %bb ], [ %tmp34, %entry ]
%tmp4 = add i32 %tmp3.reg2mem.0, 5 ; <i32> [#uses=1]
- volatile store i32 %tmp4, i32* @g_1, align 4
+ store volatile i32 %tmp4, i32* @g_1, align 4
%tmp6 = add i32 %b.0.reg2mem.0, 1 ; <i32> [#uses=2]
%tmp9 = icmp slt i32 %tmp6, 10 ; <i1> [#uses=1]
- %tmp3 = volatile load i32* @g_1, align 4 ; <i32> [#uses=1]
+ %tmp3 = load volatile i32* @g_1, align 4 ; <i32> [#uses=1]
br i1 %tmp9, label %bb, label %bb11
bb11: ; preds = %bb
diff --git a/test/Transforms/InstCombine/align-external.ll b/test/Transforms/InstCombine/align-external.ll
index 6e8ad87..d4a5d42 100644
--- a/test/Transforms/InstCombine/align-external.ll
+++ b/test/Transforms/InstCombine/align-external.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
-; Don't assume that external global variables have their preferred
-; alignment. They may only have the ABI minimum alignment.
+; Don't assume that external global variables or those with weak linkage have
+; their preferred alignment. They may only have the ABI minimum alignment.
; CHECK: %s = shl i64 %a, 3
; CHECK: %r = or i64 %s, ptrtoint (i32* @A to i64)
@@ -11,7 +11,7 @@
target datalayout = "-i32:8:32"
@A = external global i32
-@B = external global i32
+@B = weak_odr global i32 0
define i64 @foo(i64 %a) {
%t = ptrtoint i32* @A to i64
@@ -20,3 +20,10 @@ define i64 @foo(i64 %a) {
%q = add i64 %r, 1
ret i64 %q
}
+
+define i32 @bar() {
+; CHECK: @bar
+ %r = load i32* @B, align 1
+; CHECK: align 1
+ ret i32 %r
+}
diff --git a/test/Transforms/InstCombine/bitcount.ll b/test/Transforms/InstCombine/bitcount.ll
index f75ca2d..a6fd837 100644
--- a/test/Transforms/InstCombine/bitcount.ll
+++ b/test/Transforms/InstCombine/bitcount.ll
@@ -4,13 +4,13 @@
; RUN: grep -v declare | not grep llvm.ct
declare i31 @llvm.ctpop.i31(i31 %val)
-declare i32 @llvm.cttz.i32(i32 %val)
-declare i33 @llvm.ctlz.i33(i33 %val)
+declare i32 @llvm.cttz.i32(i32 %val, i1)
+declare i33 @llvm.ctlz.i33(i33 %val, i1)
define i32 @test(i32 %A) {
%c1 = call i31 @llvm.ctpop.i31(i31 12415124)
- %c2 = call i32 @llvm.cttz.i32(i32 87359874)
- %c3 = call i33 @llvm.ctlz.i33(i33 87359874)
+ %c2 = call i32 @llvm.cttz.i32(i32 87359874, i1 true)
+ %c3 = call i33 @llvm.ctlz.i33(i33 87359874, i1 true)
%t1 = zext i31 %c1 to i32
%t3 = trunc i33 %c3 to i32
%r1 = add i32 %t1, %c2
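An illustrative gloss on the signature change above (my summary, not part of the commit): llvm.cttz and llvm.ctlz gained a second i1 flag, is_zero_undef. When the flag is true the result is undefined for a zero input, which lets targets lower to instructions such as x86 BSF/BSR directly; passing i1 false keeps the old fully-defined behavior:

  %defined = call i32 @llvm.cttz.i32(i32 %x, i1 false) ; yields 32 when %x == 0
  %fast    = call i32 @llvm.cttz.i32(i32 %x, i1 true)  ; undef when %x == 0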
diff --git a/test/Transforms/InstCombine/constant-fold-gep.ll b/test/Transforms/InstCombine/constant-fold-gep.ll
index c679226..e5b16ea 100644
--- a/test/Transforms/InstCombine/constant-fold-gep.ll
+++ b/test/Transforms/InstCombine/constant-fold-gep.ll
@@ -9,7 +9,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
@Y = internal global [3 x %struct.X] zeroinitializer
define void @frob() {
-; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 0), align 8
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 0), align 16
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 0), align 4
; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 1), align 4
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 1), align 4
@@ -33,7 +33,7 @@ define void @frob() {
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 10), align 4
; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 2), align 4
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 11), align 4
-; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 0), align 8
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 0), align 16
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 12), align 4
; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 1), align 4
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 13), align 4
@@ -47,7 +47,7 @@ define void @frob() {
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 17), align 8
; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 1, i64 0, i32 0, i64 0), align 8
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 18), align 8
-; CHECK: store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 2, i64 0, i32 0, i64 0), align 8
+; CHECK: store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 2, i64 0, i32 0, i64 0), align 16
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 36), align 8
; CHECK: store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 1, i64 0, i32 0, i64 1), align 8
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 19), align 8
diff --git a/test/Transforms/InstCombine/extractvalue.ll b/test/Transforms/InstCombine/extractvalue.ll
index cf36b8f..5e4c677 100644
--- a/test/Transforms/InstCombine/extractvalue.ll
+++ b/test/Transforms/InstCombine/extractvalue.ll
@@ -88,7 +88,7 @@ define i32 @doubleextract2gep({i32, {i32, i32}}* %arg) {
; CHECK-NEXT: ret
define i32 @nogep-multiuse({i32, i32}* %pair) {
; The load should be left unchanged since both parts are needed.
- %L = volatile load {i32, i32}* %pair
+ %L = load volatile {i32, i32}* %pair
%LHS = extractvalue {i32, i32} %L, 0
%RHS = extractvalue {i32, i32} %L, 1
%R = add i32 %LHS, %RHS
@@ -100,8 +100,8 @@ define i32 @nogep-multiuse({i32, i32}* %pair) {
; CHECK-NEXT: extractvalue
; CHECK-NEXT: ret
define i32 @nogep-volatile({i32, i32}* %pair) {
- ; The volatile load should be left unchanged.
- %L = volatile load {i32, i32}* %pair
+ ; The load volatile should be left unchanged.
+ %L = load volatile {i32, i32}* %pair
%E = extractvalue {i32, i32} %L, 1
ret i32 %E
}
diff --git a/test/Transforms/InstCombine/fold-sqrt-sqrtf.ll b/test/Transforms/InstCombine/fold-sqrt-sqrtf.ll
new file mode 100644
index 0000000..bd92b4a
--- /dev/null
+++ b/test/Transforms/InstCombine/fold-sqrt-sqrtf.ll
@@ -0,0 +1,17 @@
+; RUN: opt -instcombine -S -disable-simplify-libcalls < %s | FileCheck %s
+; rdar://10466410
+
+; Instcombine tries to fold (fptrunc (sqrt (fpext x))) -> (sqrtf x), but this
+; shouldn't fold when sqrtf isn't available.
+define float @foo(float %f) uwtable ssp {
+entry:
+; CHECK: %conv = fpext float %f to double
+; CHECK: %call = tail call double @sqrt(double %conv)
+; CHECK: %conv1 = fptrunc double %call to float
+ %conv = fpext float %f to double
+ %call = tail call double @sqrt(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+declare double @sqrt(double)
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 77ca62c..016e8c5 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -559,3 +559,24 @@ define i1 @test57(i32 %a) {
call void @foo(i32 %and)
ret i1 %cmp
}
+
+; rdar://problem/10482509
+; CHECK: @cmpabs1
+; CHECK-NEXT: icmp ne
+define zeroext i1 @cmpabs1(i64 %val) {
+ %sub = sub nsw i64 0, %val
+ %cmp = icmp slt i64 %val, 0
+ %sub.val = select i1 %cmp, i64 %sub, i64 %val
+ %tobool = icmp ne i64 %sub.val, 0
+ ret i1 %tobool
+}
+
+; CHECK: @cmpabs2
+; CHECK-NEXT: icmp ne
+define zeroext i1 @cmpabs2(i64 %val) {
+ %sub = sub nsw i64 0, %val
+ %cmp = icmp slt i64 %val, 0
+ %sub.val = select i1 %cmp, i64 %val, i64 %sub
+ %tobool = icmp ne i64 %sub.val, 0
+ ret i1 %tobool
+}
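Aside (illustrative sketch, not from the commit): both cmpabs tests rely on the identity |val| != 0 iff val != 0, so the negate-and-select abs pattern contributes nothing to the compare and the CHECK lines expect it to be bypassed:

  %sub = sub nsw i64 0, %val
  %cmp = icmp slt i64 %val, 0
  %sub.val = select i1 %cmp, i64 %sub, i64 %val ; abs(%val)
  %tobool = icmp ne i64 %sub.val, 0             ; folds to: icmp ne i64 %val, 0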
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index f033e51..e31bd7d 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -5,10 +5,10 @@
declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8)
declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8)
declare double @llvm.powi.f64(double, i32) nounwind readonly
-declare i32 @llvm.cttz.i32(i32) nounwind readnone
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
-declare i8 @llvm.ctlz.i8(i8) nounwind readnone
+declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
define i8 @uaddtest1(i8 %A, i8 %B) {
%x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 %B)
@@ -142,13 +142,13 @@ define i32 @umultest4(i32 %n) nounwind {
define void @powi(double %V, double *%P) {
entry:
%A = tail call double @llvm.powi.f64(double %V, i32 -1) nounwind
- volatile store double %A, double* %P
+ store volatile double %A, double* %P
%B = tail call double @llvm.powi.f64(double %V, i32 0) nounwind
- volatile store double %B, double* %P
+ store volatile double %B, double* %P
%C = tail call double @llvm.powi.f64(double %V, i32 1) nounwind
- volatile store double %C, double* %P
+ store volatile double %C, double* %P
ret void
; CHECK: @powi
; CHECK: %A = fdiv double 1.0{{.*}}, %V
@@ -161,7 +161,7 @@ define i32 @cttz(i32 %a) {
entry:
%or = or i32 %a, 8
%and = and i32 %or, -8
- %count = tail call i32 @llvm.cttz.i32(i32 %and) nounwind readnone
+ %count = tail call i32 @llvm.cttz.i32(i32 %and, i1 true) nounwind readnone
ret i32 %count
; CHECK: @cttz
; CHECK-NEXT: entry:
@@ -172,7 +172,7 @@ define i8 @ctlz(i8 %a) {
entry:
%or = or i8 %a, 32
%and = and i8 %or, 63
- %count = tail call i8 @llvm.ctlz.i8(i8 %and) nounwind readnone
+ %count = tail call i8 @llvm.ctlz.i8(i8 %and, i1 true) nounwind readnone
ret i8 %count
; CHECK: @ctlz
; CHECK-NEXT: entry:
@@ -181,15 +181,15 @@ entry:
define void @cmp.simplify(i32 %a, i32 %b, i1* %c) {
entry:
- %lz = tail call i32 @llvm.ctlz.i32(i32 %a) nounwind readnone
+ %lz = tail call i32 @llvm.ctlz.i32(i32 %a, i1 true) nounwind readnone
%lz.cmp = icmp eq i32 %lz, 32
- volatile store i1 %lz.cmp, i1* %c
- %tz = tail call i32 @llvm.cttz.i32(i32 %a) nounwind readnone
+ store volatile i1 %lz.cmp, i1* %c
+ %tz = tail call i32 @llvm.cttz.i32(i32 %a, i1 true) nounwind readnone
%tz.cmp = icmp ne i32 %tz, 32
- volatile store i1 %tz.cmp, i1* %c
+ store volatile i1 %tz.cmp, i1* %c
%pop = tail call i32 @llvm.ctpop.i32(i32 %b) nounwind readnone
%pop.cmp = icmp eq i32 %pop, 0
- volatile store i1 %pop.cmp, i1* %c
+ store volatile i1 %pop.cmp, i1* %c
ret void
; CHECK: @cmp.simplify
; CHECK-NEXT: entry:
@@ -203,7 +203,7 @@ entry:
define i32 @cttz_simplify1(i32 %x) nounwind readnone ssp {
- %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x) ; <i32> [#uses=1]
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) ; <i32> [#uses=1]
%shr3 = lshr i32 %tmp1, 5 ; <i32> [#uses=1]
ret i32 %shr3
diff --git a/test/Transforms/InstCombine/overflow.ll b/test/Transforms/InstCombine/overflow.ll
index 9123283..81ceef8 100644
--- a/test/Transforms/InstCombine/overflow.ll
+++ b/test/Transforms/InstCombine/overflow.ll
@@ -130,4 +130,26 @@ entry:
ret i64 %Q
}
+; CHECK: @test8
+; PR11438
+; This is @test1, but the operands are not sign-extended. Make sure
+; we don't transform this case.
+define i32 @test8(i64 %a, i64 %b) nounwind ssp {
+entry:
+; CHECK-NOT: llvm.sadd
+; CHECK: add i64 %a, %b
+; CHECK-NOT: llvm.sadd
+; CHECK: ret
+ %add = add i64 %a, %b
+ %add.off = add i64 %add, 2147483648
+ %0 = icmp ugt i64 %add.off, 4294967295
+ br i1 %0, label %if.then, label %if.end
+
+if.then:
+ tail call void @throwAnExceptionOrWhatever() nounwind
+ br label %if.end
+if.end:
+ %conv9 = trunc i64 %add to i32
+ ret i32 %conv9
+}
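A gloss on test8 (my reading, not part of the commit): the idiom below is a signed-range check, true exactly when %add lies outside [-2^31, 2^31-1].

  %add.off = add i64 %add, 2147483648    ; shift the range by 2^31
  %0 = icmp ugt i64 %add.off, 4294967295 ; outside [0, 2^32-1] after the shift

That only means "the i32 addition overflowed" when both inputs were sign-extended from i32; with raw i64 operands, as here, rewriting it to llvm.sadd.with.overflow.i32 would be wrong, hence the CHECK-NOT lines.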
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 4661561..4baae26 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -809,3 +809,23 @@ define i32 @test61(i32* %ptr) {
; CHECK: @test61
; CHECK: ret i32 10
}
+
+define i1 @test62(i1 %A, i1 %B) {
+ %not = xor i1 %A, true
+ %C = select i1 %A, i1 %not, i1 %B
+ ret i1 %C
+; CHECK: @test62
+; CHECK: %not = xor i1 %A, true
+; CHECK: %C = and i1 %not, %B
+; CHECK: ret i1 %C
+}
+
+define i1 @test63(i1 %A, i1 %B) {
+ %not = xor i1 %A, true
+ %C = select i1 %A, i1 %B, i1 %not
+ ret i1 %C
+; CHECK: @test63
+; CHECK: %not = xor i1 %A, true
+; CHECK: %C = or i1 %B, %not
+; CHECK: ret i1 %C
+}
diff --git a/test/Transforms/InstCombine/sext.ll b/test/Transforms/InstCombine/sext.ll
index f49a2ef..f198797 100644
--- a/test/Transforms/InstCombine/sext.ll
+++ b/test/Transforms/InstCombine/sext.ll
@@ -3,8 +3,8 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
declare i32 @llvm.ctpop.i32(i32)
-declare i32 @llvm.ctlz.i32(i32)
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
define i64 @test1(i32 %x) {
%t = call i32 @llvm.ctpop.i32(i32 %x)
@@ -16,7 +16,7 @@ define i64 @test1(i32 %x) {
}
define i64 @test2(i32 %x) {
- %t = call i32 @llvm.ctlz.i32(i32 %x)
+ %t = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
%s = sext i32 %t to i64
ret i64 %s
@@ -25,7 +25,7 @@ define i64 @test2(i32 %x) {
}
define i64 @test3(i32 %x) {
- %t = call i32 @llvm.cttz.i32(i32 %x)
+ %t = call i32 @llvm.cttz.i32(i32 %x, i1 true)
%s = sext i32 %t to i64
ret i64 %s
diff --git a/test/Transforms/InstCombine/vector_gep1.ll b/test/Transforms/InstCombine/vector_gep1.ll
new file mode 100644
index 0000000..6523622
--- /dev/null
+++ b/test/Transforms/InstCombine/vector_gep1.ll
@@ -0,0 +1,37 @@
+; RUN: opt -instcombine %s -disable-output
+; RUN: opt -instsimplify %s -disable-output
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@G1 = global i8 zeroinitializer
+
+define <2 x i1> @test(<2 x i8*> %a, <2 x i8*> %b) {
+ %A = icmp eq <2 x i8*> %a, %b
+ ret <2 x i1> %A
+}
+
+define <2 x i1> @test2(<2 x i8*> %a) {
+ %A = inttoptr <2 x i32> <i32 1, i32 2> to <2 x i8*>
+ %B = icmp ult <2 x i8*> %A, zeroinitializer
+ ret <2 x i1> %B
+}
+
+define <2 x i1> @test3(<2 x i8*> %a) {
+ %g = getelementptr <2 x i8*> %a, <2 x i32> <i32 1, i32 0>
+ %B = icmp ult <2 x i8*> %g, zeroinitializer
+ ret <2 x i1> %B
+}
+
+define <1 x i1> @test4(<1 x i8*> %a) {
+ %g = getelementptr <1 x i8*> %a, <1 x i32> <i32 1>
+ %B = icmp ult <1 x i8*> %g, zeroinitializer
+ ret <1 x i1> %B
+}
+
+define <2 x i1> @test5(<2 x i8*> %a) {
+ %w = getelementptr <2 x i8*> %a, <2 x i32> zeroinitializer
+ %e = getelementptr <2 x i8*> %w, <2 x i32> <i32 5, i32 9>
+ %g = getelementptr <2 x i8*> %e, <2 x i32> <i32 1, i32 0>
+ %B = icmp ult <2 x i8*> %g, zeroinitializer
+ ret <2 x i1> %B
+}
diff --git a/test/Transforms/InstCombine/volatile_store.ll b/test/Transforms/InstCombine/volatile_store.ll
index 0518e5a..2256678 100644
--- a/test/Transforms/InstCombine/volatile_store.ll
+++ b/test/Transforms/InstCombine/volatile_store.ll
@@ -5,8 +5,8 @@
define void @self_assign_1() {
entry:
- %tmp = volatile load i32* @x ; <i32> [#uses=1]
- volatile store i32 %tmp, i32* @x
+ %tmp = load volatile i32* @x ; <i32> [#uses=1]
+ store volatile i32 %tmp, i32* @x
br label %return
return: ; preds = %entry
diff --git a/test/Transforms/InstSimplify/vector_gep.ll b/test/Transforms/InstSimplify/vector_gep.ll
new file mode 100644
index 0000000..f65260e
--- /dev/null
+++ b/test/Transforms/InstSimplify/vector_gep.ll
@@ -0,0 +1,8 @@
+;RUN: opt -instsimplify %s -disable-output
+declare void @helper(<2 x i8*>)
+define void @test(<2 x i8*> %a) {
+ %A = getelementptr <2 x i8*> %a, <2 x i32> <i32 0, i32 0>
+ call void @helper(<2 x i8*> %A)
+ ret void
+}
+
diff --git a/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll b/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
index 46aaa00..e80bae5 100644
--- a/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
+++ b/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
@@ -15,7 +15,7 @@ for.cond1177:
br i1 %cmp1179, label %for.cond1177, label %land.rhs1320
land.rhs1320:
- %tmp1324 = volatile load i64* getelementptr inbounds (%0* @g_338, i64 0, i32 2), align 1, !tbaa !0
+ %tmp1324 = load volatile i64* getelementptr inbounds (%0* @g_338, i64 0, i32 2), align 1, !tbaa !0
br label %if.end.i
if.end.i:
diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll
index 2115dd3..b9c0354 100644
--- a/test/Transforms/JumpThreading/crash.ll
+++ b/test/Transforms/JumpThreading/crash.ll
@@ -399,7 +399,7 @@ if.then237:
br label %lbl_664
lbl_596: ; preds = %lbl_664, %for.end37
- volatile store i64 undef, i64* undef, align 4
+ store volatile i64 undef, i64* undef, align 4
br label %for.cond111
for.cond111: ; preds = %safe_sub_func_int64_t_s_s.exit, %lbl_596
diff --git a/test/Transforms/JumpThreading/no-irreducible-loops.ll b/test/Transforms/JumpThreading/no-irreducible-loops.ll
index 7c7fe39..a4914f9 100644
--- a/test/Transforms/JumpThreading/no-irreducible-loops.ll
+++ b/test/Transforms/JumpThreading/no-irreducible-loops.ll
@@ -17,11 +17,11 @@ bb: ; preds = %bb4
br i1 %0, label %bb1, label %bb2
bb1: ; preds = %bb
- volatile store i32 1000, i32* @v1, align 4
+ store volatile i32 1000, i32* @v1, align 4
br label %bb3
bb2: ; preds = %bb
- volatile store i32 1001, i32* @v1, align 4
+ store volatile i32 1001, i32* @v1, align 4
br label %bb3
bb3: ; preds = %bb2, %bb1
diff --git a/test/Transforms/LICM/2007-05-22-VolatileSink.ll b/test/Transforms/LICM/2007-05-22-VolatileSink.ll
index 17383c2..4df6ea7 100644
--- a/test/Transforms/LICM/2007-05-22-VolatileSink.ll
+++ b/test/Transforms/LICM/2007-05-22-VolatileSink.ll
@@ -10,7 +10,7 @@ entry:
br label %bb6
bb: ; preds = %bb6
- %tmp2 = volatile load i32* %DataIn ; <i32> [#uses=1]
+ %tmp2 = load volatile i32* %DataIn ; <i32> [#uses=1]
%tmp3 = getelementptr [64 x i32]* %buffer, i32 0, i32 %i.0 ; <i32*> [#uses=1]
store i32 %tmp2, i32* %tmp3
%tmp5 = add i32 %i.0, 1 ; <i32> [#uses=1]
@@ -28,7 +28,7 @@ bb12: ; preds = %bb22
%tmp16 = add i32 %tmp14, %i.1 ; <i32> [#uses=1]
%tmp17 = getelementptr [64 x i32]* %buffer, i32 0, i32 %tmp16 ; <i32*> [#uses=1]
%tmp18 = load i32* %tmp17 ; <i32> [#uses=1]
- volatile store i32 %tmp18, i32* %DataOut
+ store volatile i32 %tmp18, i32* %DataOut
%tmp21 = add i32 %j.1, 1 ; <i32> [#uses=1]
br label %bb22
diff --git a/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll b/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll
index fd114f4..2bbc6ab 100644
--- a/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll
+++ b/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll
@@ -19,7 +19,7 @@ for.body4.lr.ph:
for.body4:
%l_612.11 = phi i32* [ undef, %for.body4.lr.ph ], [ %call19, %for.body4 ]
- %tmp7 = volatile load i16* @g_39, align 2
+ %tmp7 = load volatile i16* @g_39, align 2
%call = call i32** @func_108(i32*** undef)
%call19 = call i32* @func_84(i32** %call)
br i1 false, label %for.body4, label %for.cond.loopexit
diff --git a/test/Transforms/LICM/crash.ll b/test/Transforms/LICM/crash.ll
index ff7fa0b..de41d00 100644
--- a/test/Transforms/LICM/crash.ll
+++ b/test/Transforms/LICM/crash.ll
@@ -68,7 +68,7 @@ define void @test4() noreturn nounwind {
br label %1
; <label>:1 ; preds = %1, %0
- volatile store i32* @g_47, i32** undef, align 8
+ store volatile i32* @g_47, i32** undef, align 8
store i32 undef, i32* @g_47, align 4
br label %1
}
diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll
index 9aefc4f..05a64d6 100644
--- a/test/Transforms/LICM/scalar_promote.ll
+++ b/test/Transforms/LICM/scalar_promote.ll
@@ -59,7 +59,7 @@ define void @test3(i32 %i) {
br label %Loop
Loop:
; Should not promote this to a register
- %x = volatile load i32* @X
+ %x = load volatile i32* @X
%x2 = add i32 %x, 1
store i32 %x2, i32* @X
br i1 true, label %Out, label %Loop
@@ -133,7 +133,7 @@ Loop: ; preds = %Loop, %0
%x2 = add i32 %x, 1 ; <i32> [#uses=1]
store i32 %x2, i32* @X
- volatile store i32* @X, i32** %P2
+ store volatile i32* @X, i32** %P2
%Next = add i32 %j, 1 ; <i32> [#uses=2]
%cond = icmp eq i32 %Next, 0 ; <i1> [#uses=1]
diff --git a/test/Transforms/LICM/speculate.ll b/test/Transforms/LICM/speculate.ll
new file mode 100644
index 0000000..507b193
--- /dev/null
+++ b/test/Transforms/LICM/speculate.ll
@@ -0,0 +1,167 @@
+; RUN: opt -S -licm < %s | FileCheck %s
+
+; UDiv is safe to speculate if the denominator is known non-zero.
+
+; CHECK: @safe_udiv
+; CHECK: %div = udiv i64 %x, %or
+; CHECK-NEXT: br label %for.body
+
+define void @safe_udiv(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
+entry:
+ %or = or i64 %m, 1
+ br label %for.body
+
+for.body: ; preds = %entry, %for.inc
+ %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %p, i64 %i.02
+ %0 = load i32* %arrayidx, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %for.inc, label %if.then
+
+if.then: ; preds = %for.body
+ %div = udiv i64 %x, %or
+ %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
+ store i64 %div, i64* %arrayidx1, align 8
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %for.body
+ %inc = add i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; UDiv is unsafe to speculate if the denominator is not known non-zero.
+
+; CHECK: @unsafe_udiv
+; CHECK-NOT: udiv
+; CHECK: for.body:
+
+define void @unsafe_udiv(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.inc
+ %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %p, i64 %i.02
+ %0 = load i32* %arrayidx, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %for.inc, label %if.then
+
+if.then: ; preds = %for.body
+ %div = udiv i64 %x, %m
+ %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
+ store i64 %div, i64* %arrayidx1, align 8
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %for.body
+ %inc = add i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; SDiv is safe to speculate if the denominator is known non-zero and
+; known to have at least one zero bit.
+
+; CHECK: @safe_sdiv
+; CHECK: %div = sdiv i64 %x, %or
+; CHECK-NEXT: br label %for.body
+
+define void @safe_sdiv(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
+entry:
+ %and = and i64 %m, -3
+ %or = or i64 %and, 1
+ br label %for.body
+
+for.body: ; preds = %entry, %for.inc
+ %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %p, i64 %i.02
+ %0 = load i32* %arrayidx, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %for.inc, label %if.then
+
+if.then: ; preds = %for.body
+ %div = sdiv i64 %x, %or
+ %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
+ store i64 %div, i64* %arrayidx1, align 8
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %for.body
+ %inc = add i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; SDiv is unsafe to speculate if the denominator is not known non-zero.
+
+; CHECK: @unsafe_sdiv_a
+; CHECK-NOT: sdiv
+; CHECK: for.body:
+
+define void @unsafe_sdiv_a(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
+entry:
+ %or = or i64 %m, 1
+ br label %for.body
+
+for.body: ; preds = %entry, %for.inc
+ %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %p, i64 %i.02
+ %0 = load i32* %arrayidx, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %for.inc, label %if.then
+
+if.then: ; preds = %for.body
+ %div = sdiv i64 %x, %or
+ %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
+ store i64 %div, i64* %arrayidx1, align 8
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %for.body
+ %inc = add i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; SDiv is unsafe to speculate if the denominator is not known to have a zero bit.
+
+; CHECK: @unsafe_sdiv_b
+; CHECK-NOT: sdiv
+; CHECK: for.body:
+
+define void @unsafe_sdiv_b(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
+entry:
+ %and = and i64 %m, -3
+ br label %for.body
+
+for.body: ; preds = %entry, %for.inc
+ %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %p, i64 %i.02
+ %0 = load i32* %arrayidx, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %for.inc, label %if.then
+
+if.then: ; preds = %for.body
+ %div = sdiv i64 %x, %and
+ %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
+ store i64 %div, i64* %arrayidx1, align 8
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %for.body
+ %inc = add i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
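A note on the operand tricks in these tests (illustrative, not from the commit): LICM may hoist a division out of a conditional only if it cannot trap on any path, and the tests encode the needed facts via known bits:

  %or  = or i64 %m, 1    ; bit 0 set -> denominator != 0, so udiv is safe
  %and = and i64 %m, -3  ; bit 1 clear -> denominator cannot be -1 (all-ones)
  %or  = or i64 %and, 1  ; nonzero and != -1 -> sdiv is safe too

udiv traps only on a zero divisor; sdiv additionally overflows on INT_MIN / -1, so speculating it needs both the nonzero fact and a known zero bit ruling out -1.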
diff --git a/test/Transforms/LoopSimplify/2011-12-14-LandingpadHeader.ll b/test/Transforms/LoopSimplify/2011-12-14-LandingpadHeader.ll
new file mode 100644
index 0000000..173a582
--- /dev/null
+++ b/test/Transforms/LoopSimplify/2011-12-14-LandingpadHeader.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -loop-simplify -S | FileCheck %s
+; PR11575
+
+@catchtypeinfo = external unnamed_addr constant { i8*, i8*, i8* }
+
+define void @main() uwtable ssp {
+entry:
+ invoke void @f1()
+ to label %try.cont19 unwind label %catch
+
+; CHECK: catch.preheader:
+; CHECK-NEXT: landingpad
+; CHECK: br label %catch
+
+; CHECK: catch.split-lp:
+; CHECK-NEXT: landingpad
+; CHECK: br label %catch
+
+catch: ; preds = %if.else, %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast ({ i8*, i8*, i8* }* @catchtypeinfo to i8*)
+ invoke void @f3()
+ to label %if.else unwind label %eh.resume
+
+if.else: ; preds = %catch
+ invoke void @f2()
+ to label %try.cont19 unwind label %catch
+
+try.cont19: ; preds = %if.else, %entry
+ ret void
+
+eh.resume: ; preds = %catch
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ catch i8* bitcast ({ i8*, i8*, i8* }* @catchtypeinfo to i8*)
+ resume { i8*, i32 } undef
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @f1()
+
+declare void @f2()
+
+declare void @f3()
diff --git a/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll b/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
index 90477d1..ce56bd3 100644
--- a/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
+++ b/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
@@ -10,7 +10,7 @@ entry:
bb: ; preds = %bb, %entry
%l_2.0.reg2mem.0 = phi i16 [ 0, %entry ], [ %t1, %bb ] ; <i16> [#uses=2]
%t0 = shl i16 %l_2.0.reg2mem.0, 1 ; <i16>:0 [#uses=1]
- volatile store i16 %t0, i16* @g_3, align 2
+ store volatile i16 %t0, i16* @g_3, align 2
%t1 = add i16 %l_2.0.reg2mem.0, -3 ; <i16>:1 [#uses=2]
%t2 = icmp slt i16 %t1, 1 ; <i1>:2 [#uses=1]
br i1 %t2, label %bb, label %return
@@ -22,7 +22,7 @@ return: ; preds = %bb
define i32 @main() nounwind {
entry:
tail call void @func_1( ) nounwind
- volatile load i16* @g_3, align 2 ; <i16>:0 [#uses=1]
+ load volatile i16* @g_3, align 2 ; <i16>:0 [#uses=1]
zext i16 %0 to i32 ; <i32>:1 [#uses=1]
tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), i32 %1 ) nounwind ; <i32>:2 [#uses=0]
ret i32 0
diff --git a/test/Transforms/LoopStrengthReduce/2011-11-29-postincphi.ll b/test/Transforms/LoopStrengthReduce/2011-11-29-postincphi.ll
new file mode 100644
index 0000000..cb23ad0
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2011-11-29-postincphi.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s | FileCheck %s
+;
+; PR11431: handle a phi operand that is replaced by a postinc user.
+; LSR first expands %t3 to %t2 in %phi
+; LSR then expands %t2 in %phi into two decrements, one on each loop exit.
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i1 @check() nounwind
+
+; Check that LSR did something close to the behavior at the time of the bug.
+; CHECK: @sqlite3DropTriggerPtr
+; CHECK: incq %rax
+; CHECK: jne
+; CHECK: decq %rax
+; CHECK: ret
+define i64 @sqlite3DropTriggerPtr() nounwind {
+bb:
+ %cmp = call zeroext i1 @check()
+ br label %bb1
+
+bb1: ; preds = %bb4, %bb
+ %t0 = phi i64 [ 0, %bb ], [ %t3, %bb4 ]
+ %t2 = phi i64 [ 1, %bb ], [ %t5, %bb4 ]
+ %t3 = add nsw i64 %t0, 1
+ br i1 %cmp, label %bb4, label %bb8
+
+bb4: ; preds = %bb1
+ %t5 = add nsw i64 %t2, 1
+ br i1 %cmp, label %bb1, label %bb8
+
+bb8: ; preds = %bb8, %bb4
+ %phi = phi i64 [ %t3, %bb1 ], [ %t2, %bb4 ]
+ ret i64 %phi
+}
diff --git a/test/Transforms/LoopStrengthReduce/2011-12-04-loserreg.ll b/test/Transforms/LoopStrengthReduce/2011-12-04-loserreg.ll
new file mode 100644
index 0000000..5108650
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2011-12-04-loserreg.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s | FileCheck %s
+;
+; Test LSR's ability to prune formulae that refer to nonexistent
+; AddRecs in other loops.
+;
+; Unable to reduce this case further because it requires LSR to exceed
+; ComplexityLimit.
+;
+; We really just want to ensure that LSR can process this loop without
+; finding an unsatisfactory solution and bailing out. I've added
+; dummyout, an obvious candidate for postinc replacement so we can
+; verify that LSR removes it.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+; CHECK: @test
+; CHECK: # %for.body{{$}}
+; dummyiv copy should be removed
+; CHECK-NOT: movq
+; CHECK: # %for.cond19.preheader
+; dummycnt should be removed
+; CHECK-NOT: incq
+; CHECK: # %for.body23{{$}}
+define i64 @test(i64 %count, float* nocapture %srcrow, i32* nocapture %destrow) nounwind uwtable ssp {
+entry:
+ %cmp34 = icmp eq i64 %count, 0
+ br i1 %cmp34, label %for.end29, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %dummyiv = phi i64 [ %dummycnt, %for.body ], [ 0, %entry ]
+ %indvars.iv39 = phi i64 [ %indvars.iv.next40, %for.body ], [ 0, %entry ]
+ %dp.036 = phi i32* [ %add.ptr, %for.body ], [ %destrow, %entry ]
+ %p.035 = phi float* [ %incdec.ptr4, %for.body ], [ %srcrow, %entry ]
+ %incdec.ptr = getelementptr inbounds float* %p.035, i64 1
+ %0 = load float* %incdec.ptr, align 4
+ %incdec.ptr2 = getelementptr inbounds float* %p.035, i64 2
+ %1 = load float* %incdec.ptr2, align 4
+ %incdec.ptr3 = getelementptr inbounds float* %p.035, i64 3
+ %2 = load float* %incdec.ptr3, align 4
+ %incdec.ptr4 = getelementptr inbounds float* %p.035, i64 4
+ %3 = load float* %incdec.ptr4, align 4
+ %4 = load i32* %dp.036, align 4
+ %conv5 = fptoui float %0 to i32
+ %or = or i32 %4, %conv5
+ %arrayidx6 = getelementptr inbounds i32* %dp.036, i64 1
+ %5 = load i32* %arrayidx6, align 4
+ %conv7 = fptoui float %1 to i32
+ %or8 = or i32 %5, %conv7
+ %arrayidx9 = getelementptr inbounds i32* %dp.036, i64 2
+ %6 = load i32* %arrayidx9, align 4
+ %conv10 = fptoui float %2 to i32
+ %or11 = or i32 %6, %conv10
+ %arrayidx12 = getelementptr inbounds i32* %dp.036, i64 3
+ %7 = load i32* %arrayidx12, align 4
+ %conv13 = fptoui float %3 to i32
+ %or14 = or i32 %7, %conv13
+ store i32 %or, i32* %dp.036, align 4
+ store i32 %or8, i32* %arrayidx6, align 4
+ store i32 %or11, i32* %arrayidx9, align 4
+ store i32 %or14, i32* %arrayidx12, align 4
+ %add.ptr = getelementptr inbounds i32* %dp.036, i64 4
+ %indvars.iv.next40 = add i64 %indvars.iv39, 4
+ %dummycnt = add i64 %dummyiv, 1
+ %cmp = icmp ult i64 %indvars.iv.next40, %count
+ br i1 %cmp, label %for.body, label %for.cond19.preheader
+
+for.cond19.preheader: ; preds = %for.body
+ %dummyout = add i64 %dummyiv, 1
+ %rem = and i64 %count, 3
+ %cmp2130 = icmp eq i64 %rem, 0
+ br i1 %cmp2130, label %for.end29, label %for.body23.lr.ph
+
+for.body23.lr.ph: ; preds = %for.cond19.preheader
+ %8 = and i64 %count, 3
+ br label %for.body23
+
+for.body23: ; preds = %for.body23, %for.body23.lr.ph
+ %indvars.iv = phi i64 [ 0, %for.body23.lr.ph ], [ %indvars.iv.next, %for.body23 ]
+ %dp.132 = phi i32* [ %add.ptr, %for.body23.lr.ph ], [ %incdec.ptr28, %for.body23 ]
+ %p.131 = phi float* [ %incdec.ptr4, %for.body23.lr.ph ], [ %incdec.ptr24, %for.body23 ]
+ %incdec.ptr24 = getelementptr inbounds float* %p.131, i64 1
+ %9 = load float* %incdec.ptr24, align 4
+ %10 = load i32* %dp.132, align 4
+ %conv25 = fptoui float %9 to i32
+ %or26 = or i32 %10, %conv25
+ store i32 %or26, i32* %dp.132, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %incdec.ptr28 = getelementptr inbounds i32* %dp.132, i64 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %8
+ br i1 %exitcond, label %for.end29, label %for.body23
+
+for.end29: ; preds = %entry, %for.body23, %for.cond19.preheader
+ %result = phi i64 [ 0, %entry ], [ %dummyout, %for.body23 ], [ %dummyout, %for.cond19.preheader ]
+ ret i64 %result
+}
diff --git a/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll b/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
index abbfda6..ad4959b 100644
--- a/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
+++ b/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
@@ -9,7 +9,7 @@ entry:
br label %no_exit
no_exit: ; preds = %no_exit, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %no_exit ] ; <i32> [#uses=1]
- volatile store float 0.000000e+00, float* %D
+ store volatile float 0.000000e+00, float* %D
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
; CHECK: icmp
; CHECK-NEXT: br i1
diff --git a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index 2760915..76aa08c 100644
--- a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -4,12 +4,12 @@
; LSR should properly handle the post-inc offset when folding the
; non-IV operand of an icmp into the IV.
-; CHECK: %5 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
-; CHECK: %6 = lshr i64 %5, 1
-; CHECK: %7 = mul i64 %6, 2
+; CHECK: %4 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+; CHECK: %5 = lshr i64 %4, 1
+; CHECK: %6 = mul i64 %5, 2
; CHECK: br label %for.body
; CHECK: for.body:
-; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ %7, %for.body.lr.ph ]
+; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ %6, %for.body.lr.ph ]
; CHECK: %lsr.iv.next = add i64 %lsr.iv2, -2
; CHECK: %lsr.iv.next3 = inttoptr i64 %lsr.iv.next to i16*
; CHECK: %cmp27 = icmp eq i16* %lsr.iv.next3, null
diff --git a/test/Transforms/LoopStrengthReduce/pr3399.ll b/test/Transforms/LoopStrengthReduce/pr3399.ll
index b809007..26c5002 100644
--- a/test/Transforms/LoopStrengthReduce/pr3399.ll
+++ b/test/Transforms/LoopStrengthReduce/pr3399.ll
@@ -13,7 +13,7 @@ bb: ; preds = %bb5, %bb5.thread
bb1: ; preds = %bb
%l_2.0.reg2mem.0 = sub i32 0, %indvar ; <i32> [#uses=1]
- %0 = volatile load i32* @g_53, align 4 ; <i32> [#uses=1]
+ %0 = load volatile i32* @g_53, align 4 ; <i32> [#uses=1]
%1 = trunc i32 %l_2.0.reg2mem.0 to i16 ; <i16> [#uses=1]
%2 = trunc i32 %0 to i16 ; <i16> [#uses=1]
%3 = mul i16 %2, %1 ; <i16> [#uses=1]
diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll
new file mode 100644
index 0000000..d8bbea9
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -0,0 +1,109 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true | FileCheck %s
+
+; Tests for unrolling loops with run-time trip counts
+
+; CHECK: unr.cmp{{.*}}:
+; CHECK: for.body.unr{{.*}}:
+; CHECK: for.body:
+; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
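+
+; Run-time unrolling appears to emit a prologue: unr.cmp tests whether
+; any remainder iterations (trip count modulo the unroll factor) are
+; needed, for.body.unr runs them, and control then falls into the
+; unrolled for.body; the %exitcond.7 check suggests a factor of 8 here.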
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %sum.0.lcssa
+}
+
+
+; Still try to completely unroll loops with compile-time trip counts
+; even if the -unroll-runtime is specified
+
+; CHECK: for.body:
+; CHECK-NOT: for.body.unr:
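+
+; The trip count below is the compile-time constant 5, so the loop can
+; be fully unrolled and no .unr prologue block should be emitted.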
+
+define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.01
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 5
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 %add
+}
+
+; This is test 2007-05-09-UnknownTripCount.ll, which can now be unrolled
+; if the -unroll-runtime option is turned on.
+
+; CHECK: bb72.2:
+
+define void @foo(i32 %trips) {
+entry:
+ br label %cond_true.outer
+
+cond_true.outer:
+ %indvar1.ph = phi i32 [ 0, %entry ], [ %indvar.next2, %bb72 ]
+ br label %bb72
+
+bb72:
+ %indvar.next2 = add i32 %indvar1.ph, 1
+ %exitcond3 = icmp eq i32 %indvar.next2, %trips
+ br i1 %exitcond3, label %cond_true138, label %cond_true.outer
+
+cond_true138:
+ ret void
+}
+
+
+; Test run-time unrolling for a loop that counts down by -2.
+
+; CHECK: for.body.unr:
+; CHECK: br i1 %cmp.7, label %for.cond.for.end_crit_edge{{.*}}, label %for.body
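+
+; The induction variable steps down by -2 from %len, so the trip count
+; is only known at run time; %cmp.7 again suggests eight unrolled copies.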
+
+define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
+entry:
+ %cmp2 = icmp eq i32 %len, 0
+ br i1 %cmp2, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %p.addr.05 = phi i16* [ %incdec.ptr, %for.body ], [ %p, %entry ]
+ %len.addr.04 = phi i32 [ %sub, %for.body ], [ %len, %entry ]
+ %res.03 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %incdec.ptr = getelementptr inbounds i16* %p.addr.05, i64 1
+ %0 = load i16* %p.addr.05, align 2
+ %conv = zext i16 %0 to i32
+ %add = add i32 %conv, %res.03
+ %sub = add nsw i32 %len.addr.04, -2
+ %cmp = icmp eq i32 %sub, 0
+ br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+ %phitmp = trunc i32 %add to i16
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry
+ %res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+ ret i16 %res.0.lcssa
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll
new file mode 100644
index 0000000..ad99b8c
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=4 | FileCheck %s
+
+; This tests that setting the unroll count works
+
+; CHECK: unr.cmp:
+; CHECK: for.body.unr:
+; CHECK: for.body:
+; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
+; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
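+
+; %exitcond.3 (the fourth copy's exit test) must be present and
+; %exitcond.4 absent, pinning the unroll factor at exactly the
+; requested 4.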
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %sum.0.lcssa
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop2.ll b/test/Transforms/LoopUnroll/runtime-loop2.ll
new file mode 100644
index 0000000..cbc7af5
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-loop2.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 -unroll-runtime -unroll-count=8 | FileCheck %s
+
+; Choose a smaller, power-of-two unroll count if the loop is too large.
+; This test makes sure we're not unrolling with 'odd' (non-power-of-two) counts.
+
+; CHECK: unr.cmp:
+; CHECK: for.body.unr:
+; CHECK: for.body:
+; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
+; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
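+
+; The requested factor of 8 would exceed -unroll-threshold=50, so the
+; unroller should fall back to a smaller power of two: a factor of 4
+; (%exitcond.3) is accepted and anything larger (%exitcond.4) rejected.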
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %sum.0.lcssa
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop3.ll b/test/Transforms/LoopUnroll/runtime-loop3.ll
new file mode 100644
index 0000000..55cf223
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-loop3.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS
+
+; Test that nested loops can be unrolled. We need to increase the threshold to do it.
+
+; STATS: 2 loop-unroll - Number of loops unrolled (completely or otherwise)
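+
+; Both the inner and the outer loop of @nested should be unrolled,
+; giving the expected count of 2; the raised threshold of 400 makes
+; room for the larger unrolled bodies.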
+
+define i32 @nested(i32* nocapture %a, i32 %n, i32 %m) nounwind uwtable readonly {
+entry:
+ %cmp11 = icmp sgt i32 %n, 0
+ br i1 %cmp11, label %for.cond1.preheader.lr.ph, label %for.end7
+
+for.cond1.preheader.lr.ph: ; preds = %entry
+ %cmp28 = icmp sgt i32 %m, 0
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc5, %for.cond1.preheader.lr.ph
+ %indvars.iv16 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next17, %for.inc5 ]
+ %sum.012 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %sum.1.lcssa, %for.inc5 ]
+ br i1 %cmp28, label %for.body3, label %for.inc5
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.cond1.preheader ]
+ %sum.19 = phi i32 [ %add4, %for.body3 ], [ %sum.012, %for.cond1.preheader ]
+ %0 = add nsw i64 %indvars.iv, %indvars.iv16
+ %arrayidx = getelementptr inbounds i32* %a, i64 %0
+ %1 = load i32* %arrayidx, align 4
+ %add4 = add nsw i32 %1, %sum.19
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %m
+ br i1 %exitcond, label %for.inc5, label %for.body3
+
+for.inc5: ; preds = %for.body3, %for.cond1.preheader
+ %sum.1.lcssa = phi i32 [ %sum.012, %for.cond1.preheader ], [ %add4, %for.body3 ]
+ %indvars.iv.next17 = add i64 %indvars.iv16, 1
+ %lftr.wideiv18 = trunc i64 %indvars.iv.next17 to i32
+ %exitcond19 = icmp eq i32 %lftr.wideiv18, %n
+ br i1 %exitcond19, label %for.end7, label %for.cond1.preheader
+
+for.end7: ; preds = %for.inc5, %entry
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %sum.1.lcssa, %for.inc5 ]
+ ret i32 %sum.0.lcssa
+}
+
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
new file mode 100644
index 0000000..8389fe4
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
@@ -0,0 +1,91 @@
+; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -verify-loop-info -verify-dom-info %s | FileCheck %s
+
+; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
+; STATS: 2 loop-unswitch - Number of switches unswitched
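+
+; Unswitching runs once per interesting case value of the invariant
+; switch condition %c, hence two unswitches: the clones below test
+; 'switch i32 1' and 'switch i32 2' against constants, while the
+; remaining copy keeps the original 'switch i32 %c'.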
+
+; CHECK: %1 = icmp eq i32 %c, 1
+; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge
+
+; CHECK: ..split_crit_edge: ; preds = %0
+; CHECK-NEXT: br label %.split
+
+; CHECK: .split.us: ; preds = %0
+; CHECK-NEXT: br label %loop_begin.us
+
+; CHECK: loop_begin.us: ; preds = %loop_begin.backedge.us, %.split.us
+; CHECK-NEXT: %var_val.us = load i32* %var
+; CHECK-NEXT: switch i32 1, label %default.us-lcssa.us [
+; CHECK-NEXT: i32 1, label %inc.us
+
+; CHECK: inc.us: ; preds = %loop_begin.us
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us
+
+; CHECK: .split: ; preds = %..split_crit_edge
+; CHECK-NEXT: %2 = icmp eq i32 %c, 2
+; CHECK-NEXT: br i1 %2, label %.split.split.us, label %.split..split.split_crit_edge
+
+; CHECK: .split..split.split_crit_edge: ; preds = %.split
+; CHECK-NEXT: br label %.split.split
+
+; CHECK: .split.split.us: ; preds = %.split
+; CHECK-NEXT: br label %loop_begin.us1
+
+; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us5, %.split.split.us
+; CHECK-NEXT: %var_val.us2 = load i32* %var
+; CHECK-NEXT: switch i32 2, label %default.us-lcssa.us-lcssa.us [
+; CHECK-NEXT: i32 1, label %inc.us3
+; CHECK-NEXT: i32 2, label %dec.us4
+; CHECK-NEXT: ]
+
+; CHECK: dec.us4: ; preds = %loop_begin.us1
+; CHECK-NEXT: call void @decf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us5
+
+; CHECK: .split.split: ; preds = %.split..split.split_crit_edge
+; CHECK-NEXT: br label %loop_begin
+
+; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split.split
+; CHECK-NEXT: %var_val = load i32* %var
+; CHECK-NEXT: switch i32 %c, label %default.us-lcssa.us-lcssa [
+; CHECK-NEXT: i32 1, label %inc
+; CHECK-NEXT: i32 2, label %dec
+; CHECK-NEXT: ]
+
+; CHECK: inc: ; preds = %loop_begin
+; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa, label %inc.split
+
+; CHECK: dec: ; preds = %loop_begin
+; CHECK-NEXT: br i1 true, label %us-unreachable6, label %dec.split
+
+define i32 @test(i32* %var) {
+ %mem = alloca i32
+ store i32 2, i32* %mem
+ %c = load i32* %mem
+
+ br label %loop_begin
+
+loop_begin:
+
+ %var_val = load i32* %var
+
+ switch i32 %c, label %default [
+ i32 1, label %inc
+ i32 2, label %dec
+ ]
+
+inc:
+ call void @incf() noreturn nounwind
+ br label %loop_begin
+dec:
+ call void @decf() noreturn nounwind
+ br label %loop_begin
+default:
+ br label %loop_exit
+loop_exit:
+ ret i32 0
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
new file mode 100644
index 0000000..21c0ec3
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
@@ -0,0 +1,84 @@
+; RUN: opt -loop-unswitch -loop-unswitch-threshold 30 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 30 -verify-loop-info -verify-dom-info %s | FileCheck %s
+
+; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
+; STATS: 1 loop-unswitch - Number of switches unswitched
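+
+; A threshold of 30 leaves budget for only one unswitch, so just the
+; first switch (on %c) is unswitched; the second switch (on %d)
+; survives in both resulting copies of the loop.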
+
+; ModuleID = '../llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll'
+
+; CHECK: %1 = icmp eq i32 %c, 1
+; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge
+
+; CHECK: ..split_crit_edge: ; preds = %0
+; CHECK-NEXT: br label %.split
+
+; CHECK: .split.us: ; preds = %0
+; CHECK-NEXT: br label %loop_begin.us
+
+; CHECK: loop_begin.us: ; preds = %loop_begin.backedge.us, %.split.us
+; CHECK: switch i32 1, label %second_switch.us [
+; CHECK-NEXT: i32 1, label %inc.us
+
+; CHECK: inc.us: ; preds = %second_switch.us, %loop_begin.us
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us
+
+; CHECK: second_switch.us: ; preds = %loop_begin.us
+; CHECK-NEXT: switch i32 %d, label %default.us [
+; CHECK-NEXT: i32 1, label %inc.us
+; CHECK-NEXT: ]
+
+; CHECK: .split: ; preds = %..split_crit_edge
+; CHECK-NEXT: br label %loop_begin
+
+; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split
+; CHECK: switch i32 %c, label %second_switch [
+; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge
+; CHECK-NEXT: ]
+
+; CHECK: loop_begin.inc_crit_edge: ; preds = %loop_begin
+; CHECK-NEXT: br i1 true, label %us-unreachable, label %inc
+
+; CHECK: second_switch: ; preds = %loop_begin
+; CHECK-NEXT: switch i32 %d, label %default [
+; CHECK-NEXT: i32 1, label %inc
+; CHECK-NEXT: ]
+
+; CHECK: inc: ; preds = %loop_begin.inc_crit_edge, %second_switch
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge
+
+define i32 @test(i32* %var) {
+ %mem = alloca i32
+ store i32 2, i32* %mem
+ %c = load i32* %mem
+ %d = load i32* %mem
+
+ br label %loop_begin
+
+loop_begin:
+
+ %var_val = load i32* %var
+
+ switch i32 %c, label %second_switch [
+ i32 1, label %inc
+ ]
+
+second_switch:
+ switch i32 %d, label %default [
+ i32 1, label %inc
+ ]
+
+inc:
+ call void @incf() noreturn nounwind
+ br label %loop_begin
+
+default:
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
new file mode 100644
index 0000000..1b186d6
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
@@ -0,0 +1,138 @@
+; RUN: opt -loop-unswitch -loop-unswitch-threshold 1000 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 1000 -verify-loop-info -verify-dom-info %s | FileCheck %s
+
+; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
+; STATS: 3 loop-unswitch - Number of switches unswitched
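+
+; A threshold of 1000 lets everything unswitch: once on %c, then on %d
+; in each copy that still contains the second switch, for the three
+; unswitches counted above.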
+
+; CHECK: %1 = icmp eq i32 %c, 1
+; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge
+
+; CHECK: ..split_crit_edge: ; preds = %0
+; CHECK-NEXT: br label %.split
+
+; CHECK: .split.us: ; preds = %0
+; CHECK-NEXT: %2 = icmp eq i32 %d, 1
+; CHECK-NEXT: br i1 %2, label %.split.us.split.us, label %.split.us..split.us.split_crit_edge
+
+; CHECK: .split.us..split.us.split_crit_edge: ; preds = %.split.us
+; CHECK-NEXT: br label %.split.us.split
+
+; CHECK: .split.us.split.us: ; preds = %.split.us
+; CHECK-NEXT: br label %loop_begin.us.us
+
+; CHECK: loop_begin.us.us: ; preds = %loop_begin.backedge.us.us, %.split.us.split.us
+; CHECK-NEXT: %var_val.us.us = load i32* %var
+; CHECK-NEXT: switch i32 1, label %second_switch.us.us [
+; CHECK-NEXT: i32 1, label %inc.us.us
+
+; CHECK: inc.us.us: ; preds = %second_switch.us.us, %loop_begin.us.us
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us.us
+
+; CHECK: second_switch.us.us: ; preds = %loop_begin.us.us
+; CHECK-NEXT: switch i32 1, label %default.us.us [
+; CHECK-NEXT: i32 1, label %inc.us.us
+
+; CHECK: .split.us.split: ; preds = %.split.us..split.us.split_crit_edge
+; CHECK-NEXT: br label %loop_begin.us
+
+; CHECK: loop_begin.us: ; preds = %loop_begin.backedge.us, %.split.us.split
+; CHECK-NEXT: %var_val.us = load i32* %var
+; CHECK-NEXT: switch i32 1, label %second_switch.us [
+; CHECK-NEXT: i32 1, label %inc.us
+
+; CHECK: inc.us: ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us
+
+; CHECK: second_switch.us: ; preds = %loop_begin.us
+; CHECK-NEXT: switch i32 %d, label %default.us [
+; CHECK-NEXT: i32 1, label %second_switch.us.inc.us_crit_edge
+; CHECK-NEXT: ]
+
+; CHECK: second_switch.us.inc.us_crit_edge: ; preds = %second_switch.us
+; CHECK-NEXT: br i1 true, label %us-unreachable8, label %inc.us
+
+; CHECK: .split: ; preds = %..split_crit_edge
+; CHECK-NEXT: %3 = icmp eq i32 %d, 1
+; CHECK-NEXT: br i1 %3, label %.split.split.us, label %.split..split.split_crit_edge
+
+; CHECK: .split..split.split_crit_edge: ; preds = %.split
+; CHECK-NEXT: br label %.split.split
+
+; CHECK: .split.split.us: ; preds = %.split
+; CHECK-NEXT: br label %loop_begin.us1
+
+; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us6, %.split.split.us
+; CHECK-NEXT: %var_val.us2 = load i32* %var
+; CHECK-NEXT: switch i32 %c, label %second_switch.us4 [
+; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge.us
+; CHECK-NEXT: ]
+
+; CHECK: inc.us3: ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us4
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us6
+
+; CHECK: second_switch.us4: ; preds = %loop_begin.us1
+; CHECK-NEXT: switch i32 1, label %default.us5 [
+; CHECK-NEXT: i32 1, label %inc.us3
+; CHECK-NEXT: ]
+
+; CHECK: loop_begin.inc_crit_edge.us: ; preds = %loop_begin.us1
+; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us3
+
+; CHECK: .split.split: ; preds = %.split..split.split_crit_edge
+; CHECK-NEXT: br label %loop_begin
+
+; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split.split
+; CHECK-NEXT: %var_val = load i32* %var
+; CHECK-NEXT: switch i32 %c, label %second_switch [
+; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge
+; CHECK-NEXT: ]
+
+; CHECK: loop_begin.inc_crit_edge: ; preds = %loop_begin
+; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa, label %inc
+
+; CHECK: second_switch: ; preds = %loop_begin
+; CHECK-NEXT: switch i32 %d, label %default [
+; CHECK-NEXT: i32 1, label %second_switch.inc_crit_edge
+; CHECK-NEXT: ]
+
+; CHECK: second_switch.inc_crit_edge: ; preds = %second_switch
+; CHECK-NEXT: br i1 true, label %us-unreachable7, label %inc
+
+
+define i32 @test(i32* %var) {
+ %mem = alloca i32
+ store i32 2, i32* %mem
+ %c = load i32* %mem
+ %d = load i32* %mem
+
+ br label %loop_begin
+
+loop_begin:
+
+ %var_val = load i32* %var
+
+ switch i32 %c, label %second_switch [
+ i32 1, label %inc
+ ]
+
+second_switch:
+ switch i32 %d, label %default [
+ i32 1, label %inc
+ ]
+
+inc:
+ call void @incf() noreturn nounwind
+ br label %loop_begin
+
+default:
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
diff --git a/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll b/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
index 52a8375..ea0d515 100644
--- a/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
+++ b/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -std-compile-opts -S | grep volatile | count 3
; PR1520
-; Don't promote volatile loads/stores. This is really needed to handle setjmp/lonjmp properly.
+; Don't promote volatile loads/stores. This is really needed to handle setjmp/longjmp properly.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
@@ -14,7 +14,7 @@ entry:
%v = alloca i32, align 4 ; <i32*> [#uses=3]
%tmp = alloca i32, align 4 ; <i32*> [#uses=3]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- volatile store i32 0, i32* %v, align 4
+ store volatile i32 0, i32* %v, align 4
%tmp1 = call i32 @_setjmp( %struct.__jmp_buf_tag* getelementptr ([1 x %struct.__jmp_buf_tag]* @j, i32 0, i32 0) ) ; <i32> [#uses=1]
%tmp2 = icmp ne i32 %tmp1, 0 ; <i1> [#uses=1]
%tmp23 = zext i1 %tmp2 to i8 ; <i8> [#uses=1]
@@ -22,12 +22,12 @@ entry:
br i1 %toBool, label %bb, label %bb5
bb: ; preds = %entry
- %tmp4 = volatile load i32* %v, align 4 ; <i32> [#uses=1]
+ %tmp4 = load volatile i32* %v, align 4 ; <i32> [#uses=1]
store i32 %tmp4, i32* %tmp, align 4
br label %bb6
bb5: ; preds = %entry
- volatile store i32 1, i32* %v, align 4
+ store volatile i32 1, i32* %v, align 4
call void @g( )
store i32 0, i32* %tmp, align 4
br label %bb6
diff --git a/test/Transforms/MemCpyOpt/form-memset.ll b/test/Transforms/MemCpyOpt/form-memset.ll
index 1ac97e9..8832f89 100644
--- a/test/Transforms/MemCpyOpt/form-memset.ll
+++ b/test/Transforms/MemCpyOpt/form-memset.ll
@@ -57,8 +57,8 @@ entry:
declare i32 @bar(...)
+%struct.MV = type { i16, i16 }
- %struct.MV = type { i16, i16 }
define void @test2() nounwind {
entry:
@@ -220,3 +220,31 @@ entry:
; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i32 1, i1 false)
}
+; More aggressive heuristic
+; rdar://9892684
+define void @test7(i32* nocapture %c) nounwind optsize {
+ store i32 -1, i32* %c, align 4
+ %1 = getelementptr inbounds i32* %c, i32 1
+ store i32 -1, i32* %1, align 4
+ %2 = getelementptr inbounds i32* %c, i32 2
+ store i32 -1, i32* %2, align 4
+ %3 = getelementptr inbounds i32* %c, i32 3
+ store i32 -1, i32* %3, align 4
+ %4 = getelementptr inbounds i32* %c, i32 4
+ store i32 -1, i32* %4, align 4
+; CHECK: @test7
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %5, i8 -1, i64 20, i32 4, i1 false)
+ ret void
+}
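+
+; The five consecutive i32 stores of -1 above cover 20 contiguous
+; bytes, so they should collapse into the single 20-byte memset checked
+; for; the i8 -1 fill pattern replicates to the full i32 -1 value.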
+
+%struct.test8 = type { [4 x i32] }
+
+define void @test8() {
+entry:
+ %memtmp = alloca %struct.test8, align 16
+ %0 = bitcast %struct.test8* %memtmp to <4 x i32>*
+ store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16
+ ret void
+; CHECK: @test8
+; CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16
+}
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 861173b..44c2602 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -167,7 +167,7 @@ entry:
loop:
%c = bitcast i32* %x to i8*
call void @objc_release(i8* %c) nounwind
- %j = volatile load i1* %q
+ %j = load volatile i1* %q
br i1 %j, label %loop, label %return
return:
@@ -190,7 +190,7 @@ entry:
loop:
%a = bitcast i32* %x to i8*
%0 = call i8* @objc_retain(i8* %a) nounwind
- %j = volatile load i1* %q
+ %j = load volatile i1* %q
br i1 %j, label %loop, label %return
return:
@@ -1528,9 +1528,11 @@ define void @test52(i8** %zz, i8** %pp) {
; Like test52, but the pointer has function type, so it's assumed to
; be not reference counted.
+; Oops. That's wrong. Clang sometimes uses function types gratuitously.
+; See rdar://10551239.
; CHECK: define void @test53(
-; CHECK-NOT: @objc_
+; CHECK: @objc_
; CHECK: }
define void @test53(void ()** %zz, i8** %pp) {
%p = load i8** %pp
diff --git a/test/Transforms/ObjCARC/contract-storestrong.ll b/test/Transforms/ObjCARC/contract-storestrong.ll
index 25c93f4..fda2ff4 100644
--- a/test/Transforms/ObjCARC/contract-storestrong.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong.ll
@@ -33,7 +33,7 @@ entry:
define void @test1(i8* %p) {
entry:
%0 = tail call i8* @objc_retain(i8* %p) nounwind
- %tmp = volatile load i8** @x, align 8
+ %tmp = load volatile i8** @x, align 8
store i8* %0, i8** @x, align 8
tail call void @objc_release(i8* %tmp) nounwind
ret void
@@ -53,7 +53,7 @@ define void @test2(i8* %p) {
entry:
%0 = tail call i8* @objc_retain(i8* %p) nounwind
%tmp = load i8** @x, align 8
- volatile store i8* %0, i8** @x, align 8
+ store volatile i8* %0, i8** @x, align 8
tail call void @objc_release(i8* %tmp) nounwind
ret void
}
diff --git a/test/Transforms/ObjCARC/pointer-types.ll b/test/Transforms/ObjCARC/pointer-types.ll
new file mode 100644
index 0000000..6abc939
--- /dev/null
+++ b/test/Transforms/ObjCARC/pointer-types.ll
@@ -0,0 +1,31 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+; Don't hoist @objc_release past a use of its pointer, even
+; if the use has function type, because clang uses function types
+; in dubious ways.
+; rdar://10551239
+
+; CHECK: define void @test0(
+; CHECK: %otherBlock = phi void ()* [ %b1, %if.then ], [ null, %entry ]
+; CHECK-NEXT: call void @use_fptr(void ()* %otherBlock)
+; CHECK-NEXT: %tmp11 = bitcast void ()* %otherBlock to i8*
+; CHECK-NEXT: call void @objc_release(i8* %tmp11)
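+
+; The checks above insist the call to @use_fptr still precedes the
+; release: even though %otherBlock has function type, it may refer to
+; a reference-counted object, so the release must not be hoisted past
+; the use.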
+
+define void @test0(i1 %tobool, void ()* %b1) {
+entry:
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %otherBlock = phi void ()* [ %b1, %if.then ], [ null, %entry ]
+ call void @use_fptr(void ()* %otherBlock)
+ %tmp11 = bitcast void ()* %otherBlock to i8*
+ call void @objc_release(i8* %tmp11) nounwind
+ ret void
+}
+
+declare void @use_fptr(void ()*)
+declare void @objc_release(i8*)
+
diff --git a/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll b/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll
index cd6cf97..63f41db 100644
--- a/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll
+++ b/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll
@@ -6,9 +6,9 @@ target triple = "i686-pc-linux-gnu"
define i32 @x(i32 %b) {
entry:
- %val = call i32 @llvm.cttz.i32(i32 undef)
+ %val = call i32 @llvm.cttz.i32(i32 undef, i1 true)
ret i32 %val
}
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
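+
+; The second operand of llvm.cttz is the new is_zero_undef flag: passing
+; i1 true asserts the input is never zero, leaving the result for a zero
+; input undefined.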
diff --git a/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll b/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll
deleted file mode 100644
index d71bcb9..0000000
--- a/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll
+++ /dev/null
@@ -1,184 +0,0 @@
-; RUN: opt < %s -scalarrepl -disable-output -stats |& grep "Number of aggregates converted to scalar"
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin9.6"
- %0 = type { } ; type %0
- %1 = type { i8*, i32, i32, i16, i16, %2, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %2, %3*, i32, [3 x i8], [1 x i8], %2, i32, i64 } ; type %1
- %2 = type { i8*, i32 } ; type %2
- %3 = type opaque ; type %3
- %4 = type { i32 } ; type %4
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.basictype.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, %0*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 }
- %llvm.dbg.composite.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, %0*, %0*, i32 }
- %llvm.dbg.derivedtype.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, %0* }
- %llvm.dbg.subprogram.type = type { i32, %0*, %0*, i8*, i8*, i8*, %0*, i32, %0*, i1, i1 }
- %llvm.dbg.subrange.type = type { i32, i64, i64 }
- %llvm.dbg.variable.type = type { i32, %0*, i8*, %0*, i32, %0* }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-internal constant [8 x i8] c"PR491.c\00", section "llvm.metadata" ; <[8 x i8]*>:0 [#uses=1]
-internal constant [77 x i8] c"/Volumes/Nanpura/mainline/llvm/projects/llvm-test/SingleSource/Regression/C/\00", section "llvm.metadata" ; <[77 x i8]*>:1 [#uses=1]
-internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5641) (LLVM build 00)\00", section "llvm.metadata" ; <[55 x i8]*>:2 [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 1, i8* getelementptr ([8 x i8]* @0, i32 0, i32 0), i8* getelementptr ([77 x i8]* @1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @2, i32 0, i32 0), i1 true, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*>:3 [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([4 x i8]* @3, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-internal constant [5 x i8] c"char\00", section "llvm.metadata" ; <[5 x i8]*>:4 [#uses=1]
-@llvm.dbg.basictype5 = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([5 x i8]* @4, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 8, i64 8, i64 0, i32 0, i32 6 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 458790, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 8, i64 8, i64 0, i32 0, %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype5 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype6 = internal constant %llvm.dbg.derivedtype.type { i32 458767, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-internal constant [13 x i8] c"unsigned int\00", section "llvm.metadata" ; <[13 x i8]*>:5 [#uses=1]
-@llvm.dbg.basictype8 = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([13 x i8]* @5, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 7 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.array = internal constant [3 x %0*] [%0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype6 to %0*), %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype8 to %0*)], section "llvm.metadata" ; <[3 x %0*]*> [#uses=1]
-@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([3 x %0*]* @llvm.dbg.array to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-internal constant [12 x i8] c"assert_fail\00", section "llvm.metadata" ; <[12 x i8]*>:6 [#uses=1]
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([12 x i8]* @6, i32 0, i32 0), i8* getelementptr ([12 x i8]* @6, i32 0, i32 0), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 4, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite to %0*), i1 true, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=0]
-internal constant [2 x i8] c"l\00", section "llvm.metadata" ; <[2 x i8]*>:7 [#uses=1]
-@__stderrp = external global %1* ; <%1**> [#uses=4]
-internal constant [35 x i8] c"assertion failed in line %u: '%s'\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[35 x i8]*>:8 [#uses=1]
-@llvm.dbg.array13 = internal constant [2 x %0*] [%0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*), %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*)], section "llvm.metadata" ; <[2 x %0*]*> [#uses=1]
-@llvm.dbg.composite14 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([2 x %0*]* @llvm.dbg.array13 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-internal constant [5 x i8] c"test\00", section "llvm.metadata" ; <[5 x i8]*>:9 [#uses=1]
-@llvm.dbg.subprogram16 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([5 x i8]* @9, i32 0, i32 0), i8* getelementptr ([5 x i8]* @9, i32 0, i32 0), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 10, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite14 to %0*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-internal constant [9 x i8] c"long int\00", section "llvm.metadata" ; <[9 x i8]*>:10 [#uses=1]
-@llvm.dbg.basictype21 = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([9 x i8]* @10, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.derivedtype22 = internal constant %llvm.dbg.derivedtype.type { i32 458765, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*), i8* getelementptr ([2 x i8]* @7, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 20, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype21 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.subrange = internal constant %llvm.dbg.subrange.type { i32 458785, i64 0, i64 3 }, section "llvm.metadata" ; <%llvm.dbg.subrange.type*> [#uses=1]
-@llvm.dbg.array23 = internal constant [1 x %0*] [%0* bitcast (%llvm.dbg.subrange.type* @llvm.dbg.subrange to %0*)], section "llvm.metadata" ; <[1 x %0*]*> [#uses=1]
-internal constant [14 x i8] c"unsigned char\00", section "llvm.metadata" ; <[14 x i8]*>:11 [#uses=1]
-@llvm.dbg.basictype25 = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([14 x i8]* @11, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 8, i64 8, i64 0, i32 0, i32 8 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.composite26 = internal constant %llvm.dbg.composite.type { i32 458753, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 8, i64 0, i32 0, %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype25 to %0*), %0* bitcast ([1 x %0*]* @llvm.dbg.array23 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-internal constant [2 x i8] c"c\00", section "llvm.metadata" ; <[2 x i8]*>:12 [#uses=1]
-@llvm.dbg.derivedtype28 = internal constant %llvm.dbg.derivedtype.type { i32 458765, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*), i8* getelementptr ([2 x i8]* @12, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 20, i64 32, i64 8, i64 0, i32 0, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite26 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array29 = internal constant [2 x %0*] [%0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype22 to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype28 to %0*)], section "llvm.metadata" ; <[2 x %0*]*> [#uses=1]
-@llvm.dbg.composite30 = internal constant %llvm.dbg.composite.type { i32 458775, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 20, i64 32, i64 32, i64 0, i32 0, %0* null, %0* bitcast ([2 x %0*]* @llvm.dbg.array29 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-internal constant [2 x i8] c"u\00", section "llvm.metadata" ; <[2 x i8]*>:13 [#uses=1]
-@llvm.dbg.variable32 = internal constant %llvm.dbg.variable.type { i32 459008, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*), i8* getelementptr ([2 x i8]* @13, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 20, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite30 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1]
-internal constant [11 x i8] c"u.l == 128\00", section "__TEXT,__cstring,cstring_literals" ; <[11 x i8]*>:14 [#uses=1]
-internal constant [8 x i8] c"u.l < 0\00", section "__TEXT,__cstring,cstring_literals" ; <[8 x i8]*>:15 [#uses=1]
-@llvm.dbg.array35 = internal constant [1 x %0*] [%0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*)], section "llvm.metadata" ; <[1 x %0*]*> [#uses=1]
-@llvm.dbg.composite36 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([1 x %0*]* @llvm.dbg.array35 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-internal constant [5 x i8] c"main\00", section "llvm.metadata" ; <[5 x i8]*>:16 [#uses=1]
-@llvm.dbg.subprogram38 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([5 x i8]* @16, i32 0, i32 0), i8* getelementptr ([5 x i8]* @16, i32 0, i32 0), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 28, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite36 to %0*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-
-declare void @llvm.dbg.func.start(%0*) nounwind readnone
-
-declare void @llvm.dbg.declare(%0*, %0*) nounwind readnone
-
-declare void @llvm.dbg.stoppoint(i32, i32, %0*) nounwind readnone
-
-declare i32 @fprintf(%1* nocapture, i8* nocapture, ...) nounwind
-
-declare void @llvm.dbg.region.end(%0*) nounwind readnone
-
-define i32 @test(i32) nounwind {
-; <label>:1
- %2 = alloca %4, align 8 ; <%4*> [#uses=7]
- call void @llvm.dbg.func.start(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*))
- %3 = bitcast %4* %2 to %0* ; <%0*> [#uses=1]
- call void @llvm.dbg.declare(%0* %3, %0* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable32 to %0*))
- call void @llvm.dbg.stoppoint(i32 21, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- %4 = getelementptr %4* %2, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 0, i32* %4, align 8
- %5 = bitcast %4* %2 to i8* ; <i8*> [#uses=1]
- store i8 -128, i8* %5, align 8
- call void @llvm.dbg.stoppoint(i32 22, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- %6 = getelementptr %4* %2, i32 0, i32 0 ; <i32*> [#uses=1]
- %7 = load i32* %6, align 8 ; <i32> [#uses=1]
- %8 = icmp eq i32 %7, 128 ; <i1> [#uses=1]
- br i1 %8, label %12, label %9
-
-; <label>:9 ; preds = %1
- call void @llvm.dbg.stoppoint(i32 5, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %10 = load %1** @__stderrp, align 4 ; <%1*> [#uses=1]
- %11 = call i32 (%1*, i8*, ...)* @fprintf(%1* %10, i8* getelementptr ([35 x i8]* @8, i32 0, i32 0), i32 22, i8* getelementptr ([11 x i8]* @14, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
- call void @llvm.dbg.stoppoint(i32 6, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- br label %12
-
-; <label>:12 ; preds = %9, %1
- %.0 = phi i32 [ 0, %9 ], [ 1, %1 ] ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 22, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- %13 = and i32 %.0, %0 ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 23, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- %14 = getelementptr %4* %2, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 0, i32* %14, align 8
- %15 = bitcast %4* %2 to [4 x i8]* ; <[4 x i8]*> [#uses=1]
- %16 = getelementptr [4 x i8]* %15, i32 0, i32 3 ; <i8*> [#uses=1]
- store i8 -128, i8* %16, align 1
- call void @llvm.dbg.stoppoint(i32 24, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- %17 = getelementptr %4* %2, i32 0, i32 0 ; <i32*> [#uses=1]
- %18 = load i32* %17, align 8 ; <i32> [#uses=1]
- %19 = icmp slt i32 %18, 0 ; <i1> [#uses=1]
- br i1 %19, label %23, label %20
-
-; <label>:20 ; preds = %12
- call void @llvm.dbg.stoppoint(i32 5, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %21 = load %1** @__stderrp, align 4 ; <%1*> [#uses=1]
- %22 = call i32 (%1*, i8*, ...)* @fprintf(%1* %21, i8* getelementptr ([35 x i8]* @8, i32 0, i32 0), i32 24, i8* getelementptr ([8 x i8]* @15, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
- call void @llvm.dbg.stoppoint(i32 6, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- br label %23
-
-; <label>:23 ; preds = %20, %12
- %.01 = phi i32 [ 0, %20 ], [ 1, %12 ] ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 24, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- %24 = and i32 %.01, %13 ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 25, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- call void @llvm.dbg.region.end(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*))
- ret i32 %24
-}
-
-define i32 @main() nounwind {
-; <label>:0
- %1 = alloca %4, align 8 ; <%4*> [#uses=7]
- call void @llvm.dbg.func.start(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram38 to %0*))
- call void @llvm.dbg.stoppoint(i32 29, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- %2 = bitcast %4* %1 to %0* ; <%0*> [#uses=1]
- call void @llvm.dbg.declare(%0* %2, %0* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable32 to %0*)) nounwind
- call void @llvm.dbg.stoppoint(i32 21, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %3 = getelementptr %4* %1, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 0, i32* %3, align 8
- %4 = bitcast %4* %1 to i8* ; <i8*> [#uses=1]
- store i8 -128, i8* %4, align 8
- call void @llvm.dbg.stoppoint(i32 22, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %5 = getelementptr %4* %1, i32 0, i32 0 ; <i32*> [#uses=1]
- %6 = load i32* %5, align 8 ; <i32> [#uses=1]
- %7 = icmp eq i32 %6, 128 ; <i1> [#uses=1]
- br i1 %7, label %11, label %8
-
-; <label>:8 ; preds = %0
- call void @llvm.dbg.stoppoint(i32 5, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %9 = load %1** @__stderrp, align 4 ; <%1*> [#uses=1]
- %10 = call i32 (%1*, i8*, ...)* @fprintf(%1* %9, i8* getelementptr ([35 x i8]* @8, i32 0, i32 0), i32 22, i8* getelementptr ([11 x i8]* @14, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
- call void @llvm.dbg.stoppoint(i32 6, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- br label %11
-
-; <label>:11 ; preds = %8, %0
- %.0.i = phi i32 [ 0, %8 ], [ 1, %0 ] ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 23, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %12 = getelementptr %4* %1, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 0, i32* %12, align 8
- %13 = bitcast %4* %1 to [4 x i8]* ; <[4 x i8]*> [#uses=1]
- %14 = getelementptr [4 x i8]* %13, i32 0, i32 3 ; <i8*> [#uses=1]
- store i8 -128, i8* %14, align 1
- call void @llvm.dbg.stoppoint(i32 24, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %15 = getelementptr %4* %1, i32 0, i32 0 ; <i32*> [#uses=1]
- %16 = load i32* %15, align 8 ; <i32> [#uses=1]
- %17 = icmp slt i32 %16, 0 ; <i1> [#uses=1]
- br i1 %17, label %test.exit, label %18
-
-; <label>:18 ; preds = %11
- call void @llvm.dbg.stoppoint(i32 5, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %19 = load %1** @__stderrp, align 4 ; <%1*> [#uses=1]
- %20 = call i32 (%1*, i8*, ...)* @fprintf(%1* %19, i8* getelementptr ([35 x i8]* @8, i32 0, i32 0), i32 24, i8* getelementptr ([8 x i8]* @15, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
- call void @llvm.dbg.stoppoint(i32 6, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- br label %test.exit
-
-test.exit: ; preds = %18, %11
- %.01.i = phi i32 [ 0, %18 ], [ 1, %11 ] ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 24, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %21 = and i32 %.01.i, %.0.i ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 25, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %tmp = xor i32 %21, 1 ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 29, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- call void @llvm.dbg.region.end(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram38 to %0*))
- ret i32 %tmp
-}
diff --git a/test/Transforms/ScalarRepl/debuginfo.ll b/test/Transforms/ScalarRepl/debuginfo.ll
deleted file mode 100644
index ae2c6cc..0000000
--- a/test/Transforms/ScalarRepl/debuginfo.ll
+++ /dev/null
@@ -1,107 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | not grep alloca
-; RUN: opt < %s -scalarrepl-ssa -S | not grep alloca
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
- %llvm.dbg.composite.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }*, { }* }
- %llvm.dbg.derivedtype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }* }
- %llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
- %llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
- %struct.Sphere = type { %struct.Vec }
- %struct.Vec = type { i32, i32, i32 }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [6 x i8] c"r.cpp\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
-@.str1 = internal constant [5 x i8] c"/tmp\00", section "llvm.metadata" ; <[5 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata" ; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 4, i8* getelementptr ([6 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@.str3 = internal constant [4 x i8] c"Vec\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@.str4 = internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-@.str5 = internal constant [2 x i8] c"x\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([2 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@.str6 = internal constant [2 x i8] c"y\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype7 = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([2 x i8]* @.str6, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, i64 32, i64 32, i64 32, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@.str8 = internal constant [2 x i8] c"z\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype9 = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([2 x i8]* @.str8, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, i64 32, i64 32, i64 64, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype10 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype11 = internal constant %llvm.dbg.derivedtype.type { i32 458790, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 96, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype12 = internal constant %llvm.dbg.derivedtype.type { i32 458768, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype11 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array = internal constant [3 x { }*] [ { }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype10 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype12 to { }*) ], section "llvm.metadata" ; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite13 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite13 to { }*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.array14 = internal constant [5 x { }*] [ { }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype10 to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) ], section "llvm.metadata" ; <[5 x { }*]*> [#uses=1]
-@llvm.dbg.composite15 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([5 x { }*]* @llvm.dbg.array14 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprogram16 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 5, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite15 to { }*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.array17 = internal constant [5 x { }*] [ { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype7 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype9 to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to { }*) ], section "llvm.metadata" ; <[5 x { }*]*> [#uses=1]
-@llvm.dbg.composite18 = internal constant %llvm.dbg.composite.type { i32 458771, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, i64 96, i64 32, i64 0, i32 0, { }* null, { }* bitcast ([5 x { }*]* @llvm.dbg.array17 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.derivedtype19 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array20 = internal constant [5 x { }*] [ { }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype19 to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) ], section "llvm.metadata" ; <[5 x { }*]*> [#uses=1]
-@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([5 x { }*]* @llvm.dbg.array20 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@.str21 = internal constant [13 x i8] c"__comp_ctor \00", section "llvm.metadata" ; <[13 x i8]*> [#uses=1]
-@.str22 = internal constant [14 x i8] c"_ZN3VecC1Eiii\00", section "llvm.metadata" ; <[14 x i8]*> [#uses=1]
-@llvm.dbg.array32 = internal constant [3 x { }*] [ { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype12 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype12 to { }*) ], section "llvm.metadata" ; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite33 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array32 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@.str34 = internal constant [10 x i8] c"operator-\00", section "llvm.metadata" ; <[10 x i8]*> [#uses=1]
-@.str35 = internal constant [14 x i8] c"_ZmiRK3VecS1_\00", section "llvm.metadata" ; <[14 x i8]*> [#uses=1]
-@.str41 = internal constant [7 x i8] c"Sphere\00", section "llvm.metadata" ; <[7 x i8]*> [#uses=1]
-@.str43 = internal constant [7 x i8] c"center\00", section "llvm.metadata" ; <[7 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype44 = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([7 x i8]* @.str43, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 14, i64 96, i64 32, i64 0, i32 1, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype45 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite52 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array46 = internal constant [3 x { }*] [ { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype45 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype12 to { }*) ], section "llvm.metadata" ; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite47 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array46 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@.str48 = internal constant [11 x i8] c"ray_sphere\00", section "llvm.metadata" ; <[11 x i8]*> [#uses=1]
-@.str49 = internal constant [30 x i8] c"_ZN6Sphere10ray_sphereERK3Vec\00", section "llvm.metadata" ; <[30 x i8]*> [#uses=1]
-@llvm.dbg.subprogram50 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([11 x i8]* @.str48, i32 0, i32 0), i8* getelementptr ([11 x i8]* @.str48, i32 0, i32 0), i8* getelementptr ([30 x i8]* @.str49, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 16, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite47 to { }*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.array51 = internal constant [2 x { }*] [ { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype44 to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram50 to { }*) ], section "llvm.metadata" ; <[2 x { }*]*> [#uses=1]
-@llvm.dbg.composite52 = internal constant %llvm.dbg.composite.type { i32 458771, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([7 x i8]* @.str41, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, i64 96, i64 32, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array51 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.derivedtype53 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite52 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array54 = internal constant [3 x { }*] [ { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype53 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype12 to { }*) ], section "llvm.metadata" ; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite55 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array54 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprogram56 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([11 x i8]* @.str48, i32 0, i32 0), i8* getelementptr ([11 x i8]* @.str48, i32 0, i32 0), i8* getelementptr ([30 x i8]* @.str49, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 16, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite55 to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str61 = internal constant [2 x i8] c"v\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.variable62 = internal constant %llvm.dbg.variable.type { i32 459008, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram56 to { }*), i8* getelementptr ([2 x i8]* @.str61, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 17, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1]
-
-declare void @llvm.dbg.func.start({ }*) nounwind
-
-declare void @llvm.dbg.declare({ }*, { }*) nounwind
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare void @llvm.dbg.region.end({ }*) nounwind
-
-define i32 @_ZN6Sphere10ray_sphereERK3Vec(%struct.Sphere* %this, %struct.Vec* %Orig) nounwind {
-entry:
- %v = alloca %struct.Vec, align 8 ; <%struct.Vec*> [#uses=4]
- call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram56 to { }*))
- %0 = bitcast %struct.Vec* %v to { }* ; <{ }*> [#uses=1]
- call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable62 to { }*))
- %1 = getelementptr %struct.Sphere* %this, i32 0, i32 0, i32 2 ; <i32*> [#uses=1]
- %2 = load i32* %1, align 4 ; <i32> [#uses=1]
- %3 = getelementptr %struct.Vec* %Orig, i32 0, i32 2 ; <i32*> [#uses=1]
- %4 = load i32* %3, align 4 ; <i32> [#uses=1]
- %5 = sub i32 %2, %4 ; <i32> [#uses=1]
- %6 = getelementptr %struct.Sphere* %this, i32 0, i32 0, i32 1 ; <i32*> [#uses=1]
- %7 = load i32* %6, align 4 ; <i32> [#uses=1]
- %8 = getelementptr %struct.Vec* %Orig, i32 0, i32 1 ; <i32*> [#uses=1]
- %9 = load i32* %8, align 4 ; <i32> [#uses=1]
- %10 = sub i32 %7, %9 ; <i32> [#uses=1]
- %11 = getelementptr %struct.Sphere* %this, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
- %12 = load i32* %11, align 4 ; <i32> [#uses=1]
- %13 = getelementptr %struct.Vec* %Orig, i32 0, i32 0 ; <i32*> [#uses=1]
- %14 = load i32* %13, align 4 ; <i32> [#uses=1]
- %15 = sub i32 %12, %14 ; <i32> [#uses=1]
- %16 = getelementptr %struct.Vec* %v, i32 0, i32 0 ; <i32*> [#uses=2]
- store i32 %15, i32* %16, align 8
- %17 = getelementptr %struct.Vec* %v, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 %10, i32* %17, align 4
- %18 = getelementptr %struct.Vec* %v, i32 0, i32 2 ; <i32*> [#uses=1]
- store i32 %5, i32* %18, align 8
- call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
- call void @llvm.dbg.stoppoint(i32 9, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
- %19 = load i32* %16, align 8 ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 18, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram56 to { }*))
- ret i32 %19
-}
diff --git a/test/Transforms/ScalarRepl/volatile.ll b/test/Transforms/ScalarRepl/volatile.ll
index ab276b0..fadf1aa 100644
--- a/test/Transforms/ScalarRepl/volatile.ll
+++ b/test/Transforms/ScalarRepl/volatile.ll
@@ -4,9 +4,9 @@
define i32 @voltest(i32 %T) {
%A = alloca {i32, i32}
%B = getelementptr {i32,i32}* %A, i32 0, i32 0
- volatile store i32 %T, i32* %B
+ store volatile i32 %T, i32* %B
%C = getelementptr {i32,i32}* %A, i32 0, i32 1
- %X = volatile load i32* %C
+ %X = load volatile i32* %C
ret i32 %X
}
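
The hunk above tracks an IR assembler change folded into this merge: the volatile marker now follows the instruction mnemonic instead of preceding it. A minimal before/after sketch, illustrative rather than part of the diff:

    ; older spelling, no longer accepted by the assembler
    volatile store i32 %T, i32* %B
    %X = volatile load i32* %C

    ; spelling accepted at this revision
    store volatile i32 %T, i32* %B
    %X = load volatile i32* %C
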
diff --git a/test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll b/test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll
deleted file mode 100644
index 761f0d5..0000000
--- a/test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll
+++ /dev/null
@@ -1,70 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | grep {br i1} | count 1
-
-; ModuleID = '<stdin>'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
- %llvm.dbg.derivedtype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }* }
- %llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
- %llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 393233, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([7 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([52 x i8]* @.str2, i32 0, i32 0) }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [7 x i8] c"cond.c\00", section "llvm.metadata" ; <[7 x i8]*> [#uses=1]
-@.str1 = internal constant [5 x i8] c"/tmp\00", section "llvm.metadata" ; <[5 x i8]*> [#uses=1]
-@.str2 = internal constant [52 x i8] c"4.2.1 (Based on Apple Inc. build 5555) (LLVM build)\00", section "llvm.metadata" ; <[52 x i8]*> [#uses=1]
-@.str3 = internal constant [4 x i8] c"foo\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 393252, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-@.str4 = internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 393473, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([2 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=0]
-@.str5 = internal constant [2 x i8] c"x\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.variable6 = internal constant %llvm.dbg.variable.type { i32 393473, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([2 x i8]* @.str7, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=0]
-@.str7 = internal constant [2 x i8] c"y\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 393238, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([6 x i8]* @.str8, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, i64 0, i64 0, i64 0, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype9 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@.str8 = internal constant [6 x i8] c"uint1\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
-@llvm.dbg.basictype9 = internal constant %llvm.dbg.basictype.type { i32 393252, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 7 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-
-define i32 @foo(i32 %x1, i1 zeroext %y2) nounwind {
-entry:
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
- call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp4 = icmp eq i32 %x1, 0 ; <i1> [#uses=1]
- br i1 %tmp4, label %bb, label %bb14
-
-bb: ; preds = %entry
- call void @llvm.dbg.stoppoint(i32 6, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %y2, label %bb14, label %bb10
-
-bb7: ; preds = %bb
- call void @llvm.dbg.stoppoint(i32 7, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp9 = call i32 @g1(i32 %x1) nounwind ; <i32> [#uses=1]
- ret i32 %tmp9
-
-bb10: ; preds = %bb
- call void @llvm.dbg.stoppoint(i32 8, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp12 = add i32 %x1, 1 ; <i32> [#uses=1]
- %tmp13 = call i32 @g2(i32 %tmp12) nounwind ; <i32> [#uses=1]
- ret i32 %tmp13
-
-bb14: ; preds = %entry
- call void @llvm.dbg.stoppoint(i32 10, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp16 = call i32 @g1(i32 %x1) nounwind ; <i32> [#uses=1]
- call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
- ret i32 %tmp16
-}
-
-declare void @llvm.dbg.func.start({ }*) nounwind
-
-declare void @llvm.dbg.declare({ }*, { }*) nounwind
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare i32 @g1(i32)
-
-declare i32 @g2(i32)
-
-declare void @llvm.dbg.region.end({ }*) nounwind
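
The test deleted above, like the other *dbg*.ll deletions in this section, predates the metadata-based debug-info scheme: it drives location tracking through the long-removed llvm.dbg.func.start/llvm.dbg.stoppoint/llvm.dbg.region.end intrinsics, so it can no longer exercise anything current. By this revision the same line information rides on instructions as !dbg attachments, roughly like this (an illustrative sketch, not part of the diff; !42 stands for a source-location metadata node):

    %sum = add i32 %a, %b, !dbg !42
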
diff --git a/test/Transforms/SimplifyCFG/branch-fold.ll b/test/Transforms/SimplifyCFG/branch-fold.ll
index 266609b5..2b29681 100644
--- a/test/Transforms/SimplifyCFG/branch-fold.ll
+++ b/test/Transforms/SimplifyCFG/branch-fold.ll
@@ -1,13 +1,19 @@
-; RUN: opt < %s -simplifycfg -S | grep {br i1} | count 1
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
define void @test(i32* %P, i32* %Q, i1 %A, i1 %B) {
+; CHECK: test
+; CHECK: br i1
+; CHECK-NOT: br i1
+; CHECK: ret
+; CHECK: ret
+
+entry:
br i1 %A, label %a, label %b
-a: ; preds = %0
+a:
br i1 %B, label %b, label %c
-b: ; preds = %a, %0
+b:
store i32 123, i32* %P
ret void
-c: ; preds = %a
+c:
ret void
}
-
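
The rewrite above also migrates the test from counting grep matches to FileCheck. FileCheck matches its directives in order, and a CHECK-NOT asserts that the pattern is absent between the neighbouring matches, so the directives encode "exactly one br i1, followed by two rets". The same idiom in minimal form (illustrative only):

    ; RUN: opt < %s -simplifycfg -S | FileCheck %s
    ; CHECK: br i1
    ; CHECK-NOT: br i1
    ; CHECK: ret
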
diff --git a/test/Transforms/SimplifyCFG/branch_fold_dbg.ll b/test/Transforms/SimplifyCFG/branch_fold_dbg.ll
deleted file mode 100644
index 6a500de..0000000
--- a/test/Transforms/SimplifyCFG/branch_fold_dbg.ll
+++ /dev/null
@@ -1,122 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | not grep br
-; END.
-
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
-
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata" ; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-
-define void @main() {
-entry:
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.14.i19 = icmp eq i32 0, 2 ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.14.i19, label %endif.1.i20, label %read_min.exit
-endif.1.i20: ; preds = %entry
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.9.i.i = icmp eq i8* null, null ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.9.i.i, label %then.i12.i, label %then.i.i
-then.i.i: ; preds = %endif.1.i20
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- ret void
-then.i12.i: ; preds = %endif.1.i20
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.9.i4.i = icmp eq i8* null, null ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.9.i4.i, label %endif.2.i33, label %then.i5.i
-then.i5.i: ; preds = %then.i12.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- ret void
-endif.2.i33: ; preds = %then.i12.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %loopexit.0.i40, label %no_exit.0.i35
-no_exit.0.i35: ; preds = %no_exit.0.i35, %endif.2.i33
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.130.i = icmp slt i32 0, 0 ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.130.i, label %loopexit.0.i40.loopexit, label %no_exit.0.i35
-loopexit.0.i40.loopexit: ; preds = %no_exit.0.i35
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %loopexit.0.i40
-loopexit.0.i40: ; preds = %loopexit.0.i40.loopexit, %endif.2.i33
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.341.i = icmp eq i32 0, 0 ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.341.i, label %loopentry.1.i, label %read_min.exit
-loopentry.1.i: ; preds = %loopexit.0.i40
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.347.i = icmp sgt i32 0, 0 ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.347.i, label %no_exit.1.i41, label %loopexit.2.i44
-no_exit.1.i41: ; preds = %endif.5.i, %loopentry.1.i
- %indvar.i42 = phi i32 [ %indvar.next.i, %endif.5.i ], [ 0, %loopentry.1.i ] ; <i32> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.355.i = icmp eq i32 0, 3 ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.355.i, label %endif.5.i, label %read_min.exit
-endif.5.i: ; preds = %no_exit.1.i41
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.34773.i = icmp sgt i32 0, 0 ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %indvar.next.i = add i32 %indvar.i42, 1 ; <i32> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.34773.i, label %no_exit.1.i41, label %loopexit.1.i.loopexit
-loopexit.1.i.loopexit: ; preds = %endif.5.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- ret void
-loopexit.2.i44: ; preds = %loopentry.1.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- ret void
-read_min.exit: ; preds = %no_exit.1.i41, %loopexit.0.i40, %entry
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.23 = icmp eq i32 0, 0 ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.23, label %endif.1, label %then.1
-then.1: ; preds = %read_min.exit
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %endif.0.i, label %then.0.i
-then.0.i: ; preds = %then.1
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %endif.1.i, label %then.1.i
-endif.0.i: ; preds = %then.1
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %endif.1.i, label %then.1.i
-then.1.i: ; preds = %endif.0.i, %then.0.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %getfree.exit, label %then.2.i
-endif.1.i: ; preds = %endif.0.i, %then.0.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %getfree.exit, label %then.2.i
-then.2.i: ; preds = %endif.1.i, %then.1.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- ret void
-getfree.exit: ; preds = %endif.1.i, %then.1.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- ret void
-endif.1: ; preds = %read_min.exit
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.27.i = getelementptr i32* null, i32 0 ; <i32*> [#uses=0]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %loopexit.0.i15, label %no_exit.0.i14
-no_exit.0.i14: ; preds = %endif.1
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- ret void
-loopexit.0.i15: ; preds = %endif.1
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %primal_start_artificial.exit, label %no_exit.1.i16
-no_exit.1.i16: ; preds = %no_exit.1.i16, %loopexit.0.i15
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %primal_start_artificial.exit, label %no_exit.1.i16
-primal_start_artificial.exit: ; preds = %no_exit.1.i16, %loopexit.0.i15
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- ret void
-}
diff --git a/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll b/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll
deleted file mode 100644
index 6fbbb1b..0000000
--- a/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | not grep br
-
-
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
-
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata" ; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare void @bar(i32)
-
-define void @test(i1 %P, i32* %Q) {
- br i1 %P, label %T, label %F
-T: ; preds = %0
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- store i32 1, i32* %Q
- %A = load i32* %Q ; <i32> [#uses=1]
- call void @bar( i32 %A )
- ret void
-F: ; preds = %0
- store i32 1, i32* %Q
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %B = load i32* %Q ; <i32> [#uses=1]
- call void @bar( i32 %B )
- ret void
-}
-
diff --git a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
deleted file mode 100644
index 2723ec6..0000000
--- a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | FileCheck %s
-
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
-
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata" ; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-define i1 @t({ i32, i32 }* %I) {
-; CHECK: @t
-; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14
-; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6
-entry:
- %tmp.1.i = getelementptr { i32, i32 }* %I, i64 0, i32 1 ; <i32*> [#uses=1]
- %tmp.2.i = load i32* %tmp.1.i ; <i32> [#uses=6]
- %tmp.2 = icmp eq i32 %tmp.2.i, 14 ; <i1> [#uses=1]
- br i1 %tmp.2, label %shortcirc_done.4, label %shortcirc_next.0
-shortcirc_next.0: ; preds = %entry
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.6 = icmp eq i32 %tmp.2.i, 15 ; <i1> [#uses=1]
- br i1 %tmp.6, label %shortcirc_done.4, label %shortcirc_next.1
-shortcirc_next.1: ; preds = %shortcirc_next.0
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.11 = icmp eq i32 %tmp.2.i, 16 ; <i1> [#uses=1]
- br i1 %tmp.11, label %shortcirc_done.4, label %shortcirc_next.2
-shortcirc_next.2: ; preds = %shortcirc_next.1
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.16 = icmp eq i32 %tmp.2.i, 17 ; <i1> [#uses=1]
- br i1 %tmp.16, label %shortcirc_done.4, label %shortcirc_next.3
-shortcirc_next.3: ; preds = %shortcirc_next.2
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.21 = icmp eq i32 %tmp.2.i, 18 ; <i1> [#uses=1]
- br i1 %tmp.21, label %shortcirc_done.4, label %shortcirc_next.4
-shortcirc_next.4: ; preds = %shortcirc_next.3
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.26 = icmp eq i32 %tmp.2.i, 19 ; <i1> [#uses=1]
- br label %UnifiedReturnBlock
-shortcirc_done.4: ; preds = %shortcirc_next.3, %shortcirc_next.2, %shortcirc_next.1, %shortcirc_next.0, %entry
- br label %UnifiedReturnBlock
-UnifiedReturnBlock: ; preds = %shortcirc_done.4, %shortcirc_next.4
- %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ] ; <i1> [#uses=1]
- ret i1 %UnifiedRetVal
-}
-
diff --git a/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll b/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll
deleted file mode 100644
index 343e169..0000000
--- a/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll
+++ /dev/null
@@ -1,116 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | \
-; RUN: not grep " switch"
-
-; ModuleID = '<stdin>'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 }
- %llvm.dbg.composite.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }*, { }*, i32 }
- %llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
- %llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [10 x i8] c"swithh2.c\00", section "llvm.metadata" ; <[10 x i8]*> [#uses=1]
-@.str1 = internal constant [38 x i8] c"/developer/home2/zsth/test/debug/tmp/\00", section "llvm.metadata" ; <[38 x i8]*> [#uses=1]
-@.str2 = internal constant [52 x i8] c"4.2.1 (Based on Apple Inc. build 5641) (LLVM build)\00", section "llvm.metadata" ; <[52 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([10 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([38 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([52 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null, i32 -1 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@.str3 = internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.array = internal constant [2 x { }*] [{ }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*)], section "llvm.metadata" ; <[2 x { }*]*> [#uses=1]
-@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array to { }*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str4 = internal constant [4 x i8] c"foo\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str5 = internal constant [2 x i8] c"x\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([2 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=0]
-
-define i32 @foo(i32 %x) nounwind {
-entry:
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
- call void @llvm.dbg.stoppoint(i32 2, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- switch i32 %x, label %bb4 [
- i32 1, label %bb
- i32 2, label %bb1
- i32 3, label %bb2
- i32 4, label %bb3
- ]
- ; No predecessors!
- call void @llvm.dbg.stoppoint(i32 2, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb
-
-bb: ; preds = %0, %entry
- call void @llvm.dbg.stoppoint(i32 3, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb8
- ; No predecessors!
- call void @llvm.dbg.stoppoint(i32 3, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb1
-
-bb1: ; preds = %1, %entry
- call void @llvm.dbg.stoppoint(i32 4, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb8
- ; No predecessors!
- call void @llvm.dbg.stoppoint(i32 4, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb2
-
-bb2: ; preds = %2, %entry
- call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb8
- ; No predecessors!
- call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb3
-
-bb3: ; preds = %3, %entry
- call void @llvm.dbg.stoppoint(i32 6, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb8
- ; No predecessors!
- call void @llvm.dbg.stoppoint(i32 6, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb4
-
-bb4: ; preds = %4, %entry
- call void @llvm.dbg.stoppoint(i32 10, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- switch i32 %x, label %bb7 [
- i32 5, label %bb5
- i32 6, label %bb6
- ]
- ; No predecessors!
- call void @llvm.dbg.stoppoint(i32 10, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb5
-
-bb5: ; preds = %5, %bb4
- call void @llvm.dbg.stoppoint(i32 11, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb8
- ; No predecessors!
- call void @llvm.dbg.stoppoint(i32 11, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb6
-
-bb6: ; preds = %6, %bb4
- call void @llvm.dbg.stoppoint(i32 12, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb8
- ; No predecessors!
- call void @llvm.dbg.stoppoint(i32 12, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb7
-
-bb7: ; preds = %7, %bb4
- call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb8
-
-bb8: ; preds = %bb7, %bb6, %bb5, %bb3, %bb2, %bb1, %bb
- %.0 = phi i32 [ 4, %bb3 ], [ 3, %bb2 ], [ 2, %bb1 ], [ 1, %bb ], [ 6, %bb6 ], [ 5, %bb5 ], [ 0, %bb7 ] ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %return
-
-return: ; preds = %bb8
- call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
- ret i32 %.0
-}
-
-declare void @llvm.dbg.func.start({ }*) nounwind
-
-declare void @llvm.dbg.declare({ }*, { }*) nounwind
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare void @llvm.dbg.region.end({ }*) nounwind
diff --git a/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll b/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll
deleted file mode 100644
index 01041eb..0000000
--- a/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | not grep br
-
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
-
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata" ; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-define i1 @qux(i8* %m, i8* %n, i8* %o, i8* %p) nounwind {
-entry:
- %tmp7 = icmp eq i8* %m, %n
- br i1 %tmp7, label %bb, label %UnifiedReturnBlock
-
-bb:
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp15 = icmp eq i8* %o, %p
- br label %UnifiedReturnBlock
-
-UnifiedReturnBlock:
- %result = phi i1 [ 0, %entry ], [ %tmp15, %bb ]
- ret i1 %result
-}
diff --git a/test/Transforms/SimplifyLibCalls/fwrite.ll b/test/Transforms/SimplifyLibCalls/fwrite.ll
new file mode 100644
index 0000000..f0f3dca
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/fwrite.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+
+%FILE = type { i32 }
+
+@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+
+define i64 @foo(%FILE* %f) {
+; CHECK: %retval = call i64 @fwrite
+ %retval = call i64 @fwrite(i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0), i64 1, i64 1, %FILE* %f)
+ ret i64 %retval
+}
+
+declare i64 @fwrite(i8*, i64, i64, %FILE *)
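
The new test pins down a correctness condition in simplify-libcalls: a one-byte fwrite is only rewritten (to an fputc-style call) when its return value is unused, and here the result feeds the ret, so the CHECK line insists the fwrite call survives. Under that assumption, a sketch of the case the pass is free to rewrite:

    ; result unused, so a one-byte fwrite may legitimately be simplified
    call i64 @fwrite(i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0), i64 1, i64 1, %FILE* %f)
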
diff --git a/test/Transforms/Sink/basic.ll b/test/Transforms/Sink/basic.ll
index 2343372..4c531d8 100644
--- a/test/Transforms/Sink/basic.ll
+++ b/test/Transforms/Sink/basic.ll
@@ -21,7 +21,7 @@ false:
ret i32 0
}
-; But don't sink volatile loads...
+; But don't sink "load volatile" instructions...
; CHECK: @foo2
; CHECK: load volatile
diff --git a/test/Transforms/TailCallElim/dont_reorder_load.ll b/test/Transforms/TailCallElim/dont_reorder_load.ll
index 899e115..a29b72e 100644
--- a/test/Transforms/TailCallElim/dont_reorder_load.ll
+++ b/test/Transforms/TailCallElim/dont_reorder_load.ll
@@ -46,7 +46,7 @@ else: ; preds = %entry
}
; This load can't be safely moved above the call because that would change the
-; order in which the volatile loads are performed.
+; order in which the "load volatile" operations are performed.
define fastcc i32 @no_tailrecelim_3(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind {
entry:
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
@@ -58,7 +58,7 @@ if: ; preds = %entry
else: ; preds = %entry
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
%tmp8 = call fastcc i32 @no_tailrecelim_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
- %tmp9 = volatile load i32* %a_arg ; <i32> [#uses=1]
+ %tmp9 = load volatile i32* %a_arg ; <i32> [#uses=1]
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
ret i32 %tmp10
}
diff --git a/test/Verifier/cttz-undef-arg.ll b/test/Verifier/cttz-undef-arg.ll
new file mode 100644
index 0000000..48cd061
--- /dev/null
+++ b/test/Verifier/cttz-undef-arg.ll
@@ -0,0 +1,16 @@
+; RUN: not llvm-as < %s -o /dev/null |& FileCheck %s
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define void @f(i32 %x, i1 %is_not_zero) {
+entry:
+; CHECK: is_zero_undef argument of bit counting intrinsics must be a constant int
+; CHECK-NEXT: @llvm.ctlz.i32
+ call i32 @llvm.ctlz.i32(i32 %x, i1 %is_not_zero)
+
+; CHECK: is_zero_undef argument of bit counting intrinsics must be a constant int
+; CHECK-NEXT: @llvm.cttz.i32
+ call i32 @llvm.cttz.i32(i32 %x, i1 %is_not_zero)
+ ret void
+}
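
This verifier test encodes the new contract for the bit-counting intrinsics: the second operand (is_zero_undef) must be an immediate i1 constant, not a runtime value, which is why passing %is_not_zero is rejected twice above. A well-formed call, for contrast (illustrative):

    ; constant i1 flag; with i1 false the result is defined even for a zero input
    %r = call i32 @llvm.cttz.i32(i32 %x, i1 false)
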
diff --git a/test/lit.cfg b/test/lit.cfg
index 1f08c93..6bc170c 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -283,5 +283,10 @@ else:
if loadable_module:
config.available_features.add('loadable_module')
-if config.enable_assertions:
+# llc knows whether it was compiled with -DNDEBUG.
+import subprocess
+llc_cmd = subprocess.Popen([os.path.join(llvm_tools_dir, 'llc'), '-version'],
+ stdout = subprocess.PIPE)
+if re.search(r'with assertions', llc_cmd.stdout.read()):
config.available_features.add('asserts')
+llc_cmd.wait()
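
With this change the 'asserts' feature is derived from the llc binary actually under test (by grepping its -version banner for "with assertions") instead of trusting the configure-time flag, which can disagree with the binary when tools are built separately. Tests keep consuming the feature the usual way, e.g. with a line such as (illustrative):

    ; REQUIRES: asserts
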
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index 28b36dc..75d203e 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -11,16 +11,9 @@ if( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/polly/CMakeLists.txt )
endif( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/polly/CMakeLists.txt )
if( NOT WIN32 OR MSYS OR CYGWIN )
- # It is useful to build llvm-config before the other tools, so we
- # have a fresh LibDeps.txt for regenerating the hard-coded library
- # dependencies. llvm-config/CMakeLists.txt takes care of this but we
- # must keep llvm-config as the first entry on the list of tools to
- # be built.
- add_subdirectory(llvm-config)
-
- # We currently require 'sed' to build llvm-config-2, so don't try to build it
+ # We currently require 'sed' to build llvm-config, so don't try to build it
# on pure Win32.
- add_subdirectory(llvm-config-2)
+ add_subdirectory(llvm-config)
endif()
add_subdirectory(opt)
@@ -35,6 +28,7 @@ add_subdirectory(llvm-nm)
add_subdirectory(llvm-size)
add_subdirectory(llvm-ld)
+add_subdirectory(llvm-cov)
add_subdirectory(llvm-prof)
add_subdirectory(llvm-link)
add_subdirectory(lli)
@@ -50,7 +44,6 @@ add_subdirectory(bugpoint)
add_subdirectory(bugpoint-passes)
add_subdirectory(llvm-bcanalyzer)
add_subdirectory(llvm-stub)
-add_subdirectory(edis)
if( NOT WIN32 )
add_subdirectory(lto)
diff --git a/tools/LLVMBuild.txt b/tools/LLVMBuild.txt
index 77b54c2..aba990f 100644
--- a/tools/LLVMBuild.txt
+++ b/tools/LLVMBuild.txt
@@ -15,8 +15,10 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-ld llvm-link llvm-mc llvm-nm llvm-objdump llvm-prof llvm-ranlib llvm-rtdyld llvm-size llvm-stub macho-dump opt
+
[component_0]
type = Group
name = Tools
parent = $ROOT
-
diff --git a/tools/Makefile b/tools/Makefile
index 278546b..1953cb6 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -34,7 +34,7 @@ PARALLEL_DIRS := opt llvm-as llvm-dis \
bugpoint llvm-bcanalyzer llvm-stub \
llvm-diff macho-dump llvm-objdump \
llvm-rtdyld llvm-dwarfdump llvm-cov \
- llvm-size llvm-config-2
+ llvm-size
# Let users override the set of tools to build from the command line.
ifdef ONLY_TOOLS
@@ -56,14 +56,6 @@ ifeq ($(ENABLE_PIC),1)
endif
PARALLEL_DIRS += bugpoint-passes
-
- # The edis library is only supported if ARM and/or X86 are enabled, and if
- # LLVM is being built PIC on platforms that support dylibs.
- ifneq ($(DISABLE_EDIS),1)
- ifneq ($(filter $(TARGETS_TO_BUILD), X86 ARM),)
- PARALLEL_DIRS += edis
- endif
- endif
endif
ifdef LLVM_HAS_POLLY
diff --git a/tools/bugpoint/LLVMBuild.txt b/tools/bugpoint/LLVMBuild.txt
index ac9f7e6..549d9d0 100644
--- a/tools/bugpoint/LLVMBuild.txt
+++ b/tools/bugpoint/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = bugpoint
parent = Tools
required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Linker Scalar
-
diff --git a/tools/edis/CMakeLists.txt b/tools/edis/CMakeLists.txt
deleted file mode 100644
index 1e162f9..0000000
--- a/tools/edis/CMakeLists.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-include_directories(${CMAKE_CURRENT_BINARY_DIR})
-
-set(SOURCES
- ../../include/llvm-c/EnhancedDisassembly.h
- EDMain.cpp
- )
-
-set(EDIS_DEPENDS LLVMMCDisassembler LLVMMCParser)
-if( LLVM_TARGETS_TO_BUILD MATCHES X86 )
- list(APPEND EDIS_DEPENDS LLVMX86AsmPrinter LLVMX86AsmParser LLVMX86Disassembler LLVMX86Desc)
-endif()
-if( LLVM_TARGETS_TO_BUILD MATCHES ARM )
- list(APPEND EDIS_DEPENDS LLVMARMAsmPrinter LLVMARMAsmParser LLVMARMDisassembler LLVMARMDesc)
-endif()
-
-add_llvm_library(EnhancedDisassembly ${SOURCES})
-set_property(TARGET EnhancedDisassembly PROPERTY
- OUTPUT_NAME "EnhancedDisassembly")
-
-add_llvm_library_dependencies(EnhancedDisassembly
- ${EDIS_DEPENDS})
diff --git a/tools/edis/Makefile b/tools/edis/Makefile
deleted file mode 100644
index 39a5cf7..0000000
--- a/tools/edis/Makefile
+++ /dev/null
@@ -1,53 +0,0 @@
-##===- tools/edis/Makefile -----------------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL := ../..
-LIBRARYNAME := EnhancedDisassembly
-LINK_LIBS_IN_SHARED := 1
-
-EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/EnhancedDisassembly.exports
-
-# Include this here so we can get the configuration of the targets
-# that have been configured for construction. We have to do this
-# early so we can set up LINK_COMPONENTS before including Makefile.rules
-include $(LEVEL)/Makefile.config
-
-LINK_COMPONENTS := mcdisassembler
-
-# If the X86 target is enabled, link in the asmprinter and disassembler.
-ifneq ($(filter $(TARGETS_TO_BUILD), X86),)
-LINK_COMPONENTS += x86asmprinter x86disassembler
-endif
-
-# If the ARM target is enabled, link in the asmprinter and disassembler.
-ifneq ($(filter $(TARGETS_TO_BUILD), ARM),)
-LINK_COMPONENTS += armasmprinter armdisassembler
-endif
-
-include $(LEVEL)/Makefile.common
-
-ifeq ($(HOST_OS),Darwin)
- # extra options to override libtool defaults
- LLVMLibsOptions := $(LLVMLibsOptions) \
- -Wl,-dead_strip
-
- ifdef EDIS_VERSION
- LLVMLibsOptions := $(LLVMLibsOptions) -Wl,-current_version -Wl,$(EDIS_VERSION) \
- -Wl,-compatibility_version -Wl,1
- endif
-
- # Mac OS X 10.4 and earlier tools do not allow a second -install_name on command line
- DARWIN_VERS := $(shell echo $(TARGET_TRIPLE) | sed 's/.*darwin\([0-9]*\).*/\1/')
- ifneq ($(DARWIN_VERS),8)
- LLVMLibsOptions := $(LLVMLibsOptions) \
- -Wl,-install_name \
- -Wl,"@rpath/lib$(LIBRARYNAME)$(SHLIBEXT)"
- endif
-endif
-
diff --git a/tools/llc/LLVMBuild.txt b/tools/llc/LLVMBuild.txt
index 78fe523..8c8794f 100644
--- a/tools/llc/LLVMBuild.txt
+++ b/tools/llc/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llc
parent = Tools
required_libraries = AsmParser BitReader all-targets
-
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index fdc6fec..58dafca 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -141,6 +141,134 @@ DisableRedZone("disable-red-zone",
cl::desc("Do not emit code that uses the red zone."),
cl::init(false));
+static cl::opt<bool>
+EnableFPMAD("enable-fp-mad",
+ cl::desc("Enable less precise MAD instructions to be generated"),
+ cl::init(false));
+
+static cl::opt<bool>
+PrintCode("print-machineinstrs",
+ cl::desc("Print generated machine code"),
+ cl::init(false));
+
+static cl::opt<bool>
+DisableFPElim("disable-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization"),
+ cl::init(false));
+
+static cl::opt<bool>
+DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"),
+ cl::init(false));
+
+static cl::opt<bool>
+DisableExcessPrecision("disable-excess-fp-precision",
+ cl::desc("Disable optimizations that may increase FP precision"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableUnsafeFPMath("enable-unsafe-fp-math",
+ cl::desc("Enable optimizations that may decrease FP precision"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableNoInfsFPMath("enable-no-infs-fp-math",
+ cl::desc("Enable FP math optimizations that assume no +-Infs"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableNoNaNsFPMath("enable-no-nans-fp-math",
+ cl::desc("Enable FP math optimizations that assume no NaNs"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
+ cl::Hidden,
+ cl::desc("Force codegen to assume rounding mode can change dynamically"),
+ cl::init(false));
+
+static cl::opt<bool>
+GenerateSoftFloatCalls("soft-float",
+ cl::desc("Generate software floating point library calls"),
+ cl::init(false));
+
+static cl::opt<llvm::FloatABI::ABIType>
+FloatABIForCalls("float-abi",
+ cl::desc("Choose float ABI type"),
+ cl::init(FloatABI::Default),
+ cl::values(
+ clEnumValN(FloatABI::Default, "default",
+ "Target default float ABI type"),
+ clEnumValN(FloatABI::Soft, "soft",
+ "Soft float ABI (implied by -soft-float)"),
+ clEnumValN(FloatABI::Hard, "hard",
+ "Hard float ABI (uses FP registers)"),
+ clEnumValEnd));
+
+static cl::opt<bool>
+DontPlaceZerosInBSS("nozero-initialized-in-bss",
+ cl::desc("Don't place zero-initialized symbols into bss section"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableJITExceptionHandling("jit-enable-eh",
+ cl::desc("Emit exception handling information"),
+ cl::init(false));
+
+// In debug builds, make this default to true.
+#ifdef NDEBUG
+#define EMIT_DEBUG false
+#else
+#define EMIT_DEBUG true
+#endif
+static cl::opt<bool>
+EmitJitDebugInfo("jit-emit-debug",
+ cl::desc("Emit debug information to debugger"),
+ cl::init(EMIT_DEBUG));
+#undef EMIT_DEBUG
+
+static cl::opt<bool>
+EmitJitDebugInfoToDisk("jit-emit-debug-to-disk",
+ cl::Hidden,
+ cl::desc("Emit debug info objfiles to disk"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableGuaranteedTailCallOpt("tailcallopt",
+ cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
+ cl::init(false));
+
+static cl::opt<unsigned>
+OverrideStackAlignment("stack-alignment",
+ cl::desc("Override default stack alignment"),
+ cl::init(0));
+
+static cl::opt<bool>
+EnableRealignStack("realign-stack",
+ cl::desc("Realign stack if needed"),
+ cl::init(true));
+
+static cl::opt<bool>
+DisableSwitchTables(cl::Hidden, "disable-jump-tables",
+ cl::desc("Do not generate jump tables."),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
+ cl::desc("Use strong PHI elimination."),
+ cl::init(false));
+
+static cl::opt<std::string>
+TrapFuncName("trap-func", cl::Hidden,
+ cl::desc("Emit a call to trap function rather than a trap instruction"),
+ cl::init(""));
+
+static cl::opt<bool>
+SegmentedStacks("segmented-stacks",
+ cl::desc("Use segmented stacks if possible."),
+ cl::init(false));
+
+
// GetFileNameRoot - Helper function to get the basename of a filename.
static inline std::string
GetFileNameRoot(const std::string &InputFilename) {
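
The block of cl::opt declarations added above moves these codegen knobs into llc itself; they feed the TargetOptions struct built in the next hunk rather than global backend state. An illustrative RUN line exercising a few of the new flags (names taken from the declarations above; the particular combination is only an example):

    ; RUN: llc < %s -O2 -disable-fp-elim -enable-unsafe-fp-math -float-abi=soft -o /dev/null
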
@@ -318,9 +446,34 @@ int main(int argc, char **argv) {
case '3': OLvl = CodeGenOpt::Aggressive; break;
}
+ TargetOptions Options;
+ Options.LessPreciseFPMADOption = EnableFPMAD;
+ Options.PrintMachineCode = PrintCode;
+ Options.NoFramePointerElim = DisableFPElim;
+ Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf;
+ Options.NoExcessFPPrecision = DisableExcessPrecision;
+ Options.UnsafeFPMath = EnableUnsafeFPMath;
+ Options.NoInfsFPMath = EnableNoInfsFPMath;
+ Options.NoNaNsFPMath = EnableNoNaNsFPMath;
+ Options.HonorSignDependentRoundingFPMathOption =
+ EnableHonorSignDependentRoundingFPMath;
+ Options.UseSoftFloat = GenerateSoftFloatCalls;
+ if (FloatABIForCalls != FloatABI::Default)
+ Options.FloatABIType = FloatABIForCalls;
+ Options.NoZerosInBSS = DontPlaceZerosInBSS;
+ Options.JITExceptionHandling = EnableJITExceptionHandling;
+ Options.JITEmitDebugInfo = EmitJitDebugInfo;
+ Options.JITEmitDebugInfoToDisk = EmitJitDebugInfoToDisk;
+ Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt;
+ Options.StackAlignmentOverride = OverrideStackAlignment;
+ Options.RealignStack = EnableRealignStack;
+ Options.DisableJumpTables = DisableSwitchTables;
+ Options.TrapFuncName = TrapFuncName;
+ Options.EnableSegmentedStacks = SegmentedStacks;
+
std::auto_ptr<TargetMachine>
target(TheTarget->createTargetMachine(TheTriple.getTriple(),
- MCPU, FeaturesStr,
+ MCPU, FeaturesStr, Options,
RelocModel, CMModel, OLvl));
assert(target.get() && "Could not allocate target machine!");
TargetMachine &Target = *target.get();
@@ -334,6 +487,9 @@ int main(int argc, char **argv) {
if (EnableDwarfDirectory)
Target.setMCUseDwarfDirectory(true);
+ if (GenerateSoftFloatCalls)
+ FloatABIForCalls = FloatABI::Soft;
+
// Disable .loc support for older OS X versions.
if (TheTriple.isMacOSX() &&
TheTriple.isMacOSXVersionLT(10, 6))
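
The llc hunk above replaces a pile of per-flag TargetMachine globals with an explicit TargetOptions bundle handed to createTargetMachine. A minimal sketch of the new calling convention, assuming the post-merge (3.1-era) API; the triple and the two overridden fields are illustrative placeholders:

    // Sketch of the new createTargetMachine contract (assumed 3.1-era API).
    #include "llvm/Support/TargetRegistry.h"
    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Target/TargetOptions.h"
    #include <string>

    llvm::TargetMachine *createTM(std::string &Err) {
      const std::string TripleStr = "x86_64-unknown-linux-gnu"; // placeholder
      const llvm::Target *T = llvm::TargetRegistry::lookupTarget(TripleStr, Err);
      if (!T) return 0;

      llvm::TargetOptions Options;           // defaults; override as needed
      Options.UnsafeFPMath = false;          // mirrors -enable-unsafe-fp-math
      Options.GuaranteedTailCallOpt = false; // mirrors -tailcallopt

      return T->createTargetMachine(TripleStr, /*CPU=*/"", /*Features=*/"",
                                    Options, llvm::Reloc::Default,
                                    llvm::CodeModel::Default);
    }

The same Options-threading shows up again below in LTOCodeGenerator.cpp and LTOModule.cpp, which now default-construct a TargetOptions to satisfy the new parameter.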
diff --git a/tools/lli/LLVMBuild.txt b/tools/lli/LLVMBuild.txt
index 35d46d7..4eb82bd 100644
--- a/tools/lli/LLVMBuild.txt
+++ b/tools/lli/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = lli
parent = Tools
required_libraries = AsmParser BitReader Interpreter JIT MCJIT NativeCodeGen SelectionDAG
-
diff --git a/tools/llvm-ar/LLVMBuild.txt b/tools/llvm-ar/LLVMBuild.txt
index f758aa1..1f61a32 100644
--- a/tools/llvm-ar/LLVMBuild.txt
+++ b/tools/llvm-ar/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-ar
parent = Tools
required_libraries = Archive
-
diff --git a/tools/llvm-as/LLVMBuild.txt b/tools/llvm-as/LLVMBuild.txt
index 1fe2589..542470b 100644
--- a/tools/llvm-as/LLVMBuild.txt
+++ b/tools/llvm-as/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-as
parent = Tools
required_libraries = AsmParser BitWriter
-
diff --git a/tools/llvm-bcanalyzer/LLVMBuild.txt b/tools/llvm-bcanalyzer/LLVMBuild.txt
index 0f07464..ee77a7d 100644
--- a/tools/llvm-bcanalyzer/LLVMBuild.txt
+++ b/tools/llvm-bcanalyzer/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-bcanalyzer
parent = Tools
required_libraries = BitReader
-
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index f0bbc9f..f1cb523 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -102,14 +102,13 @@ static const char *GetBlockName(unsigned BlockID,
default: return 0;
case bitc::MODULE_BLOCK_ID: return "MODULE_BLOCK";
case bitc::PARAMATTR_BLOCK_ID: return "PARAMATTR_BLOCK";
- case bitc::TYPE_BLOCK_ID_OLD: return "TYPE_BLOCK_ID_OLD";
case bitc::TYPE_BLOCK_ID_NEW: return "TYPE_BLOCK_ID";
case bitc::CONSTANTS_BLOCK_ID: return "CONSTANTS_BLOCK";
case bitc::FUNCTION_BLOCK_ID: return "FUNCTION_BLOCK";
- case bitc::TYPE_SYMTAB_BLOCK_ID_OLD: return "TYPE_SYMTAB_OLD";
case bitc::VALUE_SYMTAB_BLOCK_ID: return "VALUE_SYMTAB";
case bitc::METADATA_BLOCK_ID: return "METADATA_BLOCK";
case bitc::METADATA_ATTACHMENT_ID: return "METADATA_ATTACHMENT_BLOCK";
+ case bitc::USELIST_BLOCK_ID: return "USELIST_BLOCK_ID";
}
}
@@ -163,7 +162,6 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
default: return 0;
case bitc::PARAMATTR_CODE_ENTRY: return "ENTRY";
}
- case bitc::TYPE_BLOCK_ID_OLD:
case bitc::TYPE_BLOCK_ID_NEW:
switch (CodeID) {
default: return 0;
@@ -175,8 +173,6 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::TYPE_CODE_OPAQUE: return "OPAQUE";
case bitc::TYPE_CODE_INTEGER: return "INTEGER";
case bitc::TYPE_CODE_POINTER: return "POINTER";
- case bitc::TYPE_CODE_FUNCTION_OLD: return "FUNCTION_OLD";
- case bitc::TYPE_CODE_STRUCT_OLD: return "STRUCT_OLD";
case bitc::TYPE_CODE_ARRAY: return "ARRAY";
case bitc::TYPE_CODE_VECTOR: return "VECTOR";
case bitc::TYPE_CODE_X86_FP80: return "X86_FP80";
@@ -248,11 +244,6 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::FUNC_CODE_INST_CALL: return "INST_CALL";
case bitc::FUNC_CODE_DEBUG_LOC: return "DEBUG_LOC";
}
- case bitc::TYPE_SYMTAB_BLOCK_ID_OLD:
- switch (CodeID) {
- default: return 0;
- case bitc::TST_CODE_ENTRY: return "ENTRY";
- }
case bitc::VALUE_SYMTAB_BLOCK_ID:
switch (CodeID) {
default: return 0;
@@ -274,6 +265,11 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::METADATA_FN_NODE: return "METADATA_FN_NODE";
case bitc::METADATA_NAMED_NODE: return "METADATA_NAMED_NODE";
}
+ case bitc::USELIST_BLOCK_ID:
+ switch (CodeID) {
+ default: return 0;
+ case bitc::USELIST_CODE_ENTRY: return "USELIST_CODE_ENTRY";
+ }
}
}
diff --git a/tools/llvm-config-2/CMakeLists.txt b/tools/llvm-config-2/CMakeLists.txt
deleted file mode 100644
index 3c7169d3..0000000
--- a/tools/llvm-config-2/CMakeLists.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-set(LLVM_LINK_COMPONENTS support)
-
-# We need to generate the BuildVariables.inc file containing values which are
-# only defined when under certain build modes. Unfortunately, that precludes
-# doing this inside CMake so we have to shell out to sed. For now, that means we
-# can't expect to build llvm-config on Window.s
-set(BUILDVARIABLES_SRCPATH ${CMAKE_CURRENT_SOURCE_DIR}/BuildVariables.inc.in)
-set(BUILDVARIABLES_OBJPATH ${CMAKE_CURRENT_BINARY_DIR}/BuildVariables.inc)
-set(SEDSCRIPT_OBJPATH ${CMAKE_CURRENT_BINARY_DIR}/BuildVariables.configure.sed)
-
-# Compute the substitution values for various items.
-get_system_libs(LLVM_SYSTEM_LIBS_LIST)
-foreach(l ${LLVM_SYSTEM_LIBS_LIST})
- set(SYSTEM_LIBS ${SYSTEM_LIBS} "-l${l}")
-endforeach()
-set(C_FLGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
-set(CXX_FLGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
-set(CPP_FLGS "${CMAKE_CPP_FLAGS} ${CMAKE_CPP_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
-
-add_custom_command(OUTPUT ${BUILDVARIABLES_OBJPATH}
- COMMAND echo s!@LLVM_SRC_ROOT@!${LLVM_MAIN_SRC_DIR}! > ${SEDSCRIPT_OBJPATH}
- COMMAND echo s!@LLVM_OBJ_ROOT@!${LLVM_BINARY_DIR}! >> ${SEDSCRIPT_OBJPATH}
- COMMAND echo s!@LLVM_CPPFLAGS@!${CPP_FLGS}! >> ${SEDSCRIPT_OBJPATH}
- COMMAND echo s!@LLVM_CFLAGS@!${C_FLGS}! >> ${SEDSCRIPT_OBJPATH}
- COMMAND echo s!@LLVM_CXXFLAGS@!${CXX_FLGS}! >> ${SEDSCRIPT_OBJPATH}
- # TODO: Use general flags for linking! not just for shared libs:
- COMMAND echo s!@LLVM_LDFLAGS@!${CMAKE_SHARED_LINKER_FLAGS}! >> ${SEDSCRIPT_OBJPATH}
- COMMAND echo s!@LLVM_BUILDMODE@!${CMAKE_BUILD_TYPE}! >> ${SEDSCRIPT_OBJPATH}
- COMMAND echo s!@LLVM_SYSTEM_LIBS@!${SYSTEM_LIBS}! >> ${SEDSCRIPT_OBJPATH}
- COMMAND sed -f ${SEDSCRIPT_OBJPATH} < ${BUILDVARIABLES_SRCPATH} > ${BUILDVARIABLES_OBJPATH}
- VERBATIM
- COMMENT "Building BuildVariables.inc include."
- )
-
-# Add the llvm-config tool.
-add_llvm_tool(llvm-config-2
- llvm-config.cpp
- )
-
-# Add the dependency on the generation step.
-add_file_dependencies(${CMAKE_CURRENT_SOURCE_DIR}/llvm-config.cpp ${BUILDVARIABLES_OBJPATH})
diff --git a/tools/llvm-config-2/Makefile b/tools/llvm-config-2/Makefile
deleted file mode 100644
index 08e0844..0000000
--- a/tools/llvm-config-2/Makefile
+++ /dev/null
@@ -1,57 +0,0 @@
-##===- tools/llvm-config/Makefile---------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL := ../..
-TOOLNAME := llvm-config-2
-USEDLIBS := LLVMSupport.a
-
-# We generate sources in the build directory, make sure it is in the include
-# paths.
-INCLUDE_BUILD_DIR := 1
-
-# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS := 1
-
-# Note that we have to use lazy expansion here.
-BUILDVARIABLES_SRCPATH = $(PROJ_SRC_ROOT)/tools/$(TOOLNAME)/BuildVariables.inc.in
-BUILDVARIABLES_OBJPATH = $(ObjDir)/BuildVariables.inc
-BUILT_SOURCES = $(BUILDVARIABLES_OBJPATH)
-
-include $(LEVEL)/Makefile.common
-
-# Combine preprocessor flags (except for -I) and CXX flags.
-SUB_CPPFLAGS := ${CPP.BaseFlags}
-SUB_CFLAGS := ${CPP.BaseFlags} ${C.Flags}
-SUB_CXXFLAGS := ${CPP.BaseFlags} ${CXX.Flags}
-
-# This is blank for now. We need to be careful about adding stuff here:
-# LDFLAGS tend not to be portable, and we don't currently require the
-# user to use libtool when linking against LLVM.
-SUB_LDFLAGS :=
-
-$(ObjDir)/BuildVariables.inc: $(BUILDVARIABLES_SRCPATH) Makefile $(ObjDir)/.dir
- $(Echo) "Building llvm-config BuildVariables.inc file."
- $(Verb) $(ECHO) 's/@LLVM_SRC_ROOT@/$(subst /,\/,$(LLVM_SRC_ROOT))/' \
- > temp.sed
- $(Verb) $(ECHO) 's/@LLVM_OBJ_ROOT@/$(subst /,\/,$(LLVM_OBJ_ROOT))/' \
- >> temp.sed
- $(Verb) $(ECHO) 's/@LLVM_CPPFLAGS@/$(subst /,\/,$(SUB_CPPFLAGS))/' \
- >> temp.sed
- $(Verb) $(ECHO) 's/@LLVM_CFLAGS@/$(subst /,\/,$(SUB_CFLAGS))/' \
- >> temp.sed
- $(Verb) $(ECHO) 's/@LLVM_CXXFLAGS@/$(subst /,\/,$(SUB_CXXFLAGS))/' \
- >> temp.sed
- $(Verb) $(ECHO) 's/@LLVM_LDFLAGS@/$(subst /,\/,$(SUB_LDFLAGS))/' \
- >> temp.sed
- $(Verb) $(ECHO) 's/@LLVM_BUILDMODE@/$(subst /,\/,$(BuildMode))/' \
- >> temp.sed
- $(Verb) $(ECHO) 's/@LLVM_SYSTEM_LIBS@/$(subst /,\/,$(LIBS))/' \
- >> temp.sed
- $(Verb) $(SED) -f temp.sed < $< > $@
- $(Verb) $(RM) temp.sed
diff --git a/tools/llvm-config-2/BuildVariables.inc.in b/tools/llvm-config/BuildVariables.inc.in
index 5969772..fe87afb 100644
--- a/tools/llvm-config-2/BuildVariables.inc.in
+++ b/tools/llvm-config/BuildVariables.inc.in
@@ -23,4 +23,5 @@
#define LLVM_LDFLAGS "@LLVM_LDFLAGS@"
#define LLVM_CXXFLAGS "@LLVM_CXXFLAGS@"
#define LLVM_BUILDMODE "@LLVM_BUILDMODE@"
+#define LLVM_TARGETS_BUILT "@LLVM_TARGETS_BUILT@"
#define LLVM_SYSTEM_LIBS "@LLVM_SYSTEM_LIBS@"
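
After the sed substitution set up in the CMake and Makefile rules, BuildVariables.inc is a plain header of string macros; the new LLVM_TARGETS_BUILT entry is what lets llvm-config.cpp answer --targets-built without linking the target registry (see the llvm-config.cpp hunk below). A hypothetical post-substitution result, all values purely illustrative:

    // Hypothetical generated BuildVariables.inc (all values illustrative).
    #define LLVM_CPPFLAGS       "-D_GNU_SOURCE -D__STDC_LIMIT_MACROS"
    #define LLVM_LDFLAGS        ""
    #define LLVM_CXXFLAGS       "-O3 -fno-exceptions -fno-rtti"
    #define LLVM_BUILDMODE      "Release"
    #define LLVM_TARGETS_BUILT  "X86 ARM Mips"
    #define LLVM_SYSTEM_LIBS    "-lpthread -ldl -lm"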
diff --git a/tools/llvm-config/CMakeLists.txt b/tools/llvm-config/CMakeLists.txt
index 53f0084..5ad58bf 100644
--- a/tools/llvm-config/CMakeLists.txt
+++ b/tools/llvm-config/CMakeLists.txt
@@ -1,138 +1,42 @@
-include(TestBigEndian)
+set(LLVM_LINK_COMPONENTS support)
-include(FindPerl)
-if( NOT PERL_FOUND )
- message(FATAL_ERROR "Perl required but not found!")
-endif( NOT PERL_FOUND )
-
-set(PERL ${PERL_EXECUTABLE})
-set(VERSION PACKAGE_VERSION)
-set(PREFIX ${CMAKE_INSTALL_PREFIX})
-set(abs_top_srcdir ${LLVM_MAIN_SRC_DIR})
-set(abs_top_builddir ${LLVM_BINARY_DIR})
-execute_process(COMMAND date
- OUTPUT_VARIABLE LLVM_CONFIGTIME
- OUTPUT_STRIP_TRAILING_WHITESPACE)
-# LLVM_ON_UNIX and LLVM_ON_WIN32 already set.
-# those are set to blank by `autoconf' on MinGW, so it seems they are not required:
-#set(LLVMGCCDIR "")
-#set(LLVMGCC "")
-#set(LLVMGXX "")
-test_big_endian(IS_BIG_ENDIAN)
-if( IS_BIG_ENDIAN )
- set(ENDIAN "big")
-else( IS_BIG_ENDIAN )
- set(ENDIAN "little")
-endif( IS_BIG_ENDIAN )
-set(SHLIBEXT ${LTDL_SHLIB_EXT})
-#EXEEXT already set.
-set(OS "${CMAKE_SYSTEM}")
-set(target "${TARGET_TRIPLE}")
-set(ARCH "${LLVM_NATIVE_ARCH}")
+# We need to generate the BuildVariables.inc file containing values which are
+# only defined under certain build modes. Unfortunately, that precludes doing
+# this inside CMake, so we have to shell out to sed. For now, that means we
+# can't expect to build llvm-config on Windows.
+set(BUILDVARIABLES_SRCPATH ${CMAKE_CURRENT_SOURCE_DIR}/BuildVariables.inc.in)
+set(BUILDVARIABLES_OBJPATH ${CMAKE_CURRENT_BINARY_DIR}/BuildVariables.inc)
+set(SEDSCRIPT_OBJPATH ${CMAKE_CURRENT_BINARY_DIR}/BuildVariables.configure.sed)
+# Compute the substitution values for various items.
get_system_libs(LLVM_SYSTEM_LIBS_LIST)
foreach(l ${LLVM_SYSTEM_LIBS_LIST})
- set(LLVM_SYSTEM_LIBS ${LLVM_SYSTEM_LIBS} "-l${l}")
+ set(SYSTEM_LIBS ${SYSTEM_LIBS} "-l${l}")
endforeach()
-
-foreach(c ${LLVM_TARGETS_TO_BUILD})
- set(TARGETS_BUILT "${TARGETS_BUILT} ${c}")
-endforeach(c)
-set(TARGETS_TO_BUILD ${TARGETS_BUILT})
-set(TARGET_HAS_JIT "1") # TODO
-
-# Avoids replacement at config-time:
-set(LLVM_CPPFLAGS "@LLVM_CPPFLAGS@")
-set(LLVM_CFLAGS "@LLVM_CFLAGS@")
-set(LLVM_CXXFLAGS "@LLVM_CXXFLAGS@")
-set(LLVM_LDFLAGS "@LLVM_LDFLAGS@")
-set(LIBS "@LIBS@")
-set(LLVM_BUILDMODE "@LLVM_BUILDMODE@")
-set(LLVM_OBJ_SUFFIX "@LLVM_OBJ_SUFFIX@")
-
-configure_file(
- ${CMAKE_CURRENT_SOURCE_DIR}/llvm-config.in.in
- ${CMAKE_CURRENT_BINARY_DIR}/llvm-config.in
- @ONLY
-)
-
-set(LIBDEPS LibDeps.txt)
-set(LIBDEPS_TMP LibDeps.txt.tmp)
-set(FINAL_LIBDEPS FinalLibDeps.txt)
-set(LLVM_CONFIG ${LLVM_TOOLS_BINARY_DIR}/llvm-config)
-set(LLVM_CONFIG_IN ${CMAKE_CURRENT_BINARY_DIR}/llvm-config.in)
-
-if( CMAKE_CROSSCOMPILING )
- set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY)
-endif()
-
-find_program(NM_PATH nm PATH_SUFFIXES /bin)
-
-if( NOT NM_PATH )
- message(FATAL_ERROR "`nm' not found")
-endif()
-
-get_property(llvm_libs GLOBAL PROPERTY LLVM_LIBS)
-
-add_custom_command(OUTPUT ${LIBDEPS_TMP}
- COMMAND ${PERL_EXECUTABLE} ${LLVM_MAIN_SRC_DIR}/utils/GenLibDeps.pl -flat ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/${CMAKE_CFG_INTDIR} ${NM_PATH} > ${LIBDEPS_TMP}
- DEPENDS ${llvm_libs}
- COMMENT "Regenerating ${LIBDEPS_TMP}")
-
-add_custom_command(OUTPUT ${LIBDEPS}
- COMMAND ${CMAKE_COMMAND} -E copy_if_different ${LIBDEPS_TMP} ${LIBDEPS}
- DEPENDS ${LIBDEPS_TMP}
- COMMENT "Updating ${LIBDEPS} if necessary...")
-
-# This must stop the build if find-cycles.pl returns error:
-add_custom_command(OUTPUT ${FINAL_LIBDEPS}
- COMMAND ${CMAKE_COMMAND} -E remove -f ${FINAL_LIBDEPS} ${FINAL_LIBDEPS}.tmp
- COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/find-cycles.pl < ${LIBDEPS} > ${FINAL_LIBDEPS}.tmp
- COMMAND ${CMAKE_COMMAND} -E copy ${FINAL_LIBDEPS}.tmp ${FINAL_LIBDEPS}
- DEPENDS ${LIBDEPS}
- COMMENT "Checking for cyclic dependencies between LLVM libraries.")
-
set(C_FLGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
set(CXX_FLGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
set(CPP_FLGS "${CMAKE_CPP_FLAGS} ${CMAKE_CPP_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
-# We don't want certain flags on the output of
-# llvm-config --cflags --cxxflags
-macro(remove_option_from_llvm_config option)
- llvm_replace_compiler_option(C_FLGS "${option}" "")
- llvm_replace_compiler_option(CXX_FLGS "${option}" "")
- llvm_replace_compiler_option(CPP_FLGS "${option}" "")
-endmacro(remove_option_from_llvm_config)
-remove_option_from_llvm_config("-pedantic")
-remove_option_from_llvm_config("-Wall")
-remove_option_from_llvm_config("-W")
-
-add_custom_command(OUTPUT ${LLVM_CONFIG}
- COMMAND echo s!@LLVM_CPPFLAGS@!${CPP_FLGS}! > temp.sed
- COMMAND echo s!@LLVM_CFLAGS@!${C_FLGS}! >> temp.sed
- COMMAND echo s!@LLVM_CXXFLAGS@!${CXX_FLGS}! >> temp.sed
+add_custom_command(OUTPUT ${BUILDVARIABLES_OBJPATH}
+ COMMAND echo s!@LLVM_SRC_ROOT@!${LLVM_MAIN_SRC_DIR}! > ${SEDSCRIPT_OBJPATH}
+ COMMAND echo s!@LLVM_OBJ_ROOT@!${LLVM_BINARY_DIR}! >> ${SEDSCRIPT_OBJPATH}
+ COMMAND echo s!@LLVM_CPPFLAGS@!${CPP_FLGS}! >> ${SEDSCRIPT_OBJPATH}
+ COMMAND echo s!@LLVM_CFLAGS@!${C_FLGS}! >> ${SEDSCRIPT_OBJPATH}
+ COMMAND echo s!@LLVM_CXXFLAGS@!${CXX_FLGS}! >> ${SEDSCRIPT_OBJPATH}
# TODO: Use general flags for linking! not just for shared libs:
- COMMAND echo s!@LLVM_LDFLAGS@!${CMAKE_SHARED_LINKER_FLAGS}! >> temp.sed
- COMMAND echo s!@LIBS@!${LLVM_SYSTEM_LIBS}! >> temp.sed
- COMMAND echo s!@LLVM_BUILDMODE@!${CMAKE_BUILD_TYPE}! >> temp.sed
- COMMAND echo s!@LLVM_OBJ_SUFFIX@!! >> temp.sed
- COMMAND sed -f temp.sed < ${LLVM_CONFIG_IN} > ${LLVM_CONFIG}
- COMMAND ${CMAKE_COMMAND} -E remove -f temp.sed
- COMMAND cat ${FINAL_LIBDEPS} >> ${LLVM_CONFIG}
- COMMAND chmod +x ${LLVM_CONFIG}
+ COMMAND echo s!@LLVM_LDFLAGS@!${CMAKE_SHARED_LINKER_FLAGS}! >> ${SEDSCRIPT_OBJPATH}
+ COMMAND echo s!@LLVM_BUILDMODE@!${CMAKE_BUILD_TYPE}! >> ${SEDSCRIPT_OBJPATH}
+ COMMAND echo s!@LLVM_SYSTEM_LIBS@!${SYSTEM_LIBS}! >> ${SEDSCRIPT_OBJPATH}
+ COMMAND echo s!@LLVM_TARGETS_BUILT@!${LLVM_TARGETS_TO_BUILD}! >> ${SEDSCRIPT_OBJPATH}
+ COMMAND sed -f ${SEDSCRIPT_OBJPATH} < ${BUILDVARIABLES_SRCPATH} > ${BUILDVARIABLES_OBJPATH}
VERBATIM
- DEPENDS ${FINAL_LIBDEPS} ${LLVM_CONFIG_IN}
- COMMENT "Building llvm-config script."
+ COMMENT "Building BuildVariables.inc include."
)
-add_custom_target(llvm-config.target ALL
- DEPENDS ${LLVM_CONFIG})
-
-# Ensure we build llvm-config after we build all of the libraries so that we
-# have their full dependencies.
-add_dependencies(llvm-config.target ${llvm_libs})
+# Add the llvm-config tool.
+add_llvm_tool(llvm-config
+ llvm-config.cpp
+ )
-install(FILES ${LLVM_CONFIG}
- PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE
- WORLD_READ WORLD_EXECUTE
- DESTINATION bin)
+# Add the dependency on the generation step.
+add_file_dependencies(${CMAKE_CURRENT_SOURCE_DIR}/llvm-config.cpp ${BUILDVARIABLES_OBJPATH})
diff --git a/tools/llvm-config/Makefile b/tools/llvm-config/Makefile
index 184919e..3f11730 100644
--- a/tools/llvm-config/Makefile
+++ b/tools/llvm-config/Makefile
@@ -1,5 +1,5 @@
-##===- tools/llvm-config/Makefile --------------------------*- Makefile -*-===##
-#
+##===- tools/llvm-config/Makefile---------------------------*- Makefile -*-===##
+#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
@@ -7,57 +7,42 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
+LEVEL := ../..
+TOOLNAME := llvm-config
+USEDLIBS := LLVMSupport.a
-EXTRA_DIST = LibDeps.txt FinalLibDeps.txt llvm-config.in.in find-cycles.pl
+# We generate sources in the build directory; make sure it is in the include
+# paths.
+INCLUDE_BUILD_DIR := 1
-include $(LEVEL)/Makefile.common
+# This tool has no plugins, so optimize startup time.
+TOOL_NO_EXPORTS := 1
-# If we don't have Perl, we can't generate the library dependencies upon which
-# llvm-config depends. Therefore, only if we detect perl will we do anything
-# useful.
-ifeq ($(HAVE_PERL),1)
+# Note that we have to use lazy expansion here.
+BUILDVARIABLES_SRCPATH = $(PROJ_SRC_ROOT)/tools/$(TOOLNAME)/BuildVariables.inc.in
+BUILDVARIABLES_OBJPATH = $(ObjDir)/BuildVariables.inc
+BUILT_SOURCES = $(BUILDVARIABLES_OBJPATH)
+
+include $(LEVEL)/Makefile.common
# Combine preprocessor flags (except for -I) and CXX flags.
-SUB_CPPFLAGS = ${CPP.BaseFlags}
-SUB_CFLAGS = ${CPP.BaseFlags} ${C.Flags}
-SUB_CXXFLAGS = ${CPP.BaseFlags} ${CXX.Flags}
+SUB_CPPFLAGS := ${CPP.BaseFlags}
+SUB_CFLAGS := ${CPP.BaseFlags} ${C.Flags}
+SUB_CXXFLAGS := ${CPP.BaseFlags} ${CXX.Flags}
# This is blank for now. We need to be careful about adding stuff here:
# LDFLAGS tend not to be portable, and we don't currently require the
# user to use libtool when linking against LLVM.
-SUB_LDFLAGS =
-
-FinalLibDeps = $(PROJ_OBJ_DIR)/FinalLibDeps.txt
-LibDeps = $(PROJ_OBJ_DIR)/LibDeps.txt
-LibDepsTemp = $(PROJ_OBJ_DIR)/LibDeps.txt.tmp
-GenLibDeps = $(PROJ_SRC_ROOT)/utils/GenLibDeps.pl
-
-$(LibDepsTemp): $(GenLibDeps) $(LibDir) $(wildcard $(LibDir)/*.a $(LibDir)/*.o)
- $(Echo) "Regenerating LibDeps.txt.tmp"
- $(Verb) $(PERL) $(GenLibDeps) -flat $(LibDir) "$(NM_PATH)" > $(LibDepsTemp)
-
-$(LibDeps): $(LibDepsTemp)
- $(Verb) $(CMP) -s $@ $< || ( $(CP) $< $@ && \
- $(EchoCmd) Updated LibDeps.txt because dependencies changed )
-
-# Find all the cyclic dependencies between various LLVM libraries, so we
-# don't have to process them at runtime.
-$(FinalLibDeps): find-cycles.pl $(LibDeps)
- $(Echo) "Checking for cyclic dependencies between LLVM libraries."
- $(Verb) $(PERL) $< < $(LibDeps) > $@ || rm -f $@
+SUB_LDFLAGS :=
-# Rerun our configure substitutions as needed.
-ConfigInIn = $(PROJ_SRC_DIR)/llvm-config.in.in
-llvm-config.in: $(ConfigInIn) $(ConfigStatusScript)
- $(Verb) cd $(PROJ_OBJ_ROOT) ; \
- $(ConfigStatusScript) tools/llvm-config/llvm-config.in
-
-# Build our final script.
-$(ToolDir)/llvm-config: llvm-config.in $(FinalLibDeps)
- $(Echo) "Building llvm-config script."
- $(Verb) $(ECHO) 's/@LLVM_CPPFLAGS@/$(subst /,\/,$(SUB_CPPFLAGS))/' \
+$(ObjDir)/BuildVariables.inc: $(BUILDVARIABLES_SRCPATH) Makefile $(ObjDir)/.dir
+ $(Echo) "Building llvm-config BuildVariables.inc file."
+ $(Verb) $(ECHO) 's/@LLVM_SRC_ROOT@/$(subst /,\/,$(LLVM_SRC_ROOT))/' \
> temp.sed
+ $(Verb) $(ECHO) 's/@LLVM_OBJ_ROOT@/$(subst /,\/,$(LLVM_OBJ_ROOT))/' \
+ >> temp.sed
+ $(Verb) $(ECHO) 's/@LLVM_CPPFLAGS@/$(subst /,\/,$(SUB_CPPFLAGS))/' \
+ >> temp.sed
$(Verb) $(ECHO) 's/@LLVM_CFLAGS@/$(subst /,\/,$(SUB_CFLAGS))/' \
>> temp.sed
$(Verb) $(ECHO) 's/@LLVM_CXXFLAGS@/$(subst /,\/,$(SUB_CXXFLAGS))/' \
@@ -66,28 +51,9 @@ $(ToolDir)/llvm-config: llvm-config.in $(FinalLibDeps)
>> temp.sed
$(Verb) $(ECHO) 's/@LLVM_BUILDMODE@/$(subst /,\/,$(BuildMode))/' \
>> temp.sed
- $(Verb) $(ECHO) 's/@LLVM_OBJ_SUFFIX@/$(subst /,\/,/$(BuildMode))/' \
+ $(Verb) $(ECHO) 's/@LLVM_SYSTEM_LIBS@/$(subst /,\/,$(LIBS))/' \
+ >> temp.sed
+ $(Verb) $(ECHO) 's/@LLVM_TARGETS_BUILT@/$(subst /,\/,$(TARGETS_TO_BUILD))/' \
>> temp.sed
$(Verb) $(SED) -f temp.sed < $< > $@
$(Verb) $(RM) temp.sed
- $(Verb) cat $(FinalLibDeps) >> $@
- $(Verb) chmod +x $@
-
-else
-# We don't have perl, just generate a dummy llvm-config
-$(ToolDir)/llvm-config:
- $(Echo) "Building place holder llvm-config script."
- $(Verb) $(ECHO) 'echo llvm-config: Perl not found so llvm-config could not be generated' >> $@
- $(Verb) chmod +x $@
-
-endif
-# Hook into the standard Makefile rules.
-all-local:: $(ToolDir)/llvm-config
-clean-local::
- $(Verb) $(RM) -f $(ToolDir)/llvm-config llvm-config.in $(FinalLibDeps) \
- $(LibDeps) GenLibDeps.out
-install-local:: all-local
- $(Echo) Installing llvm-config
- $(Verb) $(MKDIR) $(DESTDIR)$(PROJ_bindir)
- $(Verb) $(ScriptInstall) $(ToolDir)/llvm-config $(DESTDIR)$(PROJ_bindir)
-
diff --git a/tools/llvm-config/find-cycles.pl b/tools/llvm-config/find-cycles.pl
deleted file mode 100755
index 5cbf5b4..0000000
--- a/tools/llvm-config/find-cycles.pl
+++ /dev/null
@@ -1,170 +0,0 @@
-#!/usr/bin/perl
-#
-# Program: find-cycles.pl
-#
-# Synopsis: Given a list of possibly cyclic dependencies, merge all the
-# cycles. This makes it possible to topologically sort the
-# dependencies between different parts of LLVM.
-#
-# Syntax: find-cycles.pl < LibDeps.txt > FinalLibDeps.txt
-#
-# Input: cycmem1: cycmem2 dep1 dep2
-# cycmem2: cycmem1 dep3 dep4
-# boring: dep4
-#
-# Output: cycmem1 cycmem2: dep1 dep2 dep3 dep4
-# boring: dep4
-#
-# This file was written by Eric Kidd, and is placed into the public domain.
-#
-
-use 5.006;
-use strict;
-use warnings;
-
-my %DEPS;
-my @CYCLES;
-sub find_all_cycles;
-
-# Read our dependency information.
-while (<>) {
- chomp;
- my ($module, $dependency_str) = /^\s*([^:]+):\s*(.*)\s*$/;
- die "Malformed data: $_" unless defined $dependency_str;
- my @dependencies = split(/ /, $dependency_str);
- $DEPS{$module} = \@dependencies;
-}
-
-# Partition our raw dependencies into sets of cyclically-connected nodes.
-find_all_cycles();
-
-# Print out the finished cycles, with their dependencies.
-my @output;
-my $cycles_found = 0;
-foreach my $cycle (@CYCLES) {
- my @modules = sort keys %{$cycle};
-
- # Merge the dependencies of all modules in this cycle.
- my %dependencies;
- foreach my $module (@modules) {
- @dependencies{@{$DEPS{$module}}} = 1;
- }
-
- # Prune the known cyclic dependencies.
- foreach my $module (@modules) {
- delete $dependencies{$module};
- }
-
- # Warn about possible linker problems.
- my @archives = grep(/\.a$/, @modules);
- if (@archives > 1) {
- $cycles_found = $cycles_found + 1;
- print STDERR "find-cycles.pl: Circular dependency between *.a files:\n";
- print STDERR "find-cycles.pl: ", join(' ', @archives), "\n";
- push @modules, @archives; # WORKAROUND: Duplicate *.a files. Ick.
- } elsif (@modules > 1) {
- $cycles_found = $cycles_found + 1;
- print STDERR "find-cycles.pl: Circular dependency between *.o files:\n";
- print STDERR "find-cycles.pl: ", join(' ', @modules), "\n";
- push @modules, @modules; # WORKAROUND: Duplicate *.o files. Ick.
- }
-
- # Add to our output. (@modules is already as sorted as we need it to be.)
- push @output, (join(' ', @modules) . ': ' .
- join(' ', sort keys %dependencies) . "\n");
-}
-print sort @output;
-
-exit $cycles_found;
-
-#==========================================================================
-# Depedency Cycle Support
-#==========================================================================
-# For now, we have cycles in our dependency graph. Ideally, each cycle
-# would be collapsed down to a single *.a file, saving us all this work.
-#
-# To understand this code, you'll need a working knowledge of Perl 5,
-# and possibly some quality time with 'man perlref'.
-
-my %SEEN;
-my %CYCLES;
-sub find_cycles ($@);
-sub found_cycles ($@);
-
-sub find_all_cycles {
- # Find all multi-item cycles.
- my @modules = sort keys %DEPS;
- foreach my $module (@modules) { find_cycles($module); }
-
- # Build fake one-item "cycles" for the remaining modules, so we can
- # treat them uniformly.
- foreach my $module (@modules) {
- unless (defined $CYCLES{$module}) {
- my %cycle = ($module, 1);
- $CYCLES{$module} = \%cycle;
- }
- }
-
- # Find all our unique cycles. We have to do this the hard way because
- # we apparently can't store hash references as hash keys without making
- # 'strict refs' sad.
- my %seen;
- foreach my $cycle (values %CYCLES) {
- unless ($seen{$cycle}) {
- $seen{$cycle} = 1;
- push @CYCLES, $cycle;
- }
- }
-}
-
-# Walk through our graph depth-first (keeping a trail in @path), and report
-# any cycles we find.
-sub find_cycles ($@) {
- my ($module, @path) = @_;
- if (str_in_list($module, @path)) {
- found_cycle($module, @path);
- } else {
- return if defined $SEEN{$module};
- $SEEN{$module} = 1;
- foreach my $dep (@{$DEPS{$module}}) {
- find_cycles($dep, @path, $module);
- }
- }
-}
-
-# Give a cycle, attempt to merge it with pre-existing cycle data.
-sub found_cycle ($@) {
- my ($module, @path) = @_;
-
- # Pop any modules which aren't part of our cycle.
- while ($path[0] ne $module) { shift @path; }
- #print join("->", @path, $module) . "\n";
-
- # Collect the modules in our cycle into a hash.
- my %cycle;
- foreach my $item (@path) {
- $cycle{$item} = 1;
- if (defined $CYCLES{$item}) {
- # Looks like we intersect with an existing cycle, so merge
- # all those in, too.
- foreach my $old_item (keys %{$CYCLES{$item}}) {
- $cycle{$old_item} = 1;
- }
- }
- }
-
- # Update our global cycle table.
- my $cycle_ref = \%cycle;
- foreach my $item (keys %cycle) {
- $CYCLES{$item} = $cycle_ref;
- }
- #print join(":", sort keys %cycle) . "\n";
-}
-
-sub str_in_list ($@) {
- my ($str, @list) = @_;
- foreach my $item (@list) {
- return 1 if ($item eq $str);
- }
- return 0;
-}
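
The deleted find-cycles.pl merged mutually dependent libraries into groups so the old Perl llvm-config could emit them together; the C++ rewrite no longer needs it because LLVMBuild supplies the component data. What the script computed is exactly strongly connected components; a compact sketch using Tarjan's algorithm (plain C++, not LLVM code, hypothetical names):

    // Sketch (not LLVM code): grouping mutually dependent libraries is
    // strongly-connected-component computation; Tarjan's algorithm does it
    // in a single DFS. Nodes appearing only as dependencies are reached
    // through their dependents.
    #include <algorithm>
    #include <map>
    #include <string>
    #include <vector>

    typedef std::map<std::string, std::vector<std::string> > Graph;

    struct Tarjan {
      const Graph &G;
      std::map<std::string, int> Index, Low;
      std::map<std::string, bool> OnStack;
      std::vector<std::string> Stack;
      std::vector<std::vector<std::string> > SCCs;
      int Next;

      Tarjan(const Graph &Deps) : G(Deps), Next(0) {}

      void visit(const std::string &V) {
        Index[V] = Low[V] = Next++;
        Stack.push_back(V);
        OnStack[V] = true;
        Graph::const_iterator It = G.find(V);
        if (It != G.end())
          for (size_t I = 0; I != It->second.size(); ++I) {
            const std::string &W = It->second[I];
            if (!Index.count(W)) {        // tree edge: recurse
              visit(W);
              Low[V] = std::min(Low[V], Low[W]);
            } else if (OnStack[W]) {      // back edge into the current path
              Low[V] = std::min(Low[V], Index[W]);
            }
          }
        if (Low[V] == Index[V]) {         // V roots a component; pop it off
          std::vector<std::string> SCC;
          std::string W;
          do {
            W = Stack.back();
            Stack.pop_back();
            OnStack[W] = false;
            SCC.push_back(W);
          } while (W != V);
          SCCs.push_back(SCC);
        }
      }

      void run() {
        for (Graph::const_iterator I = G.begin(), E = G.end(); I != E; ++I)
          if (!Index.count(I->first))
            visit(I->first);
      }
    };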
diff --git a/tools/llvm-config-2/llvm-config.cpp b/tools/llvm-config/llvm-config.cpp
index fddd481..2bb0aeb 100644
--- a/tools/llvm-config-2/llvm-config.cpp
+++ b/tools/llvm-config/llvm-config.cpp
@@ -25,7 +25,6 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
#include <set>
@@ -149,7 +148,6 @@ Options:\n\
--build-mode Print build mode of LLVM tree (e.g. Debug or Release).\n\
Typical components:\n\
all All LLVM libraries (default).\n\
- backend Either a native backend or the C backend.\n\
engine Either a native JIT or a bitcode interpreter.\n";
exit(1);
}
@@ -210,16 +208,15 @@ int main(int argc, char **argv) {
std::string ActivePrefix, ActiveBinDir, ActiveIncludeDir, ActiveLibDir;
std::string ActiveIncludeOption;
if (IsInDevelopmentTree) {
+ ActiveIncludeDir = std::string(LLVM_SRC_ROOT) + "/include";
ActivePrefix = CurrentExecPrefix;
// CMake organizes the products differently than a normal prefix style
// layout.
if (DevelopmentTreeLayoutIsCMakeStyle) {
- ActiveIncludeDir = ActiveObjRoot + "/include";
ActiveBinDir = ActiveObjRoot + "/bin/" + LLVM_BUILDMODE;
ActiveLibDir = ActiveObjRoot + "/lib/" + LLVM_BUILDMODE;
} else {
- ActiveIncludeDir = ActiveObjRoot + "/include";
ActiveBinDir = ActiveObjRoot + "/" + LLVM_BUILDMODE + "/bin";
ActiveLibDir = ActiveObjRoot + "/" + LLVM_BUILDMODE + "/lib";
}
@@ -273,14 +270,7 @@ int main(int argc, char **argv) {
}
OS << '\n';
} else if (Arg == "--targets-built") {
- bool First = true;
- for (TargetRegistry::iterator I = TargetRegistry::begin(),
- E = TargetRegistry::end(); I != E; First = false, ++I) {
- if (!First)
- OS << ' ';
- OS << I->getName();
- }
- OS << '\n';
+ OS << LLVM_TARGETS_BUILT << '\n';
} else if (Arg == "--host-target") {
OS << LLVM_DEFAULT_TARGET_TRIPLE << '\n';
} else if (Arg == "--build-mode") {
@@ -301,6 +291,10 @@ int main(int argc, char **argv) {
usage();
if (PrintLibs || PrintLibNames || PrintLibFiles) {
+ // If no components were specified, default to "all".
+ if (Components.empty())
+ Components.push_back("all");
+
// Construct the list of all the required libraries.
std::vector<StringRef> RequiredLibs;
ComputeLibsForComponents(Components, RequiredLibs);
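
With this hunk, --targets-built prints the compile-time LLVM_TARGETS_BUILT string instead of iterating TargetRegistry, so llvm-config no longer depends on the target libraries, and an empty component list now defaults to "all" only on the paths that actually print libraries. The output stays one space-separated line; a hypothetical client-side parse:

    // Hypothetical consumer of `llvm-config --targets-built` output,
    // e.g. "X86 ARM Mips" -> {"X86", "ARM", "Mips"}.
    #include <sstream>
    #include <string>
    #include <vector>

    std::vector<std::string> parseTargets(const std::string &Line) {
      std::vector<std::string> Targets;
      std::istringstream In(Line);
      for (std::string Tok; In >> Tok; )
        Targets.push_back(Tok);
      return Targets;
    }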
diff --git a/tools/llvm-config/llvm-config.in.in b/tools/llvm-config/llvm-config.in.in
deleted file mode 100644
index d811e59..0000000
--- a/tools/llvm-config/llvm-config.in.in
+++ /dev/null
@@ -1,466 +0,0 @@
-#!@PERL@
-##===- tools/llvm-config ---------------------------------------*- perl -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-#
-# Synopsis: Prints out compiler options needed to build against an installed
-# copy of LLVM.
-#
-# Syntax: llvm-config OPTIONS... [COMPONENTS...]
-#
-##===----------------------------------------------------------------------===##
-
-use 5.006;
-use strict;
-use warnings;
-use Cwd 'abs_path';
-
-#---- begin autoconf values ----
-my $PACKAGE_NAME = q{@PACKAGE_NAME@};
-my $VERSION = q{@PACKAGE_VERSION@};
-my $PREFIX = q{@LLVM_PREFIX@};
-my $LLVM_CONFIGTIME = q{@LLVM_CONFIGTIME@};
-my $LLVM_SRC_ROOT = q{@abs_top_srcdir@};
-my $LLVM_OBJ_ROOT = q{@abs_top_builddir@};
-my $ARCH = lc(q{@ARCH@});
-my $TARGET_TRIPLE = q{@target@};
-my $TARGETS_TO_BUILD = q{@TARGETS_TO_BUILD@};
-my $TARGET_HAS_JIT = q{@TARGET_HAS_JIT@};
-my @TARGETS_BUILT = map { lc($_) } qw{@TARGETS_TO_BUILD@};
-#---- end autoconf values ----
-
-# Must pretend x86_64 architecture is really x86, otherwise the native backend
-# won't get linked in.
-$ARCH = "x86" if $ARCH eq "x86_64";
-
-#---- begin Makefile values ----
-my $CPPFLAGS = q{@LLVM_CPPFLAGS@};
-my $CFLAGS = q{@LLVM_CFLAGS@};
-my $CXXFLAGS = q{@LLVM_CXXFLAGS@};
-my $LDFLAGS = q{@LLVM_LDFLAGS@};
-my $SYSTEM_LIBS = q{@LIBS@};
-my $LLVM_BUILDMODE = q{@LLVM_BUILDMODE@};
-my $LLVM_OBJ_SUFFIX = q{@LLVM_OBJ_SUFFIX@};
-#---- end Makefile values ----
-
-# Figure out where llvm-config is being run from. Primarily, we care if it has
-# been installed, or is running from the build directory, which changes the
-# locations of some files.
-
-# Convert the current executable name into its directory (e.g. ".").
-my ($RUN_DIR) = ($0 =~ /^(.*)\/.*$/);
-
-# Turn the directory into an absolute directory on the file system, also pop up
-# from "bin" into the build or prefix dir.
-my $ABS_RUN_DIR = abs_path("$RUN_DIR/..");
-chomp($ABS_RUN_DIR);
-
-# Compute the absolute object directory build, e.g. "foo/llvm/Debug".
-my $ABS_OBJ_ROOT = "$LLVM_OBJ_ROOT$LLVM_OBJ_SUFFIX";
-$ABS_OBJ_ROOT = abs_path("$ABS_OBJ_ROOT") if (-d $ABS_OBJ_ROOT);
-chomp($ABS_OBJ_ROOT);
-
-my $INCLUDEDIR = "$ABS_RUN_DIR/include";
-my $INCLUDEOPTION = "-I$INCLUDEDIR";
-my $LIBDIR = "$ABS_RUN_DIR/lib";
-my $BINDIR = "$ABS_RUN_DIR/bin";
-if ($ABS_RUN_DIR eq $ABS_OBJ_ROOT) {
- # If we are running out of the build directory, the include dir is in the
- # srcdir.
- $INCLUDEDIR = "$LLVM_SRC_ROOT/include";
- # We need include files from both the srcdir and objdir.
- $INCLUDEOPTION = "-I$INCLUDEDIR -I$LLVM_OBJ_ROOT/include"
-} else {
- # If installed, ignore the prefix the tree was configured with, use the
- # current prefix.
- $PREFIX = $ABS_RUN_DIR;
-}
-
-sub usage;
-sub fix_library_names (@);
-sub fix_library_files (@);
-sub expand_dependencies (@);
-sub name_map_entries;
-
-# Parse our command-line arguments.
-usage if @ARGV == 0;
-my @components;
-my $has_opt = 0;
-my $want_libs = 0;
-my $want_libnames = 0;
-my $want_libfiles = 0;
-my $want_components = 0;
-foreach my $arg (@ARGV) {
- if ($arg =~ /^-/) {
- if ($arg eq "--version") {
- $has_opt = 1; print "$VERSION\n";
- } elsif ($arg eq "--prefix") {
- $has_opt = 1; print "$PREFIX\n";
- } elsif ($arg eq "--bindir") {
- $has_opt = 1; print "$BINDIR\n";
- } elsif ($arg eq "--includedir") {
- $has_opt = 1; print "$INCLUDEDIR\n";
- } elsif ($arg eq "--libdir") {
- $has_opt = 1; print "$LIBDIR\n";
- } elsif ($arg eq "--cppflags") {
- $has_opt = 1; print "$INCLUDEOPTION $CPPFLAGS\n";
- } elsif ($arg eq "--cflags") {
- $has_opt = 1; print "$INCLUDEOPTION $CFLAGS\n";
- } elsif ($arg eq "--cxxflags") {
- $has_opt = 1; print "$INCLUDEOPTION $CXXFLAGS\n";
- } elsif ($arg eq "--ldflags") {
- $has_opt = 1; print "-L$LIBDIR $LDFLAGS $SYSTEM_LIBS\n";
- } elsif ($arg eq "--libs") {
- $has_opt = 1; $want_libs = 1;
- } elsif ($arg eq "--libnames") {
- $has_opt = 1; $want_libnames = 1;
- } elsif ($arg eq "--libfiles") {
- $has_opt = 1; $want_libfiles = 1;
- } elsif ($arg eq "--components") {
- $has_opt = 1; print join(' ', name_map_entries), "\n";
- } elsif ($arg eq "--targets-built") {
- $has_opt = 1; print join(' ', @TARGETS_BUILT), "\n";
- } elsif ($arg eq "--host-target") {
- $has_opt = 1; print "$TARGET_TRIPLE\n";
- } elsif ($arg eq "--build-mode") {
- $has_opt = 1; print "$LLVM_BUILDMODE\n";
- } elsif ($arg eq "--obj-root") {
- $has_opt = 1; print abs_path("$LLVM_OBJ_ROOT/");
- } elsif ($arg eq "--src-root") {
- $has_opt = 1; print abs_path("$LLVM_SRC_ROOT/");
- } else {
- usage();
- }
- } else {
- push @components, $arg;
- }
-}
-
-# If no options were specified, fail.
-usage unless $has_opt;
-
-# If no components were specified, default to 'all'.
-if (@components == 0) {
- push @components, 'all';
-}
-
-# Force component names to lower case.
-@components = map lc, @components;
-
-# Handle any arguments which require building our dependency graph.
-if ($want_libs || $want_libnames || $want_libfiles) {
- my @libs = expand_dependencies(@components);
- print join(' ', fix_library_names(@libs)), "\n" if ($want_libs);
- print join(' ', @libs), "\n" if ($want_libnames);
- print join(' ', fix_library_files(@libs)), "\n" if ($want_libfiles);
-}
-
-exit 0;
-
-#==========================================================================
-# Support Routines
-#==========================================================================
-
-sub usage {
- print STDERR <<__EOD__;
-Usage: llvm-config <OPTION>... [<COMPONENT>...]
-
-Get various configuration information needed to compile programs which use
-LLVM. Typically called from 'configure' scripts. Examples:
- llvm-config --cxxflags
- llvm-config --ldflags
- llvm-config --libs engine bcreader scalaropts
-
-Options:
- --version Print LLVM version.
- --prefix Print the installation prefix.
- --src-root Print the source root LLVM was built from.
- --obj-root Print the object root used to build LLVM.
- --bindir Directory containing LLVM executables.
- --includedir Directory containing LLVM headers.
- --libdir Directory containing LLVM libraries.
- --cppflags C preprocessor flags for files that include LLVM headers.
- --cflags C compiler flags for files that include LLVM headers.
- --cxxflags C++ compiler flags for files that include LLVM headers.
- --ldflags Print Linker flags.
- --libs Libraries needed to link against LLVM components.
- --libnames Bare library names for in-tree builds.
- --libfiles Fully qualified library filenames for makefile depends.
- --components List of all possible components.
- --targets-built List of all targets currently built.
- --host-target Target triple used to configure LLVM.
- --build-mode Print build mode of LLVM tree (e.g. Debug or Release).
-Typical components:
- all All LLVM libraries (default).
- engine Either a native JIT or a bitcode interpreter.
-__EOD__
- exit(1);
-}
-
-# Use -lfoo instead of libfoo.a whenever possible, and add directories to
-# files which can't be found using -L.
-sub fix_library_names (@) {
- my @libs = @_;
- my @result;
- foreach my $lib (@libs) {
- # Transform the bare library name appropriately.
- my ($basename) = ($lib =~ /^lib([^.]*)\.a/);
- if (defined $basename) {
- push @result, "-l$basename";
- } else {
- push @result, "$LIBDIR/$lib";
- }
- }
- return @result;
-}
-
-# Turn the list of libraries into a list of files.
-sub fix_library_files(@) {
- my @libs = @_;
- my @result;
- foreach my $lib (@libs) {
- # Transform the bare library name into a filename.
- push @result, "$LIBDIR/$lib";
- }
- return @result;
-}
-
-#==========================================================================
-# Library Dependency Analysis
-#==========================================================================
-# Given a few human-readable library names, find all their dependencies
-# and sort them into an order which the linker will like. If we packed
-# our libraries into fewer archives, we could make the linker do much
-# of this work for us.
-#
-# Libraries have two different types of names in this code: Human-friendly
-# "component" names entered on the command-line, and the raw file names
-# we use internally (and ultimately pass to the linker).
-#
-# To understand this code, you'll need a working knowledge of Perl 5,
-# and possibly some quality time with 'man perlref'.
-
-sub load_dependencies;
-sub build_name_map;
-sub have_native_backend;
-sub find_best_engine;
-sub expand_names (@);
-sub find_all_required_sets (@);
-sub find_all_required_sets_helper ($$@);
-
-# Each "set" contains one or more libraries which must be included as a
-# group (due to cyclic dependencies). Sets are represented as a Perl array
-# reference pointing to a list of internal library names.
-my @SETS;
-
-# Various mapping tables.
-my %LIB_TO_SET_MAP; # Maps internal library names to their sets.
-my %SET_DEPS; # Maps sets to a list of libraries they depend on.
-my %NAME_MAP; # Maps human-entered names to internal names.
-
-# Have our dependencies been loaded yet?
-my $DEPENDENCIES_LOADED = 0;
-
-# Given a list of human-friendly component names, translate them into a
-# complete set of linker arguments.
-sub expand_dependencies (@) {
- my @libs = @_;
- load_dependencies;
- my @required_sets = find_all_required_sets(expand_names(@libs));
- my @sorted_sets = topologically_sort_sets(@required_sets);
-
- # Expand the library sets into libraries.
- my @result;
- foreach my $set (@sorted_sets) { push @result, @{$set}; }
- return @result;
-}
-
-# Load in the raw dependency data stored at the end of this file.
-sub load_dependencies {
- return if $DEPENDENCIES_LOADED;
- $DEPENDENCIES_LOADED = 1;
- while (<DATA>) {
- # Parse our line.
- my ($libs, $deps) = /^\s*([^:]+):\s*(.*)\s*$/;
- die "Malformed dependency data" unless defined $deps;
- my @libs = split(' ', $libs);
- my @deps = split(' ', $deps);
-
- # Record our dependency data.
- my $set = \@libs;
- push @SETS, $set;
- foreach my $lib (@libs) { $LIB_TO_SET_MAP{$lib} = $set; }
- $SET_DEPS{$set} = \@deps;
- }
- build_name_map;
-}
-
-# Build a map converting human-friendly component names into internal
-# library names.
-sub build_name_map {
- # Add entries for all the actual libraries.
- foreach my $set (@SETS) {
- foreach my $lib (sort @$set) {
- my $short_name = $lib;
- $short_name =~ s/^(lib)?LLVM([^.]*)\..*$/$2/;
- $short_name =~ tr/A-Z/a-z/;
- $NAME_MAP{$short_name} = [$lib];
- }
- }
-
- # Add target-specific entries
- my @all_targets;
- foreach my $target (@TARGETS_BUILT) {
- # FIXME: Temporary, until we don't switch all targets
- if (defined $NAME_MAP{$target.'asmprinter'}) {
- $NAME_MAP{$target} = [$target.'info',
- $target.'asmprinter',
- $target.'codegen']
- } elsif (defined $NAME_MAP{$target.'codegen'}) {
- $NAME_MAP{$target} = [$target.'info',
- $target.'codegen']
- } else {
- $NAME_MAP{$target} = [$target.'info',
- $NAME_MAP{$target}[0]]
- }
-
- if (defined $NAME_MAP{$target.'asmparser'}) {
- push @{$NAME_MAP{$target}},$target.'asmparser'
- }
-
- if (defined $NAME_MAP{$target.'disassembler'}) {
- push @{$NAME_MAP{$target}},$target.'disassembler'
- }
-
- push @all_targets, $target;
- }
-
- # Add virtual entries.
- $NAME_MAP{'native'} = have_native_backend() ? [$ARCH] : [];
- $NAME_MAP{'nativecodegen'} = have_native_backend() ? [$ARCH.'codegen'] : [];
- $NAME_MAP{'engine'} = find_best_engine;
- $NAME_MAP{'all-targets'} = \@all_targets;
- $NAME_MAP{'all'} = [name_map_entries]; # Must be last.
-}
-
-# Return true if we have a native backend to use.
-sub have_native_backend {
- my %BUILT;
- foreach my $target (@TARGETS_BUILT) { $BUILT{$target} = 1; }
- return defined $NAME_MAP{$ARCH} && defined $BUILT{$ARCH};
-}
-
-# Find a working subclass of ExecutionEngine for this platform.
-sub find_best_engine {
- if (have_native_backend && $TARGET_HAS_JIT) {
- return ['jit', 'native'];
- } else {
- return ['interpreter'];
- }
-}
-
-# Get all the human-friendly component names.
-sub name_map_entries {
- load_dependencies;
- return sort keys %NAME_MAP;
-}
-
-# Map human-readable names to internal library names.
-sub expand_names (@) {
- my @names = @_;
- my @result;
- foreach my $name (@names) {
- if (defined $LIB_TO_SET_MAP{$name}) {
- # We've hit bottom: An actual library name.
- push @result, $name;
- } elsif (defined $NAME_MAP{$name}) {
- # We've found a short name to expand.
- push @result, expand_names(@{$NAME_MAP{$name}});
- } else {
- print STDERR "llvm-config: unknown component name: $name\n";
- exit(1);
- }
- }
- return @result;
-}
-
-# Given a list of internal library names, return all sets of libraries which
-# will need to be included by the linker (in no particular order).
-sub find_all_required_sets (@) {
- my @libs = @_;
- my %sets_added;
- my @result;
- find_all_required_sets_helper(\%sets_added, \@result, @libs);
- return @result;
-}
-
-# Recursive closures are pretty broken in Perl, so we're going to separate
-# this function from find_all_required_sets and pass in the state we need
-# manually, as references. Yes, this is fairly unpleasant.
-sub find_all_required_sets_helper ($$@) {
- my ($sets_added, $result, @libs) = @_;
- foreach my $lib (@libs) {
- my $set = $LIB_TO_SET_MAP{$lib};
- next if defined $$sets_added{$set};
- $$sets_added{$set} = 1;
- push @$result, $set;
- find_all_required_sets_helper($sets_added, $result, @{$SET_DEPS{$set}});
- }
-}
-
-# Print a list of sets, with a label. Used for debugging.
-sub print_sets ($@) {
- my ($label, @sets) = @_;
- my @output;
- foreach my $set (@sets) { push @output, join(',', @$set); }
- print "$label: ", join(';', @output), "\n";
-}
-
-# Returns true if $lib is a key in $added.
-sub has_lib_been_added ($$) {
- my ($added, $lib) = @_;
- return defined $$added{$LIB_TO_SET_MAP{$lib}};
-}
-
-# Returns true if all the dependencies of $set appear in $added.
-sub have_all_deps_been_added ($$) {
- my ($added, $set) = @_;
- #print_sets(" Checking", $set);
- #print_sets(" Wants", $SET_DEPS{$set});
- foreach my $lib (@{$SET_DEPS{$set}}) {
- return 0 unless has_lib_been_added($added, $lib);
- }
- return 1;
-}
-
-# Given a list of sets, topologically sort them using dependencies.
-sub topologically_sort_sets (@) {
- my @sets = @_;
- my %added;
- my @result;
- SCAN: while (@sets) { # We'll delete items from @sets as we go.
- #print_sets("So far", reverse(@result));
- #print_sets("Remaining", @sets);
- for (my $i = 0; $i < @sets; ++$i) {
- my $set = $sets[$i];
- if (have_all_deps_been_added(\%added, $set)) {
- push @result, $set;
- $added{$set} = 1;
- #print "Removing $i.\n";
- splice(@sets, $i, 1);
- next SCAN; # Restart our scan.
- }
- }
- die "Can't find a library with no dependencies";
- }
- return reverse(@result);
-}
-
-# Our library dependency data will be added after the '__END__' token, and will
-# be read through the magic <DATA> filehandle.
-__END__
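
The tail of the deleted script, topologically_sort_sets, repeatedly scanned for a set whose dependencies were already emitted, then reversed the result so that on the link line every library precedes the libraries it depends on. Kahn's algorithm is the textbook way to get the same order; a sketch (hypothetical, not LLVM code):

    // Sketch: Kahn-style topological sort producing a link order where each
    // library precedes the libraries it depends on, matching what
    // topologically_sort_sets computed by repeated scanning.
    #include <deque>
    #include <map>
    #include <string>
    #include <vector>

    typedef std::map<std::string, std::vector<std::string> > Graph; // lib -> deps

    std::vector<std::string> linkOrder(const Graph &Deps) {
      // Count, for every library, how many other libraries depend on it.
      std::map<std::string, int> Dependents;
      for (Graph::const_iterator I = Deps.begin(), E = Deps.end(); I != E; ++I) {
        Dependents[I->first] += 0;          // make sure the node exists
        for (size_t J = 0; J != I->second.size(); ++J)
          ++Dependents[I->second[J]];
      }
      // Libraries nothing depends on go first on the link line.
      std::deque<std::string> Work;
      for (std::map<std::string, int>::iterator I = Dependents.begin(),
           E = Dependents.end(); I != E; ++I)
        if (I->second == 0) Work.push_back(I->first);

      std::vector<std::string> Order;
      while (!Work.empty()) {
        std::string L = Work.front(); Work.pop_front();
        Order.push_back(L);
        Graph::const_iterator It = Deps.find(L);
        if (It != Deps.end())
          for (size_t J = 0; J != It->second.size(); ++J)
            if (--Dependents[It->second[J]] == 0)
              Work.push_back(It->second[J]);
      }
      return Order; // cycles never reach in-degree zero and are omitted
    }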
diff --git a/tools/llvm-cov/LLVMBuild.txt b/tools/llvm-cov/LLVMBuild.txt
index 1c4d3ae..87e00d1 100644
--- a/tools/llvm-cov/LLVMBuild.txt
+++ b/tools/llvm-cov/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-cov
parent = Tools
required_libraries = Instrumentation
-
diff --git a/tools/llvm-diff/LLVMBuild.txt b/tools/llvm-diff/LLVMBuild.txt
index c911daa..fa06a03 100644
--- a/tools/llvm-diff/LLVMBuild.txt
+++ b/tools/llvm-diff/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-diff
parent = Tools
required_libraries = AsmParser BitReader
-
diff --git a/tools/llvm-dis/LLVMBuild.txt b/tools/llvm-dis/LLVMBuild.txt
index 6da96e7..4525010 100644
--- a/tools/llvm-dis/LLVMBuild.txt
+++ b/tools/llvm-dis/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-dis
parent = Tools
required_libraries = Analysis BitReader
-
diff --git a/tools/llvm-dwarfdump/LLVMBuild.txt b/tools/llvm-dwarfdump/LLVMBuild.txt
index a15410e..28b7c4c 100644
--- a/tools/llvm-dwarfdump/LLVMBuild.txt
+++ b/tools/llvm-dwarfdump/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-dwarfdump
parent = Tools
required_libraries = DebugInfo Object
-
diff --git a/tools/llvm-extract/LLVMBuild.txt b/tools/llvm-extract/LLVMBuild.txt
index 18322fb..1b1a4c3 100644
--- a/tools/llvm-extract/LLVMBuild.txt
+++ b/tools/llvm-extract/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-extract
parent = Tools
required_libraries = AsmParser BitReader BitWriter IPO
-
diff --git a/tools/llvm-ld/LLVMBuild.txt b/tools/llvm-ld/LLVMBuild.txt
index f432ae7..eed0452 100644
--- a/tools/llvm-ld/LLVMBuild.txt
+++ b/tools/llvm-ld/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-ld
parent = Tools
required_libraries = Archive BitWriter IPO Linker Scalar
-
diff --git a/tools/llvm-link/LLVMBuild.txt b/tools/llvm-link/LLVMBuild.txt
index d582da2..6399ded 100644
--- a/tools/llvm-link/LLVMBuild.txt
+++ b/tools/llvm-link/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-link
parent = Tools
required_libraries = AsmParser BitReader BitWriter Linker
-
diff --git a/tools/llvm-mc/LLVMBuild.txt b/tools/llvm-mc/LLVMBuild.txt
index 9ac6902..dff5358 100644
--- a/tools/llvm-mc/LLVMBuild.txt
+++ b/tools/llvm-mc/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-mc
parent = Tools
required_libraries = MC MCDisassembler MCParser Support all-targets
-
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index 8718b10..4281259 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -234,6 +234,17 @@ static tool_output_file *GetOutputStream() {
return Out;
}
+static std::string DwarfDebugFlags;
+static void setDwarfDebugFlags(int argc, char **argv) {
+ if (!getenv("RC_DEBUG_OPTIONS"))
+ return;
+ for (int i = 0; i < argc; i++) {
+ DwarfDebugFlags += argv[i];
+ if (i + 1 < argc)
+ DwarfDebugFlags += " ";
+ }
+}
+
static int AsLexInput(const char *ProgName) {
OwningPtr<MemoryBuffer> BufferPtr;
if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, BufferPtr)) {
@@ -382,6 +393,8 @@ static int AssembleInput(const char *ProgName) {
Ctx.setAllowTemporaryLabels(false);
Ctx.setGenDwarfForAssembly(GenDwarfForAssembly);
+ if (!DwarfDebugFlags.empty())
+ Ctx.setDwarfDebugFlags(StringRef(DwarfDebugFlags));
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
@@ -418,7 +431,7 @@ static int AssembleInput(const char *ProgName) {
/*useCFI*/ true,
/*useDwarfDirectory*/ true,
IP, CE, MAB, ShowInst));
-
+
} else if (FileType == OFT_Null) {
Str.reset(createNullStreamer(Ctx));
} else {
@@ -508,6 +521,7 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n");
TripleName = Triple::normalize(TripleName);
+ setDwarfDebugFlags(argc, argv);
switch (Action) {
default:
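
setDwarfDebugFlags is gated on the RC_DEBUG_OPTIONS environment variable and records llvm-mc's own command line so MCContext can emit it into the assembly's DWARF debug info. Its joining logic, lifted into a standalone sketch:

    // Standalone equivalent of setDwarfDebugFlags above: join argv with
    // single spaces, but only when RC_DEBUG_OPTIONS is set.
    #include <cstdlib>
    #include <string>

    std::string dwarfFlagsFromArgv(int argc, char **argv) {
      if (!std::getenv("RC_DEBUG_OPTIONS"))
        return std::string();
      std::string Flags;
      for (int i = 0; i < argc; ++i) {
        Flags += argv[i];
        if (i + 1 < argc)
          Flags += ' ';
      }
      return Flags;
    }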
diff --git a/tools/llvm-nm/LLVMBuild.txt b/tools/llvm-nm/LLVMBuild.txt
index 45dc6f6..38ecbfd 100644
--- a/tools/llvm-nm/LLVMBuild.txt
+++ b/tools/llvm-nm/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-nm
parent = Tools
required_libraries = Archive BitReader Object
-
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index e79d72d..3e159c7 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -27,6 +27,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/Format.h"
@@ -110,6 +111,19 @@ namespace {
std::string ToolName;
}
+
+static void error(Twine message, Twine path = Twine()) {
+ errs() << ToolName << ": " << path << ": " << message << ".\n";
+}
+
+static bool error(error_code ec, Twine path = Twine()) {
+ if (ec) {
+ error(ec.message(), path);
+ return true;
+ }
+ return false;
+}
+
namespace {
struct NMSymbol {
uint64_t Address;
@@ -144,14 +158,6 @@ namespace {
StringRef CurrentFilename;
typedef std::vector<NMSymbol> SymbolListT;
SymbolListT SymbolList;
-
- bool error(error_code ec) {
- if (!ec) return false;
-
- outs() << ToolName << ": error reading file: " << ec.message() << ".\n";
- outs().flush();
- return true;
- }
}
static void SortAndPrintSymbolList() {
@@ -286,7 +292,7 @@ static void DumpSymbolNamesFromObject(ObjectFile *obj) {
if (error(i->getSize(s.Size))) break;
}
if (PrintAddress)
- if (error(i->getOffset(s.Address))) break;
+ if (error(i->getAddress(s.Address))) break;
if (error(i->getNMTypeChar(s.TypeChar))) break;
if (error(i->getName(s.Name))) break;
SymbolList.push_back(s);
@@ -297,33 +303,34 @@ static void DumpSymbolNamesFromObject(ObjectFile *obj) {
}
static void DumpSymbolNamesFromFile(std::string &Filename) {
+ if (Filename != "-" && !sys::fs::exists(Filename)) {
+ errs() << ToolName << ": '" << Filename << "': " << "No such file\n";
+ return;
+ }
+
+ OwningPtr<MemoryBuffer> Buffer;
+ if (error(MemoryBuffer::getFileOrSTDIN(Filename, Buffer), Filename))
+ return;
+
+ sys::fs::file_magic magic = sys::fs::identify_magic(Buffer->getBuffer());
+
LLVMContext &Context = getGlobalContext();
std::string ErrorMessage;
- sys::Path aPath(Filename);
- bool exists;
- if (sys::fs::exists(aPath.str(), exists) || !exists)
- errs() << ToolName << ": '" << Filename << "': " << "No such file\n";
- // Note: Currently we do not support reading an archive from stdin.
- if (Filename == "-" || aPath.isBitcodeFile()) {
- OwningPtr<MemoryBuffer> Buffer;
- if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buffer))
- ErrorMessage = ec.message();
+ if (magic == sys::fs::file_magic::bitcode) {
Module *Result = 0;
- if (Buffer.get())
- Result = ParseBitcodeFile(Buffer.get(), Context, &ErrorMessage);
-
+ Result = ParseBitcodeFile(Buffer.get(), Context, &ErrorMessage);
if (Result) {
DumpSymbolNamesFromModule(Result);
delete Result;
- } else
- errs() << ToolName << ": " << Filename << ": " << ErrorMessage << "\n";
-
- } else if (aPath.isArchive()) {
- OwningPtr<Binary> arch;
- if (error_code ec = object::createBinary(aPath.str(), arch)) {
- errs() << ToolName << ": " << Filename << ": " << ec.message() << ".\n";
+ } else {
+ error(ErrorMessage, Filename);
return;
}
+ } else if (magic == sys::fs::file_magic::archive) {
+ OwningPtr<Binary> arch;
+ if (error(object::createBinary(Buffer.take(), arch), Filename))
+ return;
+
if (object::Archive *a = dyn_cast<object::Archive>(arch.get())) {
for (object::Archive::child_iterator i = a->begin_children(),
e = a->end_children(); i != e; ++i) {
@@ -347,12 +354,10 @@ static void DumpSymbolNamesFromFile(std::string &Filename) {
}
}
}
- } else if (aPath.isObjectFile()) {
+ } else if (magic.is_object()) {
OwningPtr<Binary> obj;
- if (error_code ec = object::createBinary(aPath.str(), obj)) {
- errs() << ToolName << ": " << Filename << ": " << ec.message() << ".\n";
+ if (error(object::createBinary(Buffer.take(), obj), Filename))
return;
- }
if (object::ObjectFile *o = dyn_cast<ObjectFile>(obj.get()))
DumpSymbolNamesFromObject(o);
} else {
@@ -370,6 +375,10 @@ int main(int argc, char **argv) {
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
cl::ParseCommandLineOptions(argc, argv, "llvm symbol table dumper\n");
+ // llvm-nm only reads binary files.
+ if (error(sys::Program::ChangeStdinToBinary()))
+ return 1;
+
ToolName = argv[0];
if (BSDFormat) OutputFormat = bsd;
if (POSIXFormat) OutputFormat = posix;
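
The rewritten DumpSymbolNamesFromFile reads the input (or stdin) once into a MemoryBuffer and dispatches on the detected file magic, replacing the old sys::Path::isBitcodeFile/isArchive probes; the stdin-to-binary switch added in main supports exactly that. The dispatch shape, as a minimal sketch against the same 3.1-era API with error handling elided:

    // Minimal sketch of the magic-based dispatch above (error paths elided).
    #include "llvm/ADT/OwningPtr.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include <string>

    static void classify(const std::string &Filename) {
      llvm::OwningPtr<llvm::MemoryBuffer> Buffer;
      if (llvm::MemoryBuffer::getFileOrSTDIN(Filename, Buffer))
        return;                              // read failed
      llvm::sys::fs::file_magic Magic =
          llvm::sys::fs::identify_magic(Buffer->getBuffer());
      if (Magic == llvm::sys::fs::file_magic::bitcode) {
        // hand Buffer to ParseBitcodeFile(...)
      } else if (Magic == llvm::sys::fs::file_magic::archive) {
        // hand Buffer to object::createBinary(...) and walk the children
      } else if (Magic.is_object()) {
        // hand Buffer to object::createBinary(...) and dump one object
      }
    }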
diff --git a/tools/llvm-objdump/LLVMBuild.txt b/tools/llvm-objdump/LLVMBuild.txt
index 4a8103e..d16c501 100644
--- a/tools/llvm-objdump/LLVMBuild.txt
+++ b/tools/llvm-objdump/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-objdump
parent = Tools
required_libraries = DebugInfo MC MCDisassembler MCParser Object all-targets
-
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index 4ccae51..ffeea88 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -419,7 +419,7 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
// Start at the address of the symbol relative to the section's address.
uint64_t Start = 0;
- Symbols[SymIdx].getOffset(Start);
+ Symbols[SymIdx].getAddress(Start);
// Stop disassembling either at the beginning of the next symbol or at
// the end of the section.
@@ -432,7 +432,7 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
if (NextSymType == SymbolRef::ST_Function) {
Sections[SectIdx].containsSymbol(Symbols[NextSymIdx],
containsNextSym);
- Symbols[NextSymIdx].getOffset(NextSym);
+ Symbols[NextSymIdx].getAddress(NextSym);
break;
}
++NextSymIdx;
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 9071d58..bcfecb3 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -186,7 +186,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
bool contains;
if (!error(i->containsSymbol(*si, contains)) && contains) {
uint64_t Address;
- if (error(si->getOffset(Address))) break;
+ if (error(si->getAddress(Address))) break;
StringRef Name;
if (error(si->getName(Name))) break;
Symbols.push_back(std::make_pair(Address, Name));
@@ -477,7 +477,7 @@ static void PrintSymbolTable(const ObjectFile *o) {
se = o->end_symbols(); si != se; si.increment(ec)) {
if (error(ec)) return;
StringRef Name;
- uint64_t Offset;
+ uint64_t Address;
bool Global;
SymbolRef::Type Type;
bool Weak;
@@ -485,7 +485,7 @@ static void PrintSymbolTable(const ObjectFile *o) {
uint64_t Size;
section_iterator Section = o->end_sections();
if (error(si->getName(Name))) continue;
- if (error(si->getOffset(Offset))) continue;
+ if (error(si->getAddress(Address))) continue;
if (error(si->isGlobal(Global))) continue;
if (error(si->getType(Type))) continue;
if (error(si->isWeak(Weak))) continue;
@@ -493,8 +493,10 @@ static void PrintSymbolTable(const ObjectFile *o) {
if (error(si->getSize(Size))) continue;
if (error(si->getSection(Section))) continue;
- if (Offset == UnknownAddressOrSize)
- Offset = 0;
+ if (Address == UnknownAddressOrSize)
+ Address = 0;
+ if (Size == UnknownAddressOrSize)
+ Size = 0;
char GlobLoc = ' ';
if (Type != SymbolRef::ST_External)
GlobLoc = Global ? 'g' : 'l';
@@ -506,7 +508,7 @@ static void PrintSymbolTable(const ObjectFile *o) {
else if (Type == SymbolRef::ST_Function)
FileFunc = 'F';
- outs() << format("%08"PRIx64, Offset) << " "
+ outs() << format("%08"PRIx64, Address) << " "
<< GlobLoc // Local -> 'l', Global -> 'g', Neither -> ' '
<< (Weak ? 'w' : ' ') // Weak?
<< ' ' // Constructor. Not supported yet.
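
Throughout llvm-objdump (and in llvm-nm and MachODump above), SymbolRef::getOffset is replaced by getAddress, and unknown addresses and sizes are normalized to zero before printing. The iteration pattern in PrintSymbolTable, trimmed to a sketch:

    // Sketch of the symbol walk above (3.1-era object API; checks trimmed).
    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Support/system_error.h"

    using namespace llvm;
    using namespace llvm::object;

    static void listSymbols(const ObjectFile *O) {
      error_code ec;
      for (symbol_iterator I = O->begin_symbols(), E = O->end_symbols();
           I != E; I.increment(ec)) {
        if (ec) return;
        StringRef Name;
        uint64_t Address = 0, Size = 0;
        if (I->getName(Name)) continue;       // each getter reports an error_code
        if (I->getAddress(Address)) continue;
        if (I->getSize(Size)) continue;
        if (Address == UnknownAddressOrSize) Address = 0;
        if (Size == UnknownAddressOrSize)    Size = 0;
        outs() << Name << " @ " << Address << " size " << Size << '\n';
      }
    }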
diff --git a/tools/llvm-prof/LLVMBuild.txt b/tools/llvm-prof/LLVMBuild.txt
index 18e18ea..d59127c 100644
--- a/tools/llvm-prof/LLVMBuild.txt
+++ b/tools/llvm-prof/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-prof
parent = Tools
required_libraries = Analysis BitReader
-
diff --git a/tools/llvm-ranlib/LLVMBuild.txt b/tools/llvm-ranlib/LLVMBuild.txt
index 569378b..23015c5 100644
--- a/tools/llvm-ranlib/LLVMBuild.txt
+++ b/tools/llvm-ranlib/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-ranlib
parent = Tools
required_libraries = Archive
-
diff --git a/tools/llvm-rtdyld/LLVMBuild.txt b/tools/llvm-rtdyld/LLVMBuild.txt
index 6767482..b36d13c 100644
--- a/tools/llvm-rtdyld/LLVMBuild.txt
+++ b/tools/llvm-rtdyld/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-rtdyld
parent = Tools
required_libraries = JIT MC Object RuntimeDyld Support all-targets
-
diff --git a/tools/llvm-size/LLVMBuild.txt b/tools/llvm-size/LLVMBuild.txt
index 1ed445c..b4c538a 100644
--- a/tools/llvm-size/LLVMBuild.txt
+++ b/tools/llvm-size/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-size
parent = Tools
required_libraries = Object
-
diff --git a/tools/llvm-stub/LLVMBuild.txt b/tools/llvm-stub/LLVMBuild.txt
index f6a34b5..5c3534c 100644
--- a/tools/llvm-stub/LLVMBuild.txt
+++ b/tools/llvm-stub/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = llvm-stub
parent = Tools
required_libraries =
-
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 9093073..77d7dfe 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -265,7 +265,8 @@ bool LTOCodeGenerator::determineTarget(std::string& errMsg)
SubtargetFeatures Features;
Features.getDefaultSubtargetFeatures(llvm::Triple(Triple));
std::string FeatureStr = Features.getString();
- _target = march->createTargetMachine(Triple, _mCpu, FeatureStr,
+ TargetOptions Options;
+ _target = march->createTargetMachine(Triple, _mCpu, FeatureStr, Options,
RelocModel);
}
return false;
diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp
index 8ea680d..0b73729 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -159,7 +159,9 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
Features.getDefaultSubtargetFeatures(llvm::Triple(Triple));
std::string FeatureStr = Features.getString();
std::string CPU;
- TargetMachine *target = march->createTargetMachine(Triple, CPU, FeatureStr);
+ TargetOptions Options;
+ TargetMachine *target = march->createTargetMachine(Triple, CPU, FeatureStr,
+ Options);
LTOModule *Ret = new LTOModule(m.take(), target);
if (Ret->ParseSymbols(errMsg)) {
delete Ret;
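
Both LTO call sites above follow the same upstream API change: createTargetMachine() now takes a TargetOptions argument ahead of the relocation model. A minimal sketch of the updated call shape, assuming a 2011-era tree; the helper and its parameters are invented for illustration:

    // Sketch only: the new createTargetMachine() signature with TargetOptions.
    // 'createTM' and its parameters are hypothetical, not part of this patch.
    #include <string>
    #include "llvm/Support/TargetRegistry.h"
    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Target/TargetOptions.h"

    static llvm::TargetMachine *createTM(const llvm::Target *T,
                                         const std::string &Triple,
                                         const std::string &CPU,
                                         const std::string &Features) {
      llvm::TargetOptions Options;  // Default-constructed; callers tune fields.
      return T->createTargetMachine(Triple, CPU, Features, Options);
    }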
diff --git a/tools/macho-dump/LLVMBuild.txt b/tools/macho-dump/LLVMBuild.txt
index 75d3f40..1ad9b84 100644
--- a/tools/macho-dump/LLVMBuild.txt
+++ b/tools/macho-dump/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = macho-dump
parent = Tools
required_libraries = Object Support
-
diff --git a/tools/opt/LLVMBuild.txt b/tools/opt/LLVMBuild.txt
index 6cd3fda..4de99f5 100644
--- a/tools/opt/LLVMBuild.txt
+++ b/tools/opt/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = Tool
name = opt
parent = Tools
required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Scalar
-
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index fe73462..5161364 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -407,6 +407,8 @@ static inline void addPass(PassManagerBase &PM, Pass *P) {
/// OptLevel - Optimization Level
static void AddOptimizationPasses(PassManagerBase &MPM,FunctionPassManager &FPM,
unsigned OptLevel) {
+ FPM.add(createVerifierPass()); // Verify that input is correct
+
PassManagerBuilder Builder;
Builder.OptLevel = OptLevel;
diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp
index b6e02e3..cc207f7 100644
--- a/unittests/ADT/APFloatTest.cpp
+++ b/unittests/ADT/APFloatTest.cpp
@@ -653,4 +653,28 @@ TEST(APFloatTest, getLargest) {
EXPECT_EQ(1.7976931348623158e+308, APFloat::getLargest(APFloat::IEEEdouble).convertToDouble());
}
+TEST(APFloatTest, convert) {
+ bool losesInfo;
+ APFloat test(APFloat::IEEEdouble, "1.0");
+ test.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(1.0f, test.convertToFloat());
+ EXPECT_FALSE(losesInfo);
+
+ test = APFloat(APFloat::x87DoubleExtended, "0x1p-53");
+ test.add(APFloat(APFloat::x87DoubleExtended, "1.0"), APFloat::rmNearestTiesToEven);
+ test.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(1.0, test.convertToDouble());
+ EXPECT_TRUE(losesInfo);
+
+ test = APFloat(APFloat::IEEEquad, "0x1p-53");
+ test.add(APFloat(APFloat::IEEEquad, "1.0"), APFloat::rmNearestTiesToEven);
+ test.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(1.0, test.convertToDouble());
+ EXPECT_TRUE(losesInfo);
+
+ test = APFloat(APFloat::x87DoubleExtended, "0xf.fffffffp+28");
+ test.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(4294967295.0, test.convertToDouble());
+ EXPECT_FALSE(losesInfo);
+}
}
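
The losesInfo expectations above follow from significand widths: x87DoubleExtended carries 64 significand bits, so 1 + 2^-53 is exact there, while IEEEdouble carries only 53, so the conversion rounds back to 1.0 and reports the loss. A standalone sketch of the same check (the helper name is invented):

    // 1 + 2^-53 fits in x87's 64-bit significand but not in double's 53 bits,
    // so convert() rounds to 1.0 and sets losesInfo.
    #include "llvm/ADT/APFloat.h"
    using namespace llvm;

    static bool conversionLosesInfo() {
      bool losesInfo;
      APFloat V(APFloat::x87DoubleExtended, "0x1p-53");
      V.add(APFloat(APFloat::x87DoubleExtended, "1.0"),
            APFloat::rmNearestTiesToEven);
      V.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
      return losesInfo;  // true
    }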
diff --git a/unittests/ExecutionEngine/ExecutionEngineTest.cpp b/unittests/ExecutionEngine/ExecutionEngineTest.cpp
index 4dcef20..74a2ccd 100644
--- a/unittests/ExecutionEngine/ExecutionEngineTest.cpp
+++ b/unittests/ExecutionEngine/ExecutionEngineTest.cpp
@@ -22,12 +22,13 @@ namespace {
class ExecutionEngineTest : public testing::Test {
protected:
ExecutionEngineTest()
- : M(new Module("<main>", getGlobalContext())),
- Engine(EngineBuilder(M).create()) {
+ : M(new Module("<main>", getGlobalContext())), Error(""),
+ Engine(EngineBuilder(M).setErrorStr(&Error).create()) {
}
virtual void SetUp() {
- ASSERT_TRUE(Engine.get() != NULL);
+ ASSERT_TRUE(Engine.get() != NULL) << "EngineBuilder returned error: '"
+ << Error << "'";
}
GlobalVariable *NewExtGlobal(Type *T, const Twine &Name) {
@@ -36,6 +37,7 @@ protected:
}
Module *const M;
+ std::string Error;
const OwningPtr<ExecutionEngine> Engine;
};
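
The pattern the test adopts, as a standalone sketch assuming the same 2011-era headers (createEngine is invented): ask EngineBuilder for its failure reason instead of accepting a silent null engine.

    // Capture EngineBuilder's error message so a failed create() is diagnosable.
    #include <string>
    #include "llvm/LLVMContext.h"
    #include "llvm/Module.h"
    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    using namespace llvm;

    static ExecutionEngine *createEngine(std::string &Error) {
      Module *M = new Module("<main>", getGlobalContext());
      return EngineBuilder(M).setErrorStr(&Error).create();  // null => see Error
    }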
diff --git a/unittests/Makefile.unittest b/unittests/Makefile.unittest
index 580ad7d..bd32aed 100644
--- a/unittests/Makefile.unittest
+++ b/unittests/Makefile.unittest
@@ -34,7 +34,7 @@ ifneq ($(HAVE_PTHREAD), 1)
CPP.Flags += -DGTEST_HAS_PTHREAD=0
endif
-TESTLIBS = -lGoogleTest -lUnitTestMain
+TESTLIBS = -lgtest -lgtest_main
ifeq ($(ENABLE_SHARED), 1)
ifneq (,$(RPATH))
diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp
index 60d08bc..358dad0 100644
--- a/unittests/Support/Path.cpp
+++ b/unittests/Support/Path.cpp
@@ -183,6 +183,11 @@ TEST_F(FileSystemTest, TempFiles) {
ASSERT_NO_ERROR(fs::unique_file("%%-%%-%%-%%.temp", FD2, TempPath2));
ASSERT_NE(TempPath.str(), TempPath2.str());
+ fs::file_status A, B;
+ ASSERT_NO_ERROR(fs::status(Twine(TempPath), A));
+ ASSERT_NO_ERROR(fs::status(Twine(TempPath2), B));
+ EXPECT_FALSE(fs::equivalent(A, B));
+
// Try to copy the first to the second.
EXPECT_EQ(
fs::copy_file(Twine(TempPath), Twine(TempPath2)), errc::file_exists);
@@ -204,6 +209,9 @@ TEST_F(FileSystemTest, TempFiles) {
bool equal;
ASSERT_NO_ERROR(fs::equivalent(Twine(TempPath), Twine(TempPath2), equal));
EXPECT_TRUE(equal);
+ ASSERT_NO_ERROR(fs::status(Twine(TempPath), A));
+ ASSERT_NO_ERROR(fs::status(Twine(TempPath2), B));
+ EXPECT_TRUE(fs::equivalent(A, B));
// Remove Temp1.
::close(FileDescriptor);
@@ -223,6 +231,60 @@ TEST_F(FileSystemTest, DirectoryIteration) {
error_code ec;
for (fs::directory_iterator i(".", ec), e; i != e; i.increment(ec))
ASSERT_NO_ERROR(ec);
+
+ // Create a known hierarchy to recurse over.
+ bool existed;
+ ASSERT_NO_ERROR(fs::create_directories(Twine(TestDirectory)
+ + "/recursive/a0/aa1", existed));
+ ASSERT_NO_ERROR(fs::create_directories(Twine(TestDirectory)
+ + "/recursive/a0/ab1", existed));
+ ASSERT_NO_ERROR(fs::create_directories(Twine(TestDirectory)
+ + "/recursive/dontlookhere/da1", existed));
+ ASSERT_NO_ERROR(fs::create_directories(Twine(TestDirectory)
+ + "/recursive/z0/za1", existed));
+ ASSERT_NO_ERROR(fs::create_directories(Twine(TestDirectory)
+ + "/recursive/pop/p1", existed));
+ typedef std::vector<std::string> v_t;
+ v_t visited;
+ for (fs::recursive_directory_iterator i(Twine(TestDirectory)
+ + "/recursive", ec), e; i != e; i.increment(ec)){
+ ASSERT_NO_ERROR(ec);
+ if (path::filename(i->path()) == "p1") {
+ i.pop();
+ // FIXME: recursive_directory_iterator should be more robust.
+ if (i == e) break;
+ }
+ if (path::filename(i->path()) == "dontlookhere")
+ i.no_push();
+ visited.push_back(path::filename(i->path()));
+ }
+ v_t::const_iterator a0 = std::find(visited.begin(), visited.end(), "a0");
+ v_t::const_iterator aa1 = std::find(visited.begin(), visited.end(), "aa1");
+ v_t::const_iterator ab1 = std::find(visited.begin(), visited.end(), "ab1");
+ v_t::const_iterator dontlookhere = std::find(visited.begin(), visited.end(),
+ "dontlookhere");
+ v_t::const_iterator da1 = std::find(visited.begin(), visited.end(), "da1");
+ v_t::const_iterator z0 = std::find(visited.begin(), visited.end(), "z0");
+ v_t::const_iterator za1 = std::find(visited.begin(), visited.end(), "za1");
+ v_t::const_iterator pop = std::find(visited.begin(), visited.end(), "pop");
+ v_t::const_iterator p1 = std::find(visited.begin(), visited.end(), "p1");
+
+ // Make sure that each path was visited correctly.
+ ASSERT_NE(a0, visited.end());
+ ASSERT_NE(aa1, visited.end());
+ ASSERT_NE(ab1, visited.end());
+ ASSERT_NE(dontlookhere, visited.end());
+ ASSERT_EQ(da1, visited.end()); // Not visited.
+ ASSERT_NE(z0, visited.end());
+ ASSERT_NE(za1, visited.end());
+ ASSERT_NE(pop, visited.end());
+ ASSERT_EQ(p1, visited.end()); // Not visited.
+
+ // Make sure that parents were visited before children. No other ordering
+ // guarantees can be made across siblings.
+ ASSERT_LT(a0, aa1);
+ ASSERT_LT(a0, ab1);
+ ASSERT_LT(z0, za1);
}
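
The two traversal controls the test exercises are no_push(), which refuses to descend into the current directory, and pop(), which leaves the current level. A minimal sketch against the same sys::fs API (walk and the "skipme" name are invented):

    // Skip one subtree during a recursive walk; stop descending at 'skipme'.
    #include "llvm/ADT/Twine.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/PathV2.h"
    #include "llvm/Support/system_error.h"
    using namespace llvm;
    using namespace llvm::sys;

    static void walk(const Twine &Root) {
      error_code ec;
      for (fs::recursive_directory_iterator i(Root, ec), e; i != e;
           i.increment(ec)) {
        if (ec) return;
        if (path::filename(i->path()) == "skipme")
          i.no_push();  // Children of this directory are never visited.
      }
    }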
TEST_F(FileSystemTest, Magic) {
diff --git a/unittests/VMCore/InstructionsTest.cpp b/unittests/VMCore/InstructionsTest.cpp
index f0197bb..218a9a0 100644
--- a/unittests/VMCore/InstructionsTest.cpp
+++ b/unittests/VMCore/InstructionsTest.cpp
@@ -13,6 +13,8 @@
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
#include "gtest/gtest.h"
namespace llvm {
@@ -129,5 +131,100 @@ TEST(InstructionsTest, CastInst) {
EXPECT_EQ(CastInst::SExt, CastInst::getCastOpcode(c8, true, V8x64Ty, true));
}
+
+
+TEST(InstructionsTest, VectorGep) {
+ LLVMContext &C(getGlobalContext());
+
+ // Type Definitions
+ PointerType *Ptri8Ty = PointerType::get(IntegerType::get(C, 8), 0);
+ PointerType *Ptri32Ty = PointerType::get(IntegerType::get(C, 32), 0);
+
+ VectorType *V2xi8PTy = VectorType::get(Ptri8Ty, 2);
+ VectorType *V2xi32PTy = VectorType::get(Ptri32Ty, 2);
+
+ // Test different aspects of the vector-of-pointers type
+ // and GEPs which use this type.
+ ConstantInt *Ci32a = ConstantInt::get(C, APInt(32, 1492));
+ ConstantInt *Ci32b = ConstantInt::get(C, APInt(32, 1948));
+ std::vector<Constant*> ConstVa(2, Ci32a);
+ std::vector<Constant*> ConstVb(2, Ci32b);
+ Constant *C2xi32a = ConstantVector::get(ConstVa);
+ Constant *C2xi32b = ConstantVector::get(ConstVb);
+
+ CastInst *PtrVecA = new IntToPtrInst(C2xi32a, V2xi32PTy);
+ CastInst *PtrVecB = new IntToPtrInst(C2xi32b, V2xi32PTy);
+
+ ICmpInst *ICmp0 = new ICmpInst(ICmpInst::ICMP_SGT, PtrVecA, PtrVecB);
+ ICmpInst *ICmp1 = new ICmpInst(ICmpInst::ICMP_ULT, PtrVecA, PtrVecB);
+ EXPECT_NE(ICmp0, ICmp1); // suppress warning.
+
+ GetElementPtrInst *Gep0 = GetElementPtrInst::Create(PtrVecA, C2xi32a);
+ GetElementPtrInst *Gep1 = GetElementPtrInst::Create(PtrVecA, C2xi32b);
+ GetElementPtrInst *Gep2 = GetElementPtrInst::Create(PtrVecB, C2xi32a);
+ GetElementPtrInst *Gep3 = GetElementPtrInst::Create(PtrVecB, C2xi32b);
+
+ CastInst *BTC0 = new BitCastInst(Gep0, V2xi8PTy);
+ CastInst *BTC1 = new BitCastInst(Gep1, V2xi8PTy);
+ CastInst *BTC2 = new BitCastInst(Gep2, V2xi8PTy);
+ CastInst *BTC3 = new BitCastInst(Gep3, V2xi8PTy);
+
+ Value *S0 = BTC0->stripPointerCasts();
+ Value *S1 = BTC1->stripPointerCasts();
+ Value *S2 = BTC2->stripPointerCasts();
+ Value *S3 = BTC3->stripPointerCasts();
+
+ EXPECT_NE(S0, Gep0);
+ EXPECT_NE(S1, Gep1);
+ EXPECT_NE(S2, Gep2);
+ EXPECT_NE(S3, Gep3);
+
+ int64_t Offset;
+ TargetData TD("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3"
+ "2:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80"
+ ":128:128-n8:16:32:64-S128");
+ // Make sure we don't crash
+ GetPointerBaseWithConstantOffset(Gep0, Offset, TD);
+ GetPointerBaseWithConstantOffset(Gep1, Offset, TD);
+ GetPointerBaseWithConstantOffset(Gep2, Offset, TD);
+ GetPointerBaseWithConstantOffset(Gep3, Offset, TD);
+
+ // Gep of Geps
+ GetElementPtrInst *GepII0 = GetElementPtrInst::Create(Gep0, C2xi32b);
+ GetElementPtrInst *GepII1 = GetElementPtrInst::Create(Gep1, C2xi32a);
+ GetElementPtrInst *GepII2 = GetElementPtrInst::Create(Gep2, C2xi32b);
+ GetElementPtrInst *GepII3 = GetElementPtrInst::Create(Gep3, C2xi32a);
+
+ EXPECT_EQ(GepII0->getNumIndices(), 1u);
+ EXPECT_EQ(GepII1->getNumIndices(), 1u);
+ EXPECT_EQ(GepII2->getNumIndices(), 1u);
+ EXPECT_EQ(GepII3->getNumIndices(), 1u);
+
+ EXPECT_FALSE(GepII0->hasAllZeroIndices());
+ EXPECT_FALSE(GepII1->hasAllZeroIndices());
+ EXPECT_FALSE(GepII2->hasAllZeroIndices());
+ EXPECT_FALSE(GepII3->hasAllZeroIndices());
+
+ delete GepII0;
+ delete GepII1;
+ delete GepII2;
+ delete GepII3;
+
+ delete BTC0;
+ delete BTC1;
+ delete BTC2;
+ delete BTC3;
+
+ delete Gep0;
+ delete Gep1;
+ delete Gep2;
+ delete Gep3;
+
+ delete ICmp0;
+ delete ICmp1;
+ delete PtrVecA;
+ delete PtrVecB;
+}
+
} // end anonymous namespace
} // end namespace llvm
diff --git a/unittests/VMCore/ValueMapTest.cpp b/unittests/VMCore/ValueMapTest.cpp
index b493920..9bed37d 100644
--- a/unittests/VMCore/ValueMapTest.cpp
+++ b/unittests/VMCore/ValueMapTest.cpp
@@ -12,7 +12,7 @@
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Config/config.h"
+#include "llvm/Config/llvm-config.h"
#include "gtest/gtest.h"
@@ -195,7 +195,7 @@ struct LockMutex : ValueMapConfig<KeyT> {
}
static sys::Mutex *getMutex(const ExtraData &Data) { return Data.M; }
};
-#if ENABLE_THREADS
+#if LLVM_ENABLE_THREADS
TYPED_TEST(ValueMapTest, LocksMutex) {
sys::Mutex M(false); // Not recursive.
bool CalledRAUW = false, CalledDeleted = false;
diff --git a/utils/LLVMBuild.txt b/utils/LLVMBuild.txt
index fc16720..382bfd3 100644
--- a/utils/LLVMBuild.txt
+++ b/utils/LLVMBuild.txt
@@ -15,6 +15,9 @@
;
;===------------------------------------------------------------------------===;
+[common]
+subdirectories = TableGen unittest
+
[component_0]
type = Group
name = BuildTools
@@ -24,4 +27,3 @@ parent = $ROOT
type = Group
name = UtilityTools
parent = $ROOT
-
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index 01754c3..5ef0db9 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -252,11 +252,6 @@ public:
switch (Kind) {
case Invalid:
assert(0 && "Invalid kind!");
- case Token:
- // Tokens are comparable by value.
- //
- // FIXME: Compare by enum value.
- return ValueName < RHS.ValueName;
default:
// This class precedes the RHS if it is a proper subset of the RHS.
@@ -737,7 +732,9 @@ void MatchableInfo::TokenizeAsmString(const AsmMatcherInfo &Info) {
// The first token of the instruction is the mnemonic, which must be a
// simple string, not a $foo variable or a singleton register.
- assert(!AsmOperands.empty() && "Instruction has no tokens?");
+ if (AsmOperands.empty())
+ throw TGError(TheDef->getLoc(),
+ "Instruction '" + TheDef->getName() + "' has no tokens");
Mnemonic = AsmOperands[0].Token;
if (Mnemonic[0] == '$' || getSingletonRegisterForAsmOperand(0, Info))
throw TGError(TheDef->getLoc(),
@@ -1302,6 +1299,17 @@ void AsmMatcherInfo::BuildInfo() {
II->BuildAliasResultOperands();
}
+ // Process token alias definitions and set up the associated superclass
+ // information.
+ std::vector<Record*> AllTokenAliases =
+ Records.getAllDerivedDefinitions("TokenAlias");
+ for (unsigned i = 0, e = AllTokenAliases.size(); i != e; ++i) {
+ Record *Rec = AllTokenAliases[i];
+ ClassInfo *FromClass = getTokenClass(Rec->getValueAsString("FromToken"));
+ ClassInfo *ToClass = getTokenClass(Rec->getValueAsString("ToToken"));
+ FromClass->SuperClasses.push_back(ToClass);
+ }
+
// Reorder classes so that classes precede super classes.
std::sort(Classes.begin(), Classes.end(), less_ptr<ClassInfo>());
}
@@ -1663,7 +1671,7 @@ static void EmitMatchClassEnumeration(CodeGenTarget &Target,
/// EmitValidateOperandClass - Emit the function to validate an operand class.
static void EmitValidateOperandClass(AsmMatcherInfo &Info,
raw_ostream &OS) {
- OS << "static bool ValidateOperandClass(MCParsedAsmOperand *GOp, "
+ OS << "static bool validateOperandClass(MCParsedAsmOperand *GOp, "
<< "MatchClassKind Kind) {\n";
OS << " " << Info.Target.getName() << "Operand &Operand = *("
<< Info.Target.getName() << "Operand*)GOp;\n";
@@ -1674,7 +1682,8 @@ static void EmitValidateOperandClass(AsmMatcherInfo &Info,
// Check for Token operands first.
OS << " if (Operand.isToken())\n";
- OS << " return MatchTokenString(Operand.getToken()) == Kind;\n\n";
+ OS << " return isSubclass(matchTokenString(Operand.getToken()), Kind);"
+ << "\n\n";
// Check for register operands, including sub-classes.
OS << " if (Operand.isReg()) {\n";
@@ -1688,7 +1697,7 @@ static void EmitValidateOperandClass(AsmMatcherInfo &Info,
<< it->first->getName() << ": OpKind = " << it->second->Name
<< "; break;\n";
OS << " }\n";
- OS << " return IsSubclass(OpKind, Kind);\n";
+ OS << " return isSubclass(OpKind, Kind);\n";
OS << " }\n\n";
// Check the user classes. We don't care what order since we're only
@@ -1715,8 +1724,8 @@ static void EmitValidateOperandClass(AsmMatcherInfo &Info,
static void EmitIsSubclass(CodeGenTarget &Target,
std::vector<ClassInfo*> &Infos,
raw_ostream &OS) {
- OS << "/// IsSubclass - Compute whether \\arg A is a subclass of \\arg B.\n";
- OS << "static bool IsSubclass(MatchClassKind A, MatchClassKind B) {\n";
+ OS << "/// isSubclass - Compute whether \\arg A is a subclass of \\arg B.\n";
+ OS << "static bool isSubclass(MatchClassKind A, MatchClassKind B) {\n";
OS << " if (A == B)\n";
OS << " return true;\n\n";
@@ -1727,32 +1736,30 @@ static void EmitIsSubclass(CodeGenTarget &Target,
ie = Infos.end(); it != ie; ++it) {
ClassInfo &A = **it;
- if (A.Kind != ClassInfo::Token) {
- std::vector<StringRef> SuperClasses;
- for (std::vector<ClassInfo*>::iterator it = Infos.begin(),
- ie = Infos.end(); it != ie; ++it) {
- ClassInfo &B = **it;
-
- if (&A != &B && A.isSubsetOf(B))
- SuperClasses.push_back(B.Name);
- }
+ std::vector<StringRef> SuperClasses;
+ for (std::vector<ClassInfo*>::iterator it = Infos.begin(),
+ ie = Infos.end(); it != ie; ++it) {
+ ClassInfo &B = **it;
- if (SuperClasses.empty())
- continue;
+ if (&A != &B && A.isSubsetOf(B))
+ SuperClasses.push_back(B.Name);
+ }
- OS << "\n case " << A.Name << ":\n";
+ if (SuperClasses.empty())
+ continue;
- if (SuperClasses.size() == 1) {
- OS << " return B == " << SuperClasses.back() << ";\n";
- continue;
- }
+ OS << "\n case " << A.Name << ":\n";
- OS << " switch (B) {\n";
- OS << " default: return false;\n";
- for (unsigned i = 0, e = SuperClasses.size(); i != e; ++i)
- OS << " case " << SuperClasses[i] << ": return true;\n";
- OS << " }\n";
+ if (SuperClasses.size() == 1) {
+ OS << " return B == " << SuperClasses.back() << ";\n";
+ continue;
}
+
+ OS << " switch (B) {\n";
+ OS << " default: return false;\n";
+ for (unsigned i = 0, e = SuperClasses.size(); i != e; ++i)
+ OS << " case " << SuperClasses[i] << ": return true;\n";
+ OS << " }\n";
}
OS << " }\n";
OS << "}\n\n";
@@ -1774,7 +1781,7 @@ static void EmitMatchTokenString(CodeGenTarget &Target,
"return " + CI.Name + ";"));
}
- OS << "static MatchClassKind MatchTokenString(StringRef Name) {\n";
+ OS << "static MatchClassKind matchTokenString(StringRef Name) {\n";
StringMatcher("Name", Matches, OS).Emit();
@@ -1912,7 +1919,7 @@ static bool EmitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info) {
Info.getRecords().getAllDerivedDefinitions("MnemonicAlias");
if (Aliases.empty()) return false;
- OS << "static void ApplyMnemonicAliases(StringRef &Mnemonic, "
+ OS << "static void applyMnemonicAliases(StringRef &Mnemonic, "
"unsigned Features) {\n";
// Keep track of all the aliases from a mnemonic. Use an std::map so that the
@@ -2061,7 +2068,7 @@ static void EmitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
// the found operand class.
OS << Target.getName() << ClassName << "::OperandMatchResultTy "
<< Target.getName() << ClassName << "::\n"
- << "TryCustomParseOperand(SmallVectorImpl<MCParsedAsmOperand*>"
+ << "tryCustomParseOperand(SmallVectorImpl<MCParsedAsmOperand*>"
<< " &Operands,\n unsigned MCK) {\n\n"
<< " switch(MCK) {\n";
@@ -2127,7 +2134,7 @@ static void EmitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
// Emit call to the custom parser method
OS << " // call custom parse method to handle the operand\n";
OS << " OperandMatchResultTy Result = ";
- OS << "TryCustomParseOperand(Operands, it->Class);\n";
+ OS << "tryCustomParseOperand(Operands, it->Class);\n";
OS << " if (Result != MatchOperand_NoMatch)\n";
OS << " return Result;\n";
OS << " }\n\n";
@@ -2214,7 +2221,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n";
OS << " StringRef Mnemonic);\n";
- OS << " OperandMatchResultTy TryCustomParseOperand(\n";
+ OS << " OperandMatchResultTy tryCustomParseOperand(\n";
OS << " SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n";
OS << " unsigned MCK);\n\n";
}
@@ -2361,7 +2368,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
if (HasMnemonicAliases) {
OS << " // Process all MnemonicAliases to remap the mnemonic.\n";
- OS << " ApplyMnemonicAliases(Mnemonic, AvailableFeatures);\n\n";
+ OS << " applyMnemonicAliases(Mnemonic, AvailableFeatures);\n\n";
}
// Emit code to compute the class list for this operand vector.
@@ -2403,7 +2410,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " OperandsValid = (it->Classes[i] == " <<"InvalidMatchClass);\n";
OS << " break;\n";
OS << " }\n";
- OS << " if (ValidateOperandClass(Operands[i+1], "
+ OS << " if (validateOperandClass(Operands[i+1], "
"(MatchClassKind)it->Classes[i]))\n";
OS << " continue;\n";
OS << " // If this operand is broken for all of the instances of this\n";
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index 9e87515..ac33f99 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -17,6 +17,7 @@ add_tablegen(llvm-tblgen LLVM
DAGISelMatcherGen.cpp
DAGISelMatcherOpt.cpp
DAGISelMatcher.cpp
+ DFAPacketizerEmitter.cpp
DisassemblerEmitter.cpp
EDEmitter.cpp
FastISelEmitter.cpp
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index 8de4615..3aadf50 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -582,6 +582,23 @@ void CodeGenRegBank::addToMaps(CodeGenRegisterClass *RC) {
Key2RC.insert(std::make_pair(K, RC));
}
+// Create a synthetic sub-class if it is missing.
+CodeGenRegisterClass*
+CodeGenRegBank::getOrCreateSubClass(const CodeGenRegisterClass *RC,
+ const CodeGenRegister::Set *Members,
+ StringRef Name) {
+ // Synthetic sub-class has the same size and alignment as RC.
+ CodeGenRegisterClass::Key K(Members, RC->SpillSize, RC->SpillAlignment);
+ RCKeyMap::const_iterator FoundI = Key2RC.find(K);
+ if (FoundI != Key2RC.end())
+ return FoundI->second;
+
+ // Sub-class doesn't exist, create a new one.
+ CodeGenRegisterClass *NewRC = new CodeGenRegisterClass(Name, K);
+ addToMaps(NewRC);
+ return NewRC;
+}
+
CodeGenRegisterClass *CodeGenRegBank::getRegClass(Record *Def) {
if (CodeGenRegisterClass *RC = Def2RC[Def])
return RC;
@@ -739,61 +756,102 @@ void CodeGenRegBank::computeDerivedInfo() {
computeComposites();
}
+//
+// Synthesize missing register class intersections.
+//
+// Make sure that sub-classes of RC exist such that getCommonSubClass(RC, X)
+// returns a maximal register class for all X.
+//
+void CodeGenRegBank::inferCommonSubClass(CodeGenRegisterClass *RC) {
+ for (unsigned rci = 0, rce = RegClasses.size(); rci != rce; ++rci) {
+ CodeGenRegisterClass *RC1 = RC;
+ CodeGenRegisterClass *RC2 = RegClasses[rci];
+ if (RC1 == RC2)
+ continue;
+
+ // Compute the set intersection of RC1 and RC2.
+ const CodeGenRegister::Set &Memb1 = RC1->getMembers();
+ const CodeGenRegister::Set &Memb2 = RC2->getMembers();
+ CodeGenRegister::Set Intersection;
+ std::set_intersection(Memb1.begin(), Memb1.end(),
+ Memb2.begin(), Memb2.end(),
+ std::inserter(Intersection, Intersection.begin()),
+ CodeGenRegister::Less());
+
+ // Skip disjoint class pairs.
+ if (Intersection.empty())
+ continue;
+
+ // If RC1 and RC2 have different spill sizes or alignments, use the
+ // larger size for sub-classing. If they are equal, prefer RC1.
+ if (RC2->SpillSize > RC1->SpillSize ||
+ (RC2->SpillSize == RC1->SpillSize &&
+ RC2->SpillAlignment > RC1->SpillAlignment))
+ std::swap(RC1, RC2);
+
+ getOrCreateSubClass(RC1, &Intersection,
+ RC1->getName() + "_and_" + RC2->getName());
+ }
+}
+
+//
+// Synthesize missing sub-classes for getSubClassWithSubReg().
+//
+// Make sure that the set of registers in RC with a given SubIdx sub-register
+// form a register class. Update RC->SubClassWithSubReg.
+//
+void CodeGenRegBank::inferSubClassWithSubReg(CodeGenRegisterClass *RC) {
+ // Map SubRegIndex to set of registers in RC supporting that SubRegIndex.
+ typedef std::map<Record*, CodeGenRegister::Set, LessRecord> SubReg2SetMap;
+
+ // Compute the set of registers supporting each SubRegIndex.
+ SubReg2SetMap SRSets;
+ for (CodeGenRegister::Set::const_iterator RI = RC->getMembers().begin(),
+ RE = RC->getMembers().end(); RI != RE; ++RI) {
+ const CodeGenRegister::SubRegMap &SRM = (*RI)->getSubRegs();
+ for (CodeGenRegister::SubRegMap::const_iterator I = SRM.begin(),
+ E = SRM.end(); I != E; ++I)
+ SRSets[I->first].insert(*RI);
+ }
+
+ // Find matching classes for all SRSets entries. Iterate in SubRegIndex
+ // numerical order to visit synthetic indices last.
+ for (unsigned sri = 0, sre = SubRegIndices.size(); sri != sre; ++sri) {
+ Record *SubIdx = SubRegIndices[sri];
+ SubReg2SetMap::const_iterator I = SRSets.find(SubIdx);
+ // Unsupported SubRegIndex. Skip it.
+ if (I == SRSets.end())
+ continue;
+ // In most cases, all RC registers support the SubRegIndex.
+ if (I->second.size() == RC->getMembers().size()) {
+ RC->setSubClassWithSubReg(SubIdx, RC);
+ continue;
+ }
+ // This is a real subset. See if we have a matching class.
+ CodeGenRegisterClass *SubRC =
+ getOrCreateSubClass(RC, &I->second,
+ RC->getName() + "_with_" + I->first->getName());
+ RC->setSubClassWithSubReg(SubIdx, SubRC);
+ }
+}
+
+//
// Infer missing register classes.
//
-// For every register class RC, make sure that the set of registers in RC with
-// a given SubIxx sub-register form a register class.
void CodeGenRegBank::computeInferredRegisterClasses() {
// When this function is called, the register classes have not been sorted
// and assigned EnumValues yet. That means getSubClasses(),
// getSuperClasses(), and hasSubClass() functions are defunct.
- // Map SubRegIndex to register set.
- typedef std::map<Record*, CodeGenRegister::Set, LessRecord> SubReg2SetMap;
-
// Visit all register classes, including the ones being added by the loop.
for (unsigned rci = 0; rci != RegClasses.size(); ++rci) {
- CodeGenRegisterClass &RC = *RegClasses[rci];
-
- // Compute the set of registers supporting each SubRegIndex.
- SubReg2SetMap SRSets;
- for (CodeGenRegister::Set::const_iterator RI = RC.getMembers().begin(),
- RE = RC.getMembers().end(); RI != RE; ++RI) {
- const CodeGenRegister::SubRegMap &SRM = (*RI)->getSubRegs();
- for (CodeGenRegister::SubRegMap::const_iterator I = SRM.begin(),
- E = SRM.end(); I != E; ++I)
- SRSets[I->first].insert(*RI);
- }
+ CodeGenRegisterClass *RC = RegClasses[rci];
- // Find matching classes for all SRSets entries. Iterate in SubRegIndex
- // numerical order to visit synthetic indices last.
- for (unsigned sri = 0, sre = SubRegIndices.size(); sri != sre; ++sri) {
- Record *SubIdx = SubRegIndices[sri];
- SubReg2SetMap::const_iterator I = SRSets.find(SubIdx);
- // Unsupported SubRegIndex. Skip it.
- if (I == SRSets.end())
- continue;
- // In most cases, all RC registers support the SubRegIndex.
- if (I->second.size() == RC.getMembers().size()) {
- RC.setSubClassWithSubReg(SubIdx, &RC);
- continue;
- }
+ // Synthesize answers for getSubClassWithSubReg().
+ inferSubClassWithSubReg(RC);
- // This is a real subset. See if we have a matching class.
- CodeGenRegisterClass::Key K(&I->second, RC.SpillSize, RC.SpillAlignment);
- RCKeyMap::const_iterator FoundI = Key2RC.find(K);
- if (FoundI != Key2RC.end()) {
- RC.setSubClassWithSubReg(SubIdx, FoundI->second);
- continue;
- }
-
- // Class doesn't exist.
- CodeGenRegisterClass *NewRC =
- new CodeGenRegisterClass(RC.getName() + "_with_" +
- I->first->getName(), K);
- addToMaps(NewRC);
- RC.setSubClassWithSubReg(SubIdx, NewRC);
- }
+ // Synthesize answers for getCommonSubClass().
+ inferCommonSubClass(RC);
}
}
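
inferCommonSubClass() above leans on the standard ordered-range intersection idiom; a minimal illustration with ints standing in for CodeGenRegister pointers (the real code supplies CodeGenRegister::Less() as the comparator):

    // std::set_intersection over two sorted sets, collecting into a new set.
    #include <algorithm>
    #include <iterator>
    #include <set>

    static std::set<int> intersect(const std::set<int> &A,
                                   const std::set<int> &B) {
      std::set<int> Out;
      std::set_intersection(A.begin(), A.end(), B.begin(), B.end(),
                            std::inserter(Out, Out.begin()));
      return Out;  // An empty result means the classes are disjoint.
    }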
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index 4fc34b0..c74cfd6 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -237,8 +237,15 @@ namespace llvm {
// Add RC to *2RC maps.
void addToMaps(CodeGenRegisterClass*);
+ // Create a synthetic sub-class if it is missing.
+ CodeGenRegisterClass *getOrCreateSubClass(const CodeGenRegisterClass *RC,
+ const CodeGenRegister::Set *Membs,
+ StringRef Name);
+
// Infer missing register classes.
void computeInferredRegisterClasses();
+ void inferCommonSubClass(CodeGenRegisterClass *RC);
+ void inferSubClassWithSubReg(CodeGenRegisterClass *RC);
// Composite SubRegIndex instances.
// Map (SubRegIndex, SubRegIndex) -> SubRegIndex.
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index 6e1872e..c8d2b00 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -267,6 +267,7 @@ void CodeGenTarget::ComputeInstrsByEnum() const {
"DBG_VALUE",
"REG_SEQUENCE",
"COPY",
+ "BUNDLE",
0
};
const DenseMap<const Record*, CodeGenInstruction*> &Insts = getInstructions();
diff --git a/utils/TableGen/DFAPacketizerEmitter.cpp b/utils/TableGen/DFAPacketizerEmitter.cpp
new file mode 100644
index 0000000..2862d0c
--- /dev/null
+++ b/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -0,0 +1,512 @@
+//===- DFAPacketizerEmitter.cpp - Packetization DFA for a VLIW machine-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class parses the Schedule.td file and produces an API that can be used
+// to reason about whether an instruction can be added to a packet on a VLIW
+// architecture. The class internally generates a deterministic finite
+// automaton (DFA) that models all possible mappings of machine instructions
+// to functional units as instructions are added to a packet.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/TableGen/Record.h"
+#include "CodeGenTarget.h"
+#include "DFAPacketizerEmitter.h"
+#include <list>
+
+using namespace llvm;
+
+//
+//
+// State represents the usage of machine resources if the packet contains
+// a set of instruction classes.
+//
+// Specifically, currentState is a set of bit-masks.
+// The nth bit in a bit-mask indicates whether the nth resource is being used
+// by this state. The set of bit-masks in a state represent the different
+// possible outcomes of transitioning to this state.
+// For example, consider a two-resource architecture (resources L and M)
+// with three instruction classes: L, M, and L_or_M.
+// From the initial state (currentState = 0x00), if we add instruction class
+// L_or_M we will transition to a state with currentState = [0x01, 0x10]. This
+// represents the possible resource states that can result from adding a L_or_M
+// instruction
+//
+// Another way of thinking about this transition is that we are mapping an NDFA with
+// two states [0x01] and [0x10] into a DFA with a single state [0x01, 0x10].
+//
+//
+namespace {
+class State {
+ public:
+ static int currentStateNum;
+ int stateNum;
+ bool isInitial;
+ std::set<unsigned> stateInfo;
+
+ State();
+ State(const State &S);
+
+ //
+ // canAddInsnClass - Returns true if an instruction of type InsnClass is a
+ // valid transition from this state, i.e., can an instruction of type InsnClass
+ // be added to the packet represented by this state.
+ //
+ // PossibleStates is the set of valid resource states that ensue from valid
+ // transitions.
+ //
+ bool canAddInsnClass(unsigned InsnClass, std::set<unsigned> &PossibleStates);
+};
+} // End anonymous namespace.
+
+
+namespace {
+struct Transition {
+ public:
+ static int currentTransitionNum;
+ int transitionNum;
+ State *from;
+ unsigned input;
+ State *to;
+
+ Transition(State *from_, unsigned input_, State *to_);
+};
+} // End anonymous namespace.
+
+
+//
+// Comparators to keep set of states sorted.
+//
+namespace {
+struct ltState {
+ bool operator()(const State *s1, const State *s2) const;
+};
+} // End anonymous namespace.
+
+
+//
+// class DFA: deterministic finite automaton for processor resource tracking.
+//
+namespace {
+class DFA {
+public:
+ DFA();
+
+ // Set of states. Need to keep this sorted to emit the transition table.
+ std::set<State*, ltState> states;
+
+ // Map from a state to the list of transitions with that state as source.
+ std::map<State*, SmallVector<Transition*, 16>, ltState> stateTransitions;
+ State *currentState;
+
+ // Highest valued Input seen.
+ unsigned LargestInput;
+
+ //
+ // Modify the DFA.
+ //
+ void initialize();
+ void addState(State *);
+ void addTransition(Transition *);
+
+ //
+ // getTransition - Return the state when a transition is made from
+ // State From with Input I. If a transition is not found, return NULL.
+ //
+ State *getTransition(State *, unsigned);
+
+ //
+ // isValidTransition: Predicate that checks if there is a valid transition
+ // from state From on input InsnClass.
+ //
+ bool isValidTransition(State *From, unsigned InsnClass);
+
+ //
+ // writeTableAndAPI: Print out a table representing the DFA and the
+ // associated API to create a DFA packetizer.
+ //
+ void writeTableAndAPI(raw_ostream &OS, const std::string &ClassName);
+};
+} // End anonymous namespace.
+
+
+//
+// Constructors for State, Transition, and DFA
+//
+State::State() :
+ stateNum(currentStateNum++), isInitial(false) {}
+
+
+State::State(const State &S) :
+ stateNum(currentStateNum++), isInitial(S.isInitial),
+ stateInfo(S.stateInfo) {}
+
+
+Transition::Transition(State *from_, unsigned input_, State *to_) :
+ transitionNum(currentTransitionNum++), from(from_), input(input_),
+ to(to_) {}
+
+
+DFA::DFA() :
+ LargestInput(0) {}
+
+
+bool ltState::operator()(const State *s1, const State *s2) const {
+ return (s1->stateNum < s2->stateNum);
+}
+
+
+//
+// canAddInsnClass - Returns true if an instruction of type InsnClass is a
+// valid transition from this state, i.e., can an instruction of type InsnClass
+// be added to the packet represented by this state.
+//
+// PossibleStates is the set of valid resource states that ensue from valid
+// transitions.
+//
+bool State::canAddInsnClass(unsigned InsnClass,
+ std::set<unsigned> &PossibleStates) {
+ //
+ // Iterate over all resource states in currentState.
+ //
+ bool AddedState = false;
+
+ for (std::set<unsigned>::iterator SI = stateInfo.begin();
+ SI != stateInfo.end(); ++SI) {
+ unsigned thisState = *SI;
+
+ //
+ // Iterate over all possible resources used in InsnClass.
+ // For example, for InsnClass = 0x11, all resources = {0x01, 0x10}.
+ //
+
+ DenseSet<unsigned> VisitedResourceStates;
+ for (unsigned int j = 0; j < sizeof(InsnClass) * 8; ++j) {
+ if ((0x1 << j) & InsnClass) {
+ //
+ // For each possible resource used in InsnClass, generate the
+ // resource state if that resource was used.
+ //
+ unsigned ResultingResourceState = thisState | (0x1 << j);
+ //
+ // Check if the resulting resource state can be accommodated in this
+ // packet.
+ // We compute ResultingResourceState OR thisState.
+ // If the result of the OR is different than thisState, it implies
+ // that there is at least one resource that can be used to schedule
+ // InsnClass in the current packet.
+ // Insert ResultingResourceState into PossibleStates only if we haven't
+ // processed ResultingResourceState before.
+ //
+ if ((ResultingResourceState != thisState) &&
+ (VisitedResourceStates.count(ResultingResourceState) == 0)) {
+ VisitedResourceStates.insert(ResultingResourceState);
+ PossibleStates.insert(ResultingResourceState);
+ AddedState = true;
+ }
+ }
+ }
+ }
+
+ return AddedState;
+}
+
+
+void DFA::initialize() {
+ currentState->isInitial = true;
+}
+
+
+void DFA::addState(State *S) {
+ assert(!states.count(S) && "State already exists");
+ states.insert(S);
+}
+
+
+void DFA::addTransition(Transition *T) {
+ // Update LargestInput.
+ if (T->input > LargestInput)
+ LargestInput = T->input;
+
+ // Add the new transition.
+ stateTransitions[T->from].push_back(T);
+}
+
+
+//
+// getTransition - Return the state when a transition is made from
+// State From with Input I. If a transition is not found, return NULL.
+//
+State *DFA::getTransition(State *From, unsigned I) {
+ // Do we have a transition from state From?
+ if (!stateTransitions.count(From))
+ return NULL;
+
+ // Do we have a transition from state From with Input I?
+ for (SmallVector<Transition*, 16>::iterator VI =
+ stateTransitions[From].begin();
+ VI != stateTransitions[From].end(); ++VI)
+ if ((*VI)->input == I)
+ return (*VI)->to;
+
+ return NULL;
+}
+
+
+bool DFA::isValidTransition(State *From, unsigned InsnClass) {
+ return (getTransition(From, InsnClass) != NULL);
+}
+
+
+int State::currentStateNum = 0;
+int Transition::currentTransitionNum = 0;
+
+DFAGen::DFAGen(RecordKeeper &R):
+ TargetName(CodeGenTarget(R).getName()),
+ allInsnClasses(), Records(R) {}
+
+
+//
+// writeTableAndAPI - Print out a table representing the DFA and the
+// associated API to create a DFA packetizer.
+//
+// Format:
+// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
+// transitions.
+// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable for
+// the ith state.
+//
+//
+void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) {
+ std::set<State*, ltState>::iterator SI = states.begin();
+ // This table provides a map to the beginning of the transitions for State s
+ // in DFAStateInputTable.
+ std::vector<int> StateEntry(states.size());
+
+ OS << "namespace llvm {\n\n";
+ OS << "const int " << TargetName << "DFAStateInputTable[][2] = {\n";
+
+ // Tracks the total valid transitions encountered so far. It is used
+ // to construct the StateEntry table.
+ int ValidTransitions = 0;
+ for (unsigned i = 0; i < states.size(); ++i, ++SI) {
+ StateEntry[i] = ValidTransitions;
+ for (unsigned j = 0; j <= LargestInput; ++j) {
+ assert (((*SI)->stateNum == (int) i) && "Mismatch in state numbers");
+ if (!isValidTransition(*SI, j))
+ continue;
+
+ OS << "{" << j << ", "
+ << getTransition(*SI, j)->stateNum
+ << "}, ";
+ ++ValidTransitions;
+ }
+
+ // If there are no valid transitions from this state, we need a sentinel
+ // transition.
+ if (ValidTransitions == StateEntry[i]) {
+ OS << "{-1, -1},";
+ ++ValidTransitions; // Count the sentinel so StateEntry stays in sync.
+ }
+
+ OS << "\n";
+ }
+ OS << "};\n\n";
+ OS << "const unsigned int " << TargetName << "DFAStateEntryTable[] = {\n";
+
+ // Print the index of the first transition in DFAStateInputTable for each
+ // state; each entry there is an <input, next state> pair.
+ for (unsigned i = 0; i < states.size(); ++i)
+ OS << StateEntry[i] << ", ";
+
+ OS << "\n};\n";
+ OS << "} // namespace\n";
+
+
+ //
+ // Emit DFA Packetizer tables if the target is a VLIW machine.
+ //
+ std::string SubTargetClassName = TargetName + "GenSubtargetInfo";
+ OS << "\n" << "#include \"llvm/CodeGen/DFAPacketizer.h\"\n";
+ OS << "namespace llvm {\n";
+ OS << "DFAPacketizer *" << SubTargetClassName << "::"
+ << "createDFAPacketizer(const InstrItineraryData *IID) const {\n"
+ << " return new DFAPacketizer(IID, " << TargetName
+ << "DFAStateInputTable, " << TargetName << "DFAStateEntryTable);\n}\n\n";
+ OS << "} // End llvm namespace \n";
+}
+
+
+//
+// collectAllInsnClasses - Populate allInsnClasses which is a set of units
+// used in each stage.
+//
+void DFAGen::collectAllInsnClasses(const std::string &Name,
+ Record *ItinData,
+ unsigned &NStages,
+ raw_ostream &OS) {
+ // Collect processor itineraries.
+ std::vector<Record*> ProcItinList =
+ Records.getAllDerivedDefinitions("ProcessorItineraries");
+
+ // If there are no real itineraries (just NoItineraries), don't bother.
+ if (ProcItinList.size() < 2)
+ return;
+ std::map<std::string, unsigned> NameToBitsMap;
+
+ // Parse functional units for all the itineraries.
+ for (unsigned i = 0, N = ProcItinList.size(); i < N; ++i) {
+ Record *Proc = ProcItinList[i];
+ std::vector<Record*> FUs = Proc->getValueAsListOfDefs("FU");
+
+ // Convert macros to bits for each stage.
+ for (unsigned i = 0, N = FUs.size(); i < N; ++i)
+ NameToBitsMap[FUs[i]->getName()] = (unsigned) (1U << i);
+ }
+
+ const std::vector<Record*> &StageList =
+ ItinData->getValueAsListOfDefs("Stages");
+
+ // The number of stages.
+ NStages = StageList.size();
+
+ // For each unit.
+ unsigned UnitBitValue = 0;
+
+ // Compute the bitwise or of each unit used in this stage.
+ for (unsigned i = 0; i < NStages; ++i) {
+ const Record *Stage = StageList[i];
+
+ // Get unit list.
+ const std::vector<Record*> &UnitList =
+ Stage->getValueAsListOfDefs("Units");
+
+ for (unsigned j = 0, M = UnitList.size(); j < M; ++j) {
+ // Conduct bitwise or.
+ std::string UnitName = UnitList[j]->getName();
+ assert(NameToBitsMap.count(UnitName));
+ UnitBitValue |= NameToBitsMap[UnitName];
+ }
+
+ if (UnitBitValue != 0)
+ allInsnClasses.insert(UnitBitValue);
+ }
+}
+
+
+//
+// Run the worklist algorithm to generate the DFA.
+//
+void DFAGen::run(raw_ostream &OS) {
+ EmitSourceFileHeader("Target DFA Packetizer Tables", OS);
+
+ // Collect processor itineraries.
+ std::vector<Record*> ProcItinList =
+ Records.getAllDerivedDefinitions("ProcessorItineraries");
+
+ //
+ // Collect the instruction classes.
+ //
+ for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) {
+ Record *Proc = ProcItinList[i];
+
+ // Get processor itinerary name.
+ const std::string &Name = Proc->getName();
+
+ // Skip default.
+ if (Name == "NoItineraries")
+ continue;
+
+ // Sanity check for at least one instruction itinerary class.
+ unsigned NItinClasses =
+ Records.getAllDerivedDefinitions("InstrItinClass").size();
+ if (NItinClasses == 0)
+ return;
+
+ // Get itinerary data list.
+ std::vector<Record*> ItinDataList = Proc->getValueAsListOfDefs("IID");
+
+ // Collect instruction classes for all itinerary data.
+ for (unsigned j = 0, M = ItinDataList.size(); j < M; j++) {
+ Record *ItinData = ItinDataList[j];
+ unsigned NStages;
+ collectAllInsnClasses(Name, ItinData, NStages, OS);
+ }
+ }
+
+
+ //
+ // Run a worklist algorithm to generate the DFA.
+ //
+ DFA D;
+ State *Initial = new State;
+ Initial->isInitial = true;
+ Initial->stateInfo.insert(0x0);
+ D.addState(Initial);
+ SmallVector<State*, 32> WorkList;
+ std::map<std::set<unsigned>, State*> Visited;
+
+ WorkList.push_back(Initial);
+
+ //
+ // Worklist algorithm to create a DFA for processor resource tracking.
+ // C = {set of InsnClasses}
+ // Begin with initial node in worklist. Initial node does not have
+ // any consumed resources,
+ // ResourceState = 0x0
+ // Visited = {}
+ // While worklist != empty
+ // S = first element of worklist
+ // For every instruction class C
+ // if we can accommodate C in S:
+ // S' = state with resource states = {S Union C}
+ // Add a new transition: S x C -> S'
+ // If S' is not in Visited:
+ // Add S' to worklist
+ // Add S' to Visited
+ //
+ while (!WorkList.empty()) {
+ State *current = WorkList.pop_back_val();
+ for (DenseSet<unsigned>::iterator CI = allInsnClasses.begin(),
+ CE = allInsnClasses.end(); CI != CE; ++CI) {
+ unsigned InsnClass = *CI;
+
+ std::set<unsigned> NewStateResources;
+ //
+ // If we haven't already created a transition for this input
+ // and the state can accommodate this InsnClass, create a transition.
+ //
+ if (!D.getTransition(current, InsnClass) &&
+ current->canAddInsnClass(InsnClass, NewStateResources)) {
+ State *NewState = NULL;
+
+ //
+ // If we have seen this state before, then do not create a new state.
+ //
+ std::map<std::set<unsigned>, State*>::iterator VI;
+ if ((VI = Visited.find(NewStateResources)) != Visited.end())
+ NewState = VI->second;
+ else {
+ NewState = new State;
+ NewState->stateInfo = NewStateResources;
+ D.addState(NewState);
+ Visited[NewStateResources] = NewState;
+ WorkList.push_back(NewState);
+ }
+
+ Transition *NewTransition = new Transition(current, InsnClass,
+ NewState);
+ D.addTransition(NewTransition);
+ }
+ }
+ }
+
+ // Print out the table.
+ D.writeTableAndAPI(OS, TargetName);
+}
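
The core of the subset construction above is the per-state transition step in canAddInsnClass(): try each single resource bit of the instruction class against each resource mask in the state, and keep only ORs that changed the mask (a free resource). A distilled sketch (addInsnClass is invented; std::set deduplicates where the original uses a DenseSet):

    // One NDFA->DFA transition step: compute the successor state's resource
    // masks for adding InsnClass to a packet, or an empty set if it cannot fit.
    #include <set>

    static std::set<unsigned> addInsnClass(const std::set<unsigned> &StateInfo,
                                           unsigned InsnClass) {
      std::set<unsigned> Next;
      for (std::set<unsigned>::const_iterator I = StateInfo.begin(),
           E = StateInfo.end(); I != E; ++I)
        for (unsigned j = 0; j < sizeof(InsnClass) * 8; ++j)
          if (InsnClass & (1u << j)) {
            unsigned Result = *I | (1u << j);
            if (Result != *I)  // Resource j was still free in this outcome.
              Next.insert(Result);
          }
      return Next;
    }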
diff --git a/utils/TableGen/DFAPacketizerEmitter.h b/utils/TableGen/DFAPacketizerEmitter.h
new file mode 100644
index 0000000..8f47dde
--- /dev/null
+++ b/utils/TableGen/DFAPacketizerEmitter.h
@@ -0,0 +1,54 @@
+//===- DFAPacketizerEmitter.h - Packetization DFA for a VLIW machine-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class parses the Schedule.td file and produces an API that can be used
+// to reason about whether an instruction can be added to a packet on a VLIW
+// architecture. The class internally generates a deterministic finite
+// automaton (DFA) that models all possible mappings of machine instructions
+// to functional units as instructions are added to a packet.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <map>
+#include <string>
+
+namespace llvm {
+//
+// class DFAGen: class that generates and prints out the DFA for resource
+// tracking.
+//
+class DFAGen : public TableGenBackend {
+private:
+ std::string TargetName;
+ //
+ // allInsnClasses is the set of all possible resources consumed by an
+ // InstrStage.
+ //
+ DenseSet<unsigned> allInsnClasses;
+ RecordKeeper &Records;
+
+public:
+ DFAGen(RecordKeeper &R);
+
+ //
+ // collectAllInsnClasses: Populate allInsnClasses which is a set of units
+ // used in each stage.
+ //
+ void collectAllInsnClasses(const std::string &Name,
+ Record *ItinData,
+ unsigned &NStages,
+ raw_ostream &OS);
+
+ void run(raw_ostream &OS);
+};
+}
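
The emitted tables encode the DFA as rows of <input class, next state> pairs, with DFAStateEntryTable[s] giving state s's first row; the in-tree consumer is the DFAPacketizer constructed by createDFAPacketizer() above. A hypothetical reader over one state's slice of rows:

    // Look up the transition for input class C among rows [Begin, End) of a
    // state's slice; {-1, -1} is the sentinel row for states with no exits.
    static int nextState(const int Table[][2], unsigned Begin, unsigned End,
                         int C) {
      for (unsigned i = Begin; i != End; ++i)
        if (Table[i][0] == C)
          return Table[i][1];
      return -1;  // No transition: the packet cannot accept this class.
    }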
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp
index 1953dad..6c3cae2 100644
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@@ -576,6 +576,8 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
REG("VecListThreeD");
REG("VecListFourD");
REG("VecListTwoQ");
+ REG("VecListOneDAllLanes");
+ REG("VecListTwoDAllLanes");
IMM("i32imm");
IMM("i32imm_hilo16");
@@ -608,6 +610,14 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
IMM("nImmSplatI64");
IMM("nImmVMOVI32");
IMM("nImmVMOVF32");
+ IMM("imm8");
+ IMM("imm16");
+ IMM("imm32");
+ IMM("imm1_7");
+ IMM("imm1_15");
+ IMM("imm1_31");
+ IMM("imm0_1");
+ IMM("imm0_3");
IMM("imm0_7");
IMM("imm0_15");
IMM("imm0_255");
@@ -746,7 +756,7 @@ static void ARMPopulateOperands(
errs() << "Operand type: " << rec.getName() << '\n';
errs() << "Operand name: " << operandInfo.Name << '\n';
errs() << "Instruction name: " << inst.TheDef->getName() << '\n';
- llvm_unreachable("Unhandled type");
+ throw("Unhandled type in EDEmitter");
}
}
}
diff --git a/utils/TableGen/LLVMBuild.txt b/utils/TableGen/LLVMBuild.txt
index b2a8ac6..b0081eb 100644
--- a/utils/TableGen/LLVMBuild.txt
+++ b/utils/TableGen/LLVMBuild.txt
@@ -20,4 +20,3 @@ type = BuildTool
name = tblgen
parent = BuildTools
required_libraries = Support TableGen
-
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index 3a6ff4e..34cd9a9 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -711,9 +711,13 @@ void SubtargetEmitter::run(raw_ostream &OS) {
std::string ClassName = Target + "GenSubtargetInfo";
OS << "namespace llvm {\n";
+ OS << "class DFAPacketizer;\n";
OS << "struct " << ClassName << " : public TargetSubtargetInfo {\n"
<< " explicit " << ClassName << "(StringRef TT, StringRef CPU, "
<< "StringRef FS);\n"
+ << "public:\n"
+ << " DFAPacketizer *createDFAPacketizer(const InstrItineraryData *IID)"
+ << " const;\n"
<< "};\n";
OS << "} // End llvm namespace \n";
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index 12ebc57..3899a41 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -16,6 +16,7 @@
#include "CallingConvEmitter.h"
#include "CodeEmitterGen.h"
#include "DAGISelEmitter.h"
+#include "DFAPacketizerEmitter.h"
#include "DisassemblerEmitter.h"
#include "EDEmitter.h"
#include "FastISelEmitter.h"
@@ -47,6 +48,7 @@ enum ActionType {
GenPseudoLowering,
GenCallingConv,
GenDAGISel,
+ GenDFAPacketizer,
GenFastISel,
GenSubtarget,
GenIntrinsic,
@@ -79,6 +81,8 @@ namespace {
"Generate assembly instruction matcher"),
clEnumValN(GenDAGISel, "gen-dag-isel",
"Generate a DAG instruction selector"),
+ clEnumValN(GenDFAPacketizer, "gen-dfa-packetizer",
+ "Generate DFA Packetizer for VLIW targets"),
clEnumValN(GenFastISel, "gen-fast-isel",
"Generate a \"fast\" instruction selector"),
clEnumValN(GenSubtarget, "gen-subtarget",
@@ -134,6 +138,9 @@ public:
case GenDAGISel:
DAGISelEmitter(Records).run(OS);
break;
+ case GenDFAPacketizer:
+ DFAGen(Records).run(OS);
+ break;
case GenFastISel:
FastISelEmitter(Records).run(OS);
break;
diff --git a/utils/buildit/GNUmakefile b/utils/buildit/GNUmakefile
index 470ee76..a362fa6 100644
--- a/utils/buildit/GNUmakefile
+++ b/utils/buildit/GNUmakefile
@@ -32,7 +32,7 @@ DSTROOT = $(OBJROOT)/../dst
#######################################################################
-PREFIX = /Developer/usr/local
+PREFIX = /usr/local
# Unless assertions are forced on in the GMAKE command line, disable them.
ifndef ENABLE_ASSERTIONS
diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index f5f7c19..4b2d626 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -23,6 +23,43 @@ kUseCloseFDs = not kIsWindows
# Use temporary files to replace /dev/null on Windows.
kAvoidDevNull = kIsWindows
+def RemoveForce(f):
+ try:
+ os.remove(f)
+ except OSError:
+ pass
+
+def WinRename(f_o, f_n):
+ import time
+ retry_cnt = 256
+ while True:
+ try:
+ os.rename(f_o, f_n)
+ break
+ except WindowsError, (winerror, strerror):
+ retry_cnt = retry_cnt - 1
+ if retry_cnt <= 0:
+ raise
+ elif winerror == 32: # ERROR_SHARING_VIOLATION
+ time.sleep(0.01)
+ else:
+ raise
+
+def WinWaitReleased(f):
+ import random
+ t = "%s%06d" % (f, random.randint(0, 999999))
+ RemoveForce(t)
+ try:
+ WinRename(f, t) # rename
+ WinRename(t, f) # restore
+ except WindowsError, (winerror, strerror):
+ if winerror in (2, 3):
+ # 2: ERROR_FILE_NOT_FOUND
+ # 3: ERROR_PATH_NOT_FOUND
+ pass
+ else:
+ raise
+
def executeCommand(command, cwd=None, env=None):
p = subprocess.Popen(command, cwd=cwd,
stdin=subprocess.PIPE,
@@ -68,6 +105,7 @@ def executeShCmd(cmd, cfg, cwd, results):
input = subprocess.PIPE
stderrTempFiles = []
opened_files = []
+ written_files = []
named_temp_files = []
# To avoid deadlock, we use a single stderr stream for piped
# output. This is null until we have seen some output using
@@ -124,6 +162,8 @@ def executeShCmd(cmd, cfg, cwd, results):
if r[1] == 'a':
r[2].seek(0, 2)
opened_files.append(r[2])
+ if r[1] in 'aw':
+ written_files.append(r[0])
result = r[2]
final_redirects.append(result)
@@ -224,12 +264,14 @@ def executeShCmd(cmd, cfg, cwd, results):
else:
exitCode = res
+ # Make sure written_files is released by other (child) processes.
+ if (kIsWindows):
+ for f in written_files:
+ WinWaitReleased(f)
+
# Remove any named temporary files we created.
for f in named_temp_files:
- try:
- os.remove(f)
- except OSError:
- pass
+ RemoveForce(f)
if cmd.negate:
exitCode = not exitCode
diff --git a/utils/llvm-build/llvmbuild/componentinfo.py b/utils/llvm-build/llvmbuild/componentinfo.py
index b9a2d4f..230ae21 100644
--- a/utils/llvm-build/llvmbuild/componentinfo.py
+++ b/utils/llvm-build/llvmbuild/componentinfo.py
@@ -42,6 +42,13 @@ class ComponentInfo(object):
self.parent_instance = None
self.children = []
+ # The original source path.
+ self._source_path = None
+
+ # A flag to mark "special" components which have some amount of magic
+ # handling (generally based on command line options).
+ self._is_special_group = False
+
def set_parent_instance(self, parent):
assert parent.name == self.parent, "Unexpected parent!"
self.parent_instance = parent
@@ -138,6 +145,23 @@ class LibraryComponentInfo(ComponentInfo):
def get_library_name(self):
return self.library_name or self.name
+ def get_prefixed_library_name(self):
+ """
+ get_prefixed_library_name() -> str
+
+ Return the library name prefixed by the project name. This is generally
+ what the library name will be on disk.
+ """
+
+ basename = self.get_library_name()
+
+ # FIXME: We need to get the prefix information from an explicit project
+ # object, or something.
+ if basename in ('gtest', 'gtest_main'):
+ return basename
+
+ return 'LLVM%s' % basename
+
def get_llvmconfig_component_name(self):
return self.get_library_name().lower()
@@ -177,7 +201,7 @@ class LibraryGroupComponentInfo(ComponentInfo):
print >>result, 'type = %s' % self.type_name
print >>result, 'name = %s' % self.name
print >>result, 'parent = %s' % self.parent
- if self.required_libraries:
+ if self.required_libraries and not self._is_special_group:
print >>result, 'required_libraries = %s' % ' '.join(
self.required_libraries)
if self.add_to_library_groups:
@@ -357,6 +381,16 @@ def load_from_path(path, subpath):
parser = ConfigParser.RawConfigParser()
parser.read(path)
+ # Extract the common section.
+ if parser.has_section("common"):
+ common = IniFormatParser(parser.items("common"))
+ parser.remove_section("common")
+ else:
+ common = IniFormatParser({})
+
+ return common, _read_components_from_parser(parser, path, subpath)
+
+def _read_components_from_parser(parser, path, subpath):
# We load each section which starts with 'component' as a distinct component
# description (so multiple components can be described in one file).
for section in parser.sections():
@@ -390,4 +424,5 @@ def load_from_path(path, subpath):
fatal("unable to load component %r in %r: %s" % (
section, path, e.message))
+ info._source_path = path
yield info
diff --git a/utils/llvm-build/llvmbuild/main.py b/utils/llvm-build/llvmbuild/main.py
index 21cf392..36bca87 100644
--- a/utils/llvm-build/llvmbuild/main.py
+++ b/utils/llvm-build/llvmbuild/main.py
@@ -1,3 +1,4 @@
+import StringIO
import os
import sys
@@ -63,31 +64,25 @@ def make_install_dir(path):
class LLVMProjectInfo(object):
@staticmethod
def load_infos_from_path(llvmbuild_source_root):
- # FIXME: Implement a simple subpath file list cache, so we don't restat
- # directories we have already traversed.
+ def recurse(subpath):
+ # Load the LLVMBuild file.
+ llvmbuild_path = os.path.join(llvmbuild_source_root + subpath,
+ 'LLVMBuild.txt')
+ if not os.path.exists(llvmbuild_path):
+ fatal("missing LLVMBuild.txt file at: %r" % (llvmbuild_path,))
+
+ # Parse the components from it.
+ common,info_iter = componentinfo.load_from_path(llvmbuild_path,
+ subpath)
+ for info in info_iter:
+ yield info
- # First, discover all the LLVMBuild.txt files.
- #
- # FIXME: We would like to use followlinks=True here, but that isn't
- # compatible with Python 2.4. Instead, we will either have to special
- # case projects we would expect to possibly be linked to, or implement
- # our own walk that can follow links. For now, it doesn't matter since
- # we haven't picked up the LLVMBuild system in any other LLVM projects.
- for dirpath,dirnames,filenames in os.walk(llvmbuild_source_root):
- # If there is no LLVMBuild.txt file in a directory, we don't recurse
- # past it. This is a simple way to prune our search, although it
- # makes it easy for users to add LLVMBuild.txt files in places they
- # won't be seen.
- if 'LLVMBuild.txt' not in filenames:
- del dirnames[:]
- continue
+ # Recurse into the specified subdirectories.
+ for subdir in common.get_list("subdirectories"):
+ for item in recurse(os.path.join(subpath, subdir)):
+ yield item
- # Otherwise, load the LLVMBuild file in this directory.
- assert dirpath.startswith(llvmbuild_source_root)
- subpath = '/' + dirpath[len(llvmbuild_source_root)+1:]
- llvmbuild_path = os.path.join(dirpath, 'LLVMBuild.txt')
- for info in componentinfo.load_from_path(llvmbuild_path, subpath):
- yield info
+ return recurse("/")
@staticmethod
def load_from_path(source_root, llvmbuild_source_root):
@@ -213,14 +208,45 @@ class LLVMProjectInfo(object):
info_basedir[ci.subpath] = info_basedir.get(ci.subpath, []) + [ci]
+ # Compute the list of subdirectories to scan.
+ subpath_subdirs = {}
+ for ci in self.component_infos:
+ # Ignore root components.
+ if ci.subpath == '/':
+ continue
+
+ # Otherwise, append this subpath to the parent list.
+ parent_path = os.path.dirname(ci.subpath)
+ subpath_subdirs[parent_path] = parent_list = subpath_subdirs.get(
+ parent_path, set())
+ parent_list.add(os.path.basename(ci.subpath))
+
# Generate the build files.
for subpath, infos in info_basedir.items():
# Order the components by name to have a canonical ordering.
infos.sort(key = lambda ci: ci.name)
# Format the components into llvmbuild fragments.
- fragments = filter(None, [ci.get_llvmbuild_fragment()
- for ci in infos])
+ fragments = []
+
+ # Add the common fragments.
+ subdirectories = subpath_subdirs.get(subpath)
+ if subdirectories:
+ fragment = """\
+subdirectories = %s
+""" % (" ".join(sorted(subdirectories)),)
+ fragments.append(("common", fragment))
+
+ # Add the component fragments.
+ num_common_fragments = len(fragments)
+ for ci in infos:
+ fragment = ci.get_llvmbuild_fragment()
+ if fragment is None:
+ continue
+
+ name = "component_%d" % (len(fragments) - num_common_fragments)
+ fragments.append((name, fragment))
+
if not fragments:
continue
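Each fragment now carries the section name it is written under. For a
directory with one subdirectory and one component, the assembled list would
look like this (contents assumed):

    fragments = [
        ('common', 'subdirectories = Analysis\n'),
        ('component_0', 'type = Library\nname = Support\n'),
    ]
    # The writer below emits:
    #   [common]
    #   subdirectories = Analysis
    #
    #   [component_0]
    #   type = Library
    #   name = Support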
@@ -231,7 +257,22 @@ class LLVMProjectInfo(object):
if not os.path.exists(directory_path):
os.makedirs(directory_path)
- # Create the LLVMBuild file.
+ # In an effort to preserve comments (which aren't parsed), read in
+ # the original file and extract the comments. We only know how to
+ # associate comments that prefix a section name.
+ f = open(infos[0]._source_path)
+ comments_map = {}
+ comment_block = ""
+ for ln in f:
+ if ln.startswith(';'):
+ comment_block += ln
+ elif ln.startswith('[') and ln.endswith(']\n'):
+ comments_map[ln[1:-2]] = comment_block
+ else:
+ comment_block = ""
+ f.close()
+
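A minimal illustration of the comment-scraping loop above, run over an
in-memory file (contents assumed):

    import StringIO

    sample = StringIO.StringIO("; A leading comment.\n"
                               "[component_0]\n"
                               "type = Library\n")
    comments_map = {}
    comment_block = ""
    for ln in sample:
        if ln.startswith(';'):
            comment_block += ln
        elif ln.startswith('[') and ln.endswith(']\n'):
            comments_map[ln[1:-2]] = comment_block
        else:
            comment_block = ""
    # comments_map == {'component_0': '; A leading comment.\n'}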
+ # Create the LLVMBuild file.
file_path = os.path.join(directory_path, 'LLVMBuild.txt')
f = open(file_path, "w")
@@ -259,10 +300,16 @@ class LLVMProjectInfo(object):
;===------------------------------------------------------------------------===;
""" % header_string
- for i,fragment in enumerate(fragments):
- print >>f, '[component_%d]' % i
+ # Write out each fragment.
+ for name,fragment in fragments:
+ comment = comments_map.get(name)
+ if comment is not None:
+ f.write(comment)
+ print >>f, "[%s]" % name
f.write(fragment)
- print >>f
+ if fragment is not fragments[-1][1]:
+ print >>f
+
f.close()
def write_library_table(self, output_path):
@@ -282,7 +329,7 @@ class LLVMProjectInfo(object):
# Get the library name, or None for LibraryGroups.
if c.type_name == 'Library':
- library_name = c.get_library_name()
+ library_name = c.get_prefixed_library_name()
else:
library_name = None
@@ -344,9 +391,7 @@ class LLVMProjectInfo(object):
if library_name is None:
library_name_as_cstr = '0'
else:
- # If we had a project level component, we could derive the
- # library prefix.
- library_name_as_cstr = '"libLLVM%s.a"' % library_name
+ library_name_as_cstr = '"lib%s.a"' % library_name
print >>f, ' { "%s", %s, { %s } },' % (
name, library_name_as_cstr,
', '.join('"%s"' % dep
@@ -354,6 +399,37 @@ class LLVMProjectInfo(object):
print >>f, '};'
f.close()
+ def get_required_libraries_for_component(self, ci, traverse_groups = False):
+ """
+ get_required_libraries_for_component(component_info) -> iter
+
+ Given a Library component info descriptor, return an iterator over all
+ of the directly required libraries for linking with this component. If
+ traverse_groups is True, then library and target groups will be
+ traversed to include their required libraries.
+ """
+
+ assert ci.type_name in ('Library', 'LibraryGroup', 'TargetGroup')
+
+ for name in ci.required_libraries:
+ # Get the dependency info.
+ dep = self.component_info_map[name]
+
+ # If it is a library, yield it.
+ if dep.type_name == 'Library':
+ yield dep
+ continue
+
+ # Otherwise if it is a group, yield or traverse depending on what
+ # was requested.
+ if dep.type_name in ('LibraryGroup', 'TargetGroup'):
+ if not traverse_groups:
+ yield dep
+ continue
+
+ for res in self.get_required_libraries_for_component(dep, True):
+ yield res
+
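The group-flattening behavior can be seen on a tiny hypothetical component
graph (names illustrative):

    graph = {
        'Support':     ('Library', []),
        'X86':         ('Library', []),
        'ARM':         ('Library', []),
        'all-targets': ('TargetGroup', ['X86', 'ARM']),
        'CodeGen':     ('Library', ['Support', 'all-targets']),
    }

    def required_libs(name, traverse_groups=False):
        for dep in graph[name][1]:
            kind = graph[dep][0]
            if kind == 'Library' or not traverse_groups:
                yield dep
            else:
                for res in required_libs(dep, True):
                    yield res

    # list(required_libs('CodeGen'))       -> ['Support', 'all-targets']
    # list(required_libs('CodeGen', True)) -> ['Support', 'X86', 'ARM']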
def get_fragment_dependencies(self):
"""
get_fragment_dependencies() -> iter
@@ -366,9 +442,15 @@ class LLVMProjectInfo(object):
# Construct a list of all the dependencies of the Makefile fragment
# itself. These include all the LLVMBuild files themselves, as well as
# all of our own sources.
+ #
+ # Many components may come from the same file, so we make sure to
+ # deduplicate them.
+ build_paths = set()
for ci in self.component_infos:
- yield os.path.join(self.source_root, ci.subpath[1:],
- 'LLVMBuild.txt')
+ p = os.path.join(self.source_root, ci.subpath[1:], 'LLVMBuild.txt')
+ if p not in build_paths:
+ yield p
+ build_paths.add(p)
# Gather the list of necessary sources by just finding all loaded
# modules that are inside the LLVM source tree.
@@ -447,6 +529,24 @@ configure_file(\"%s\"
${CMAKE_CURRENT_BINARY_DIR}/DummyConfigureOutput)""" % (
cmake_quote_path(dep),)
+ # Write the properties we use to encode the required library dependency
+ # information in a form CMake can easily use directly.
+ print >>f, """
+# Explicit library dependency information.
+#
+# The following property assignments effectively create a map from component
+# names to required libraries, in a way that is easily accessed from CMake."""
+ for ci in self.ordered_component_infos:
+ # We only write the information for libraries currently.
+ if ci.type_name != 'Library':
+ continue
+
+ print >>f, """\
+set_property(GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_%s %s)""" % (
+ ci.get_prefixed_library_name(), " ".join(sorted(
+ dep.get_prefixed_library_name()
+ for dep in self.get_required_libraries_for_component(ci))))
+
f.close()
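For a library whose prefixed name is, say, LLVMCore and which requires only
LLVMSupport (names assumed for illustration), the loop above emits:

    print 'set_property(GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_%s %s)' % (
        'LLVMCore', ' '.join(sorted(['LLVMSupport'])))
    # -> set_property(GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_LLVMCore LLVMSupport)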
def write_make_fragment(self, output_path):
@@ -590,6 +690,7 @@ def add_magic_target_components(parser, project, opts):
fatal("special component %r must have empty %r list" % (
name, 'add_to_library_groups'))
+ info._is_special_group = True
return info
info_map = dict((ci.name, ci) for ci in project.component_infos)
diff --git a/utils/release/tag.sh b/utils/release/tag.sh
index d2d9911..80da47a 100755
--- a/utils/release/tag.sh
+++ b/utils/release/tag.sh
@@ -25,6 +25,7 @@ function usage() {
echo " "
echo " -release <num> The version number of the release"
echo " -rc <num> The release candidate number"
+ echo " -final Tag final release candidate"
}
function tag_version() {
@@ -45,10 +46,10 @@ function tag_release_candidate() {
if ! svn ls $base_url/$proj/tags/RELEASE_$release > /dev/null 2>&1 ; then
svn mkdir -m "Creating release directory for release_$release." $base_url/$proj/tags/RELEASE_$release
fi
- if ! svn ls $base_url/$proj/tags/RELEASE_$release/rc$rc > /dev/null 2>&1 ; then
+ if ! svn ls $base_url/$proj/tags/RELEASE_$release/$rc > /dev/null 2>&1 ; then
svn copy -m "Creating release candidate $rc from release_$release branch" \
$base_url/$proj/branches/release_$release \
- $base_url/$proj/tags/RELEASE_$release/rc$rc
+ $base_url/$proj/tags/RELEASE_$release/$rc
fi
done
set +x
@@ -62,7 +63,10 @@ while [ $# -gt 0 ]; do
;;
-rc | --rc )
shift
- rc=$1
+ rc="rc$1"
+ ;;
+ -final | --final )
+ rc="final"
;;
-h | --help | -help )
usage
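With this change $rc holds the complete tag directory name ('rc1', 'rc2', ...,
or 'final'), so candidate and final tags sit side by side under the release
directory; the resulting URL shape, sketched in Python (base URL and version
illustrative):

    for rc in ['rc1', 'final']:
        print '%s/llvm/tags/RELEASE_%s/%s' % (
            'https://llvm.org/svn/llvm-project', '30', rc)
    # -> .../llvm/tags/RELEASE_30/rc1
    # -> .../llvm/tags/RELEASE_30/final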
diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh
index c0f283d..6ec2861 100755
--- a/utils/release/test-release.sh
+++ b/utils/release/test-release.sh
@@ -42,6 +42,7 @@ function usage() {
echo ""
echo " -release X.Y The release number to test."
echo " -rc NUM The pre-release candidate number."
+ echo " -final The final release candidate."
echo " -j NUM Number of compile jobs to run. [default: 3]"
echo " -build-dir DIR Directory to perform testing in. [default: pwd]"
echo " -no-checkout Don't checkout the sources from SVN."
@@ -64,7 +65,10 @@ while [ $# -gt 0 ]; do
;;
-rc | --rc | -RC | --RC )
shift
- RC=$1
+ RC="rc$1"
+ ;;
+ -final | --final )
+ RC=final
;;
-j* )
NumJobs="`echo $1 | sed -e 's,-j\([0-9]*\),\1,g'`"
@@ -142,7 +146,7 @@ if [ -z "$NumJobs" ]; then
fi
# Go to the build directory (may be different from CWD)
-BuildDir=$BuildDir/rc$RC
+BuildDir=$BuildDir/$RC
mkdir -p $BuildDir
cd $BuildDir
@@ -177,7 +181,7 @@ function check_valid_urls() {
for proj in $projects ; do
echo "# Validating $proj SVN URL"
- if ! svn ls $Base_url/$proj/tags/RELEASE_$Release_no_dot/rc$RC > /dev/null 2>&1 ; then
+ if ! svn ls $Base_url/$proj/tags/RELEASE_$Release_no_dot/$RC > /dev/null 2>&1 ; then
echo "llvm $Release release candidate $RC doesn't exist!"
exit 1
fi
@@ -190,7 +194,7 @@ function export_sources() {
for proj in $projects ; do
echo "# Exporting $proj $Release-RC$RC sources"
- if ! svn export -q $Base_url/$proj/tags/RELEASE_$Release_no_dot/rc$RC $proj.src ; then
+ if ! svn export -q $Base_url/$proj/tags/RELEASE_$Release_no_dot/$RC $proj.src ; then
echo "error: failed to export $proj project"
exit 1
fi
@@ -238,7 +242,7 @@ function configure_llvmCore() {
echo "# Using C++ compiler: $cxx_compiler"
cd $ObjDir
- echo "# Configuring llvm $Release-rc$RC $Flavor"
+ echo "# Configuring llvm $Release-$RC $Flavor"
echo "# $BuildDir/llvm.src/configure --prefix=$InstallDir \
--enable-optimized=$Optimized \
--enable-assertions=$Assertions"
@@ -262,12 +266,12 @@ function build_llvmCore() {
fi
cd $ObjDir
- echo "# Compiling llvm $Release-rc$RC $Flavor"
+ echo "# Compiling llvm $Release-$RC $Flavor"
echo "# ${MAKE} -j $NumJobs VERBOSE=1 $ExtraOpts"
${MAKE} -j $NumJobs VERBOSE=1 $ExtraOpts \
2>&1 | tee $LogDir/llvm.make-Phase$Phase-$Flavor.log
- echo "# Installing llvm $Release-rc$RC $Flavor"
+ echo "# Installing llvm $Release-$RC $Flavor"
echo "# ${MAKE} install"
${MAKE} install \
2>&1 | tee $LogDir/llvm.install-Phase$Phase-$Flavor.log
@@ -285,7 +289,7 @@ function build_dragonegg() {
echo "# Targeted compiler: $gcc_compiler"
cd $DragonEggObjDir
- echo "# Compiling phase $Phase dragonegg $Release-rc$RC $Flavor"
+ echo "# Compiling phase $Phase dragonegg $Release-$RC $Flavor"
echo -n "# CXX=$cxx_compiler TOP_DIR=$TOP_DIR GCC=$gcc_compiler "
echo -n "LLVM_CONFIG=$LLVM_CONFIG ${MAKE} -f $TOP_DIR/Makefile "
echo "-j $NumJobs VERBOSE=1"
@@ -331,7 +335,7 @@ for Flavor in $Flavors ; do
echo ""
echo ""
echo "********************************************************************************"
- echo " Release: $Release-rc$RC"
+ echo " Release: $Release-$RC"
echo " Build: $Flavor"
echo " System Info: "
echo " `uname -a`"
@@ -341,21 +345,21 @@ for Flavor in $Flavors ; do
c_compiler="$CC"
cxx_compiler="$CXX"
- llvmCore_phase1_objdir=$BuildDir/Phase1/$Flavor/llvmCore-$Release-rc$RC.obj
- llvmCore_phase1_installdir=$BuildDir/Phase1/$Flavor/llvmCore-$Release-rc$RC.install
- dragonegg_phase1_objdir=$BuildDir/Phase1/$Flavor/DragonEgg-$Release-rc$RC.obj
+ llvmCore_phase1_objdir=$BuildDir/Phase1/$Flavor/llvmCore-$Release-$RC.obj
+ llvmCore_phase1_installdir=$BuildDir/Phase1/$Flavor/llvmCore-$Release-$RC.install
+ dragonegg_phase1_objdir=$BuildDir/Phase1/$Flavor/DragonEgg-$Release-$RC.obj
- llvmCore_phase2_objdir=$BuildDir/Phase2/$Flavor/llvmCore-$Release-rc$RC.obj
- llvmCore_phase2_installdir=$BuildDir/Phase2/$Flavor/llvmCore-$Release-rc$RC.install
- llvmCore_de_phase2_objdir=$BuildDir/Phase2/$Flavor/llvmCore-DragonEgg-$Release-rc$RC.obj
- llvmCore_de_phase2_installdir=$BuildDir/Phase2/$Flavor/llvmCore-DragonEgg-$Release-rc$RC.install
- dragonegg_phase2_objdir=$BuildDir/Phase2/$Flavor/DragonEgg-$Release-rc$RC.obj
+ llvmCore_phase2_objdir=$BuildDir/Phase2/$Flavor/llvmCore-$Release-$RC.obj
+ llvmCore_phase2_installdir=$BuildDir/Phase2/$Flavor/llvmCore-$Release-$RC.install
+ llvmCore_de_phase2_objdir=$BuildDir/Phase2/$Flavor/llvmCore-DragonEgg-$Release-$RC.obj
+ llvmCore_de_phase2_installdir=$BuildDir/Phase2/$Flavor/llvmCore-DragonEgg-$Release-$RC.install
+ dragonegg_phase2_objdir=$BuildDir/Phase2/$Flavor/DragonEgg-$Release-$RC.obj
- llvmCore_phase3_objdir=$BuildDir/Phase3/$Flavor/llvmCore-$Release-rc$RC.obj
- llvmCore_phase3_installdir=$BuildDir/Phase3/$Flavor/llvmCore-$Release-rc$RC.install
- llvmCore_de_phase3_objdir=$BuildDir/Phase3/$Flavor/llvmCore-DragonEgg-$Release-rc$RC.obj
- llvmCore_de_phase3_installdir=$BuildDir/Phase3/$Flavor/llvmCore-DragonEgg-$Release-rc$RC.install
- dragonegg_phase3_objdir=$BuildDir/Phase3/$Flavor/DragonEgg-$Release-rc$RC.obj
+ llvmCore_phase3_objdir=$BuildDir/Phase3/$Flavor/llvmCore-$Release-$RC.obj
+ llvmCore_phase3_installdir=$BuildDir/Phase3/$Flavor/llvmCore-$Release-$RC.install
+ llvmCore_de_phase3_objdir=$BuildDir/Phase3/$Flavor/llvmCore-DragonEgg-$Release-$RC.obj
+ llvmCore_de_phase3_installdir=$BuildDir/Phase3/$Flavor/llvmCore-DragonEgg-$Release-$RC.install
+ dragonegg_phase3_objdir=$BuildDir/Phase3/$Flavor/DragonEgg-$Release-$RC.obj
rm -rf $llvmCore_phase1_objdir
rm -rf $llvmCore_phase1_installdir
@@ -494,7 +498,7 @@ for Flavor in $Flavors ; do
test_llvmCore 1 $Flavor $llvmCore_phase1_objdir
fi
done
-) 2>&1 | tee $LogDir/testing.$Release-rc$RC.log
+) 2>&1 | tee $LogDir/testing.$Release-$RC.log
set +e
diff --git a/utils/unittest/CMakeLists.txt b/utils/unittest/CMakeLists.txt
index d882de2..73491f0 100644
--- a/utils/unittest/CMakeLists.txt
+++ b/utils/unittest/CMakeLists.txt
@@ -40,11 +40,3 @@ add_llvm_library(gtest
add_llvm_library(gtest_main
UnitTestMain/TestMain.cpp
)
-
-add_llvm_library_dependencies(gtest
- LLVMSupport
- )
-
-add_llvm_library_dependencies(gtest_main
- gtest
- )
diff --git a/utils/unittest/UnitTestMain/Makefile b/utils/unittest/UnitTestMain/Makefile
index 3082779..7bcb724 100644
--- a/utils/unittest/UnitTestMain/Makefile
+++ b/utils/unittest/UnitTestMain/Makefile
@@ -11,7 +11,7 @@ LEVEL = ../../..
include $(LEVEL)/Makefile.config
-LIBRARYNAME = UnitTestMain
+LIBRARYNAME = gtest_main
BUILD_ARCHIVE = 1
REQUIRES_RTTI = 1
diff --git a/utils/unittest/googletest/Makefile b/utils/unittest/googletest/Makefile
index 21b29ff..22c8f36 100644
--- a/utils/unittest/googletest/Makefile
+++ b/utils/unittest/googletest/Makefile
@@ -11,7 +11,7 @@ LEVEL := ../../..
include $(LEVEL)/Makefile.config
-LIBRARYNAME = GoogleTest
+LIBRARYNAME = gtest
BUILD_ARCHIVE = 1
REQUIRES_RTTI = 1